# config.toml

# HTTP server binding, concurrency, and request limits.
[server]
host = "0.0.0.0"  # binds all interfaces; narrow this outside container deployments
port = 8080
workers = 4
request_timeout_secs = 30
max_request_size_mb = 10
enable_cors = true
# NOTE(review): "*" permits any origin — restrict this list for production.
cors_origins = ["*"]

# Redis-backed query cache with an optional in-process (local) layer.
[cache]
redis_url = "redis://127.0.0.1:6379"
redis_pool_size = 10
default_ttl_secs = 3600  # 1 hour
max_cached_queries = 10000
# Minimum similarity score for a cache hit — presumably compared against
# query-embedding similarity; TODO confirm against the cache lookup code.
similarity_threshold = 0.95
enable_local_cache = true
local_cache_size_mb = 100

# Query-complexity classification settings (used to pick between the
# simple and complex models configured under [models.ollama]).
[classifier]
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
# Queries scoring above this are presumably routed to the complex model —
# TODO confirm against the routing code.
complexity_threshold = 0.6
max_query_length = 4096
enable_semantic_classification = true
# Keyword hints that bias classification toward the simple model
# (presumably substring-matched against the query text; verify in classifier).
simple_model_keywords = [
    "hello",
    "hi",
    "what is",
    "who is",
    "when",
    "where",
    "simple",
    "define",
    "how to",
    "yes",
    "no",
]
# Keyword hints that bias classification toward the complex model.
complex_model_keywords = [
    "analyze",
    "compare",
    "explain in detail",
    "code",
    "implement",
    "algorithm",
    "complex",
    "reasoning",
    "step by step",
    "optimize",
    "design pattern",
    "architecture",
]

# Per-feature weights for the complexity score. The four weights sum to 1.0;
# keep that invariant when tuning.
[classifier.complexity_weights]
length_weight = 0.2
vocabulary_weight = 0.25
structure_weight = 0.25
semantic_weight = 0.3

# Model-call behavior shared across backends.
[models]
fallback_enabled = true
retry_attempts = 3
retry_delay_ms = 1000  # delay between retries, milliseconds
# Per-model request timeout; a separate key from [server].request_timeout_secs.
request_timeout_secs = 60

# Ollama backend. host.docker.internal resolves to the Docker host from
# inside a container; use http://127.0.0.1:11434 when running natively.
[models.ollama]
base_url = "http://host.docker.internal:11434"
simple_model = "llama3.2:1b"   # lightweight model for simple queries
complex_model = "llama3.2:3b"  # larger model for complex queries
# Placeholder value — Ollama presumably does not validate this; TODO confirm.
api_key = "ollama"

# Metrics, tracing, and logging.
[monitoring]
metrics_enabled = true
metrics_endpoint = "/metrics"  # Prometheus-style scrape path — TODO confirm format
tracing_enabled = true
log_level = "info"
enable_request_logging = true
slow_request_threshold_ms = 1000  # requests slower than this are flagged, milliseconds
health_check_interval_secs = 30

# Authentication and rate limiting.
[security]
# WARNING: placeholder secret — must be overridden before production use.
jwt_secret = "change-this-in-production"
jwt_expiration_hours = 24
rate_limit_requests_per_minute = 60
# Auth is currently disabled; jwt_secret and allowed_api_keys are presumably
# ignored until this is set to true — verify in the auth middleware.
enable_auth = false
allowed_api_keys = []