Skip to content

Configuration

Production Configuration Guide

Comprehensive guide to configuring StrataRouter for production deployments.


Environment Variables

Core Configuration

# Router settings
STRATAROUTER_DIMENSION=384
STRATAROUTER_THRESHOLD=0.5
STRATAROUTER_MAX_ROUTES=10000

# Performance tuning
STRATAROUTER_HNSW_M=16
STRATAROUTER_HNSW_EF_CONSTRUCTION=200
STRATAROUTER_HNSW_EF_SEARCH=50

# Caching
STRATAROUTER_CACHE_ENABLED=true
STRATAROUTER_CACHE_TTL=3600
STRATAROUTER_CACHE_MAX_SIZE=10000
STRATAROUTER_REDIS_URL=redis://localhost:6379

# Batching
STRATAROUTER_BATCH_SIZE=32
STRATAROUTER_BATCH_TIMEOUT_MS=50

# Observability
STRATAROUTER_METRICS_ENABLED=true
STRATAROUTER_METRICS_PORT=9090
STRATAROUTER_TRACING_ENABLED=true
STRATAROUTER_LOG_LEVEL=info

Provider Configuration

# OpenAI
OPENAI_API_KEY=sk-...
OPENAI_ORG_ID=org-...
OPENAI_TIMEOUT=30

# Anthropic
ANTHROPIC_API_KEY=sk-ant-...
ANTHROPIC_TIMEOUT=30

# Google
GOOGLE_API_KEY=...
GOOGLE_PROJECT_ID=...

# Custom providers
CUSTOM_PROVIDER_URL=https://api.example.com
CUSTOM_PROVIDER_API_KEY=...

Configuration File

config.toml

[router]
dimension = 384
threshold = 0.5
max_routes = 10000

[hnsw]
m = 16
ef_construction = 200
ef_search = 50

[cache]
enabled = true
backend = "redis"
url = "redis://localhost:6379"
ttl = 3600
max_size = 10000
similarity_threshold = 0.95

[batch]
enabled = true
size = 32
timeout_ms = 50
max_queue_size = 1000

[retry]
enabled = true
max_attempts = 3
initial_delay_ms = 100
max_delay_ms = 5000
backoff_multiplier = 2.0

[timeout]
routing_ms = 10
execution_ms = 30000
total_ms = 60000

[limits]
max_concurrent_requests = 1000
rate_limit_per_second = 10000
max_tokens_per_request = 4096

[observability]
metrics_enabled = true
metrics_port = 9090
tracing_enabled = true
tracing_endpoint = "http://localhost:4318"
log_level = "info"
log_format = "json"

[security]
api_key_required = true
tls_enabled = true
cert_path = "/etc/stratarouter/cert.pem"
key_path = "/etc/stratarouter/key.pem"

Python Configuration

Basic Setup

from stratarouter import Router, RouterConfig

config = RouterConfig(
    dimension=384,
    threshold=0.5,
    max_routes=10000,
    hnsw_m=16,
    hnsw_ef_construction=200,
    hnsw_ef_search=50
)

router = Router(config=config)

With Runtime

from stratarouter.runtime import RuntimeExecutor, RuntimeConfig

runtime_config = RuntimeConfig(
    cache_enabled=True,
    cache_ttl=3600,
    batch_size=32,
    batch_timeout_ms=50,
    retry_max_attempts=3,
    metrics_enabled=True,
    tracing_enabled=True
)

executor = RuntimeExecutor(router, config=runtime_config)

Performance Tuning

HNSW Index Parameters

# Accuracy vs Speed tradeoff
config = RouterConfig(
    # Higher = more accurate, slower build
    hnsw_ef_construction=200,  # Default: 200

    # Higher = more accurate, slower search  
    hnsw_ef_search=50,  # Default: 50

    # Higher = more memory, better recall
    hnsw_m=16  # Default: 16
)

Recommendations:

| Use Case | ef_construction | ef_search | M |
| --- | --- | --- | --- |
| Speed-optimized | 100 | 30 | 8 |
| Balanced | 200 | 50 | 16 |
| Accuracy-optimized | 400 | 100 | 32 |

Cache Configuration

config = RuntimeConfig(
    cache_enabled=True,

    # Cache TTL (seconds)
    cache_ttl=3600,  # 1 hour

    # Max cache entries
    cache_max_size=10000,

    # Similarity threshold for cache hits
    cache_similarity_threshold=0.95  # 95% similar
)

Batch Processing

config = RuntimeConfig(
    batch_size=32,  # Queries per batch
    batch_timeout_ms=50,  # Max wait time
    max_queue_size=1000  # Max pending queries
)

Resource Limits

Memory Management

config = RouterConfig(
    max_routes=10000,  # Hard limit

    # Index memory budget (MB)
    max_index_memory_mb=512
)

Concurrency Limits

config = RuntimeConfig(
    max_concurrent_requests=1000,
    rate_limit_per_second=10000,

    # Per-tenant limits
    tenant_rate_limit=100
)

Security Configuration

API Authentication

config = RuntimeConfig(
    api_key_required=True,
    api_keys=["key1", "key2"],

    # JWT authentication
    jwt_enabled=True,
    jwt_secret="your-secret",
    jwt_algorithm="HS256"
)

TLS/SSL

config = RuntimeConfig(
    tls_enabled=True,
    cert_path="/etc/stratarouter/cert.pem",
    key_path="/etc/stratarouter/key.pem",

    # Client certificate verification
    verify_client_cert=True
)

Monitoring Configuration

Metrics

config = RuntimeConfig(
    metrics_enabled=True,
    metrics_port=9090,

    # Prometheus labels
    metrics_labels={
        "environment": "production",
        "region": "us-east-1"
    }
)

Tracing

config = RuntimeConfig(
    tracing_enabled=True,
    tracing_endpoint="http://jaeger:4318",
    tracing_sample_rate=0.1,  # Sample 10%

    # Service name
    service_name="stratarouter"
)

Logging

import logging

config = RuntimeConfig(
    log_level=logging.INFO,
    log_format="json",
    log_output="/var/log/stratarouter.log",

    # Structured logging
    log_structured=True
)

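Provider Configuration

The snippets in this section read credentials from the environment with os.getenv, so they require `import os` at the top of your module.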

OpenAI

from stratarouter.runtime.clients import OpenAIClient

openai_client = OpenAIClient(
    api_key=os.getenv("OPENAI_API_KEY"),
    organization=os.getenv("OPENAI_ORG_ID"),
    timeout=30,
    max_retries=3
)

Anthropic

from stratarouter.runtime.clients import AnthropicClient

anthropic_client = AnthropicClient(
    api_key=os.getenv("ANTHROPIC_API_KEY"),
    timeout=30,
    max_retries=3
)

Google

from stratarouter.runtime.clients import GoogleClient

google_client = GoogleClient(
    api_key=os.getenv("GOOGLE_API_KEY"),
    project_id=os.getenv("GOOGLE_PROJECT_ID"),
    timeout=30
)

Environment-Specific Configs

Development

[router]
threshold = 0.3  # Lower for testing
log_level = "debug"

[cache]
enabled = false  # Disable for testing

[retry]
max_attempts = 1  # Fail fast

Staging

[router]
threshold = 0.5

[cache]
enabled = true
ttl = 1800  # 30 min

[observability]
tracing_sample_rate = 0.5  # 50% sample

Production

[router]
threshold = 0.7  # Higher confidence

[cache]
enabled = true
ttl = 3600  # 1 hour

[observability]
tracing_sample_rate = 0.1  # 10% sample

[security]
api_key_required = true
tls_enabled = true

Configuration Validation

Validate Config

from stratarouter import RouterConfig

config = RouterConfig.from_file("config.toml")

# Validate
errors = config.validate()
if errors:
    print(f"Configuration errors: {errors}")
else:
    print("Configuration valid")

Best Practices

- Use environment variables for secrets
- Enable caching in production
- Set appropriate timeouts for your workload
- Monitor resource usage and adjust limits
- Use TLS/SSL in production
- Sample traces (don't trace 100%)
- Test configuration in staging first


Next Steps

📊 Monitoring

Set up observability

Configure Monitoring →

⚡ Tuning

Optimize performance

Performance Tuning →

🔒 Security

Secure your deployment

Security Guide →