Skip to content

Provider Clients

Unified interface for LLM providers with automatic failover.

Supported Providers

  • ✅ OpenAI (GPT-3.5, GPT-4, Embeddings)
  • ✅ Anthropic (Claude 2, Claude 3, Claude Sonnet 4)
  • ✅ Google (Gemini, Vertex AI, PaLM 2)
  • ✅ Cohere (Command, Embed)
  • ✅ Azure OpenAI
  • ✅ Local Models (Ollama, vLLM, HuggingFace)

Client Registry

from stratarouter_runtime import LLMClientRegistry

registry = LLMClientRegistry()

# Register providers
registry.register("openai", OpenAIClient(api_key="sk-..."))
registry.register("anthropic", AnthropicClient(api_key="sk-ant-..."))
registry.register("google", GoogleClient(api_key="..."))

Usage

Simple Completion

result = await registry.complete(
    provider="openai",
    messages=[
        {"role": "user", "content": "Hello!"}
    ],
    model="gpt-4"
)

print(result.content)

With Fallback

result = await registry.complete(
    primary="openai",
    fallback=["anthropic", "google"],
    messages=[...]
)

if result.fallback_used:
    print(f"Primary failed, used: {result.provider}")

OpenAI Client

from stratarouter_runtime import OpenAIClient

client = OpenAIClient(
    api_key="sk-...",
    organization="org-...",
    timeout=30,
    max_retries=3
)

# Chat completion
result = await client.chat(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.7,
    max_tokens=1000
)

# Streaming
async for chunk in client.chat_stream(
    model="gpt-4",
    messages=[...]
):
    print(chunk.content, end="")

# Embeddings
embedding = await client.embed(
    text="Hello world",
    model="text-embedding-3-small"
)

Anthropic Client

from stratarouter_runtime import AnthropicClient

client = AnthropicClient(
    api_key="sk-ant-...",
    timeout=30
)

result = await client.complete(
    model="claude-sonnet-4-20250514",
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=1000
)

Google Client

from stratarouter_runtime import GoogleClient

client = GoogleClient(
    api_key="...",
    project_id="...",
    location="us-central1"
)

result = await client.generate(
    model="gemini-pro",
    prompt="Hello",
    max_tokens=1000
)

Local Models

from stratarouter_runtime import LocalClient

# Ollama
client = LocalClient(
    endpoint="http://localhost:11434",
    model="llama2"
)

# vLLM
client = LocalClient(
    endpoint="http://localhost:8000",
    model="meta-llama/Llama-2-7b-hf"
)

result = await client.complete(
    prompt="Hello",
    max_tokens=1000
)

Rate Limiting

client = OpenAIClient(
    api_key="sk-...",
    rate_limit_rpm=3000,      # Requests per minute
    rate_limit_tpm=90000      # Tokens per minute
)

# Automatic rate limiting
result = await client.chat(...)  # May wait if rate limit reached

Cost Tracking

# Track costs automatically
registry = LLMClientRegistry(track_costs=True)

result = await registry.complete(provider="openai", messages=[...])

print(f"Cost: ${result.cost_usd:.4f}")
print(f"Tokens: {result.tokens_used}")

# Get total costs
costs = registry.get_total_costs()
print(f"Total spent: ${costs['total_usd']:.4f}")
print(f"By provider: {costs['by_provider']}")

Error Handling

from stratarouter_runtime import (
    ProviderError,
    RateLimitError,
    AuthenticationError,
    InvalidRequestError
)

try:
    result = await client.chat(...)
except RateLimitError:
    logger.warning("Rate limit hit, retrying...")
except AuthenticationError:
    logger.error("Invalid API key")
except ProviderError as e:
    logger.error(f"Provider error: {e}")

Custom Providers

from stratarouter_runtime import BaseProvider

class CustomProvider(BaseProvider):
    """Example of a user-defined provider that plugs into the registry.

    Subclasses BaseProvider and implements the async completion contract:
    take a list of chat messages, call the backing API, and wrap the
    result in a ProviderResponse with content, token count, and cost.
    """

    async def complete(self, messages, **kwargs):
        """Run one completion against the custom backend.

        Args:
            messages: Chat messages to send (same shape as the built-in
                clients accept — presumably role/content dicts; confirm
                against BaseProvider).
            **kwargs: Extra provider options, currently unused here.

        Returns:
            ProviderResponse with the generated text, tokens consumed,
            and the computed USD cost.
        """
        api_result = await self.api_call(messages)
        tokens = api_result.tokens
        return ProviderResponse(
            content=api_result.text,
            tokens_used=tokens,
            cost_usd=self.calculate_cost(tokens),
        )

registry.register("custom", CustomProvider())

Runtime Index | Architecture