"""
|
|
DAARION LLM Proxy Service
|
|
Port: 7007
|
|
Multi-provider LLM gateway with usage tracking and rate limiting
|
|
"""
|
|
import os
|
|
from fastapi import FastAPI, HTTPException, Header
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from contextlib import asynccontextmanager
|
|
|
|
from models import LLMRequest, LLMResponse
|
|
from router import ModelRouter
|
|
from middlewares import RateLimiter, UsageTracker
|
|
from providers import OpenAIProvider, DeepSeekProvider, LocalProvider
|
|
|
|
# ============================================================================
|
|
# App Setup
|
|
# ============================================================================
|
|
|
|
model_router = ModelRouter()
|
|
rate_limiter = RateLimiter(requests_per_minute=10)
|
|
usage_tracker = UsageTracker()
|
|
|
|
providers = {}
|
|
|
|
@asynccontextmanager
|
|
async def lifespan(app: FastAPI):
|
|
"""Startup and shutdown"""
|
|
# Startup
|
|
print("🚀 Starting LLM Proxy service...")
|
|
|
|
# Initialize providers
|
|
for provider_name, provider_config in model_router.providers.items():
|
|
if provider_name == "openai":
|
|
providers[provider_name] = OpenAIProvider(provider_config)
|
|
elif provider_name == "deepseek":
|
|
providers[provider_name] = DeepSeekProvider(provider_config)
|
|
elif provider_name == "local":
|
|
providers[provider_name] = LocalProvider(provider_config)
|
|
print(f"✅ Initialized provider: {provider_name}")
|
|
|
|
print(f"✅ LLM Proxy ready with {len(model_router.models)} models")
|
|
print(f"📋 Available models: {', '.join(model_router.get_available_models())}")
|
|
|
|
yield
|
|
|
|
# Shutdown
|
|
print("🛑 Shutting down LLM Proxy...")
|
|
for provider in providers.values():
|
|
if hasattr(provider, 'close'):
|
|
await provider.close()
|
|
|
|
app = FastAPI(
|
|
title="DAARION LLM Proxy",
|
|
version="1.0.0",
|
|
description="Multi-provider LLM gateway",
|
|
lifespan=lifespan
|
|
)
|
|
|
|
app.add_middleware(
|
|
CORSMiddleware,
|
|
allow_origins=["*"],
|
|
allow_credentials=True,
|
|
allow_methods=["*"],
|
|
allow_headers=["*"],
|
|
)
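
# NOTE: browsers reject credentialed CORS responses when the allowed origin is
# the wildcard "*". That is harmless for internal service-to-service calls,
# but a production deployment would list explicit origins instead.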

# ============================================================================
# API Endpoints
# ============================================================================

@app.post("/internal/llm/proxy", response_model=LLMResponse)
async def llm_proxy(
    request: LLMRequest,
    x_internal_secret: str = Header(None, alias="X-Internal-Secret")
):
    """
    Proxy LLM requests to the appropriate provider

    Internal-only endpoint (requires X-Internal-Secret header)
    """
    # Simple auth check (for Phase 3)
    expected_secret = os.getenv("LLM_PROXY_SECRET", "dev-secret-token")
    if x_internal_secret != expected_secret:
        raise HTTPException(401, "Invalid or missing X-Internal-Secret header")

    # Extract metadata
    agent_id = request.metadata.get("agent_id")
    microdao_id = request.metadata.get("microdao_id")

    # Rate limiting (per agent)
    if agent_id:
        allowed, remaining = rate_limiter.check_limit(f"agent:{agent_id}")
        if not allowed:
            raise HTTPException(429, f"Rate limit exceeded for agent {agent_id}")

    # Route model
    try:
        model_config, provider_config = model_router.route_model(request.model)
    except ValueError as e:
        raise HTTPException(400, str(e))

    # Get provider instance
    provider = providers.get(model_config.provider)
    if not provider:
        raise HTTPException(500, f"Provider not initialized: {model_config.provider}")

    # Call LLM
    try:
        response = await provider.chat(
            messages=request.messages,
            model_name=model_config.physical_name,
            max_tokens=request.max_tokens or model_config.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p
        )

        # Log usage
        usage_tracker.log_usage(
            agent_id=agent_id,
            microdao_id=microdao_id,
            model=request.model,
            provider=model_config.provider,
            prompt_tokens=response.usage.prompt_tokens,
            completion_tokens=response.usage.completion_tokens,
            latency_ms=response.latency_ms or 0,
            success=True
        )

        return response

    except Exception as e:
        # Log failure
        usage_tracker.log_usage(
            agent_id=agent_id,
            microdao_id=microdao_id,
            model=request.model,
            provider=model_config.provider,
            prompt_tokens=0,
            completion_tokens=0,
            latency_ms=0,
            success=False,
            error=str(e)
        )

        raise HTTPException(500, f"LLM request failed: {str(e)}")


@app.get("/internal/llm/models")
async def list_models():
    """List available models"""
    return {
        "models": [
            {
                "name": model_config.logical_name,
                "provider": model_config.provider,
                "physical_name": model_config.physical_name,
                "max_tokens": model_config.max_tokens
            }
            for model_config in model_router.models.values()
        ]
    }


@app.get("/internal/llm/usage")
async def get_usage(
    agent_id: str | None = None,
    x_internal_secret: str = Header(None, alias="X-Internal-Secret")
):
    """Get usage statistics"""
    expected_secret = os.getenv("LLM_PROXY_SECRET", "dev-secret-token")
    if x_internal_secret != expected_secret:
        raise HTTPException(401, "Invalid or missing X-Internal-Secret header")

    return usage_tracker.get_usage_summary(agent_id)


@app.get("/health")
async def health():
    """Health check"""
    return {
        "status": "ok",
        "service": "llm-proxy",
        "providers": list(providers.keys()),
        "models": len(model_router.models)
    }


# ============================================================================
# Run
# ============================================================================

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7007)
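
# Quick smoke test once the service is running (endpoint paths, header, and
# port from this file; the model name and prompt are hypothetical):
#   curl http://localhost:7007/health
#   curl -X POST http://localhost:7007/internal/llm/proxy \
#     -H "X-Internal-Secret: dev-secret-token" \
#     -H "Content-Type: application/json" \
#     -d '{"model": "<logical model name>", "messages": [{"role": "user", "content": "Hello"}]}'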