Files
microdao-daarion/services/router/sofiia_auto_router.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

768 lines
32 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Sofiia Smart Auto-Router — Cursor-style model selection for Sofiia agent.
Classifies incoming prompt by task type and selects the best available model,
balancing capability, speed, cost, and provider availability.
Full model catalog includes:
- Cloud: Anthropic Claude, xAI Grok, DeepSeek, Mistral AI, GLM-5 (Z.AI)
- Local Ollama (NODA2/MacBook): qwen3.5:35b-a3b, qwen3:14b, glm-4.7-flash:32k,
deepseek-r1:70b, deepseek-coder:33b, gemma3, mistral-nemo:12b,
starcoder2:3b, phi3, llava:13b
Task taxonomy (inspired by Cursor Auto mode):
code_gen, code_review, code_debug, code_refactor,
architecture, devops, security, analysis, quick_answer, creative, reasoning,
math_code, vision, chatbot
"""
from __future__ import annotations
import logging
import os
import re
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# ── Task taxonomy ──────────────────────────────────────────────────────────────
# Each pattern group uses multi-word or context-aware patterns to reduce false
# positives. Single common words (system, design, check, list, graph, tree) are
# avoided unless paired with a qualifier.
TASK_PATTERNS: List[Tuple[str, List[str], float]] = [
# (task_type, patterns, base_weight) — weight scales final score
("code_gen", [
r"\апиши\s+(функці|код|клас|скрипт|модуль|endpoint|api)",
r"\bреалізуй\b", r"\bcreate\s+(function|class|module|endpoint|api|component)",
r"\bimplement\b", r"\bgenerate\s+code\b", r"\генеруй\s+код\b",
r"\ункці[юя]\s+для\b", r"\bклас\s+для\b", r"\апиши\s+код\b",
r"\bwrite\s+a?\s*(function|class|module|script|endpoint)\b",
r"\bcontroller\b", r"\bendpoint\s+(для|for)\b",
], 1.0),
("code_debug", [
r"\bвиправ\b", r"\bбаг\b", r"\bпомилк[аи]\b", r"\btraceback\b",
r"\bexception\b", r"\bfailed\b", r"\bcrash(es|ed)?\b", r"\е\s+працю",
r"\ебаг\b", r"\bdebug\b", r"\bfix\s+(the\s+)?(bug|error|issue|crash)\b",
r"\bsyntax\s*error\b", r"\btype\s*error\b", r"\battribute\s*error\b",
r"\bruntime\s*error\b", r"\bvalue\s*error\b",
], 1.0),
("code_review", [
r"\breview\s+(the\s+)?(code|pr|pull\s+request|diff)\b",
r"\bаудит\s+(код|сервіс|систем)\b", r"\baudit\s+(code|service)\b",
r"\bперевір\w*\s+(код|якість)\b", r"\bcode\s+quality\b",
r"\bcode\s+review\b", r"\brev'ю\b",
], 1.0),
("code_refactor", [
r"\bрефактор\b", r"\brefactor\b",
r"\bоптимізу[йї]\s+(код|функці|клас)\b", r"\boptimize\s+(the\s+)?(code|function|class)\b",
r"\bclean\s+up\s+(the\s+)?code\b", r"\bpolish\s+(the\s+)?code\b",
r"\bspeed\s+up\b", r"\bimprove\s+(the\s+)?code\b",
], 1.0),
("architecture", [
r"\bархітектур\w+\b", r"\barchitecture\b",
r"\bспроєктуй\b", r"\bsystem\s+design\b",
r"\bmicroservice\s+(architect|design|pattern)\b",
r"\bdatabase\s+design\b", r"\bapi\s+design\b",
r"\bscalab(le|ility)\b", r"\bscaling\s+strateg\b",
r"\bdesign\s+pattern\b", r"\bsystem\s+structure\b",
], 1.0),
("devops", [
r"\bdeploy\b", r"\bdocker\s*(file|compose|-compose|ize)?\b",
r"\bkubernetes\b", r"\bk8s\b", r"\bci[\s/]cd\b",
r"\bpipeline\b", r"\bnginx\b", r"\bcaddy\b",
r"\ода\d?\b", r"\bnoda\d?\b", r"\bcontainer\s+(start|stop|restart|build|image)\b",
r"\еплой\b", r"\bssh\s+(to|into|root|connect)\b",
r"\bhelm\b", r"\bterraform\b", r"\binfrastructure\b",
r"\bdocker\s+compose\s+up\b",
], 1.0),
("security", [
r"\bvulnerability\b", r"\bCVE-\d+\b", r"\bsecurity\s+(audit|review|issue|scan)\b",
r"\bauth(entication|orization)\b", r"\bencrypt(ion)?\b",
r"\bRBAC\b", r"\bpermission\s+(model|system)\b",
r"\bбезпек\w+\b", r"\bpentest\b", r"\b(sql|xss|csrf)\s*injection\b",
r"\bthreat\s+model\b",
], 1.0),
("reasoning", [
r"\ому\s+\w+\b", r"\bwhy\s+(does|is|do|did|should|would)\b",
r"\bpros\s+and\s+cons\b", r"\btrade[\s-]?off\b",
r"\bпорівняй\b", r"\bcompare\s+\w+\s+(vs|and|with|to)\b",
r"\bяк\s+краще\b", r"\bперевага\b", r"\едолік\b",
r"\bdecision\s+(between|about)\b",
r"\bversus\b", r"\b\w+\s+vs\s+\w+\b",
], 1.0),
("analysis", [
r"\bпроаналізуй\b", r"\bаналіз\s+\w+\b",
r"\banalyze\s+\w+\b", r"\binvestigate\b",
r"\bexplain\s+(how|why|what)\b", r"\bsummariz(e|ation)\b",
r"\ослідж\b", r"\bпоясни\s+(як|чому|що)\b",
r"\bhow\s+does\s+\w+\s+work\b",
], 1.0),
("creative", [
r"\апиши\s+(текст|стат|пост|лист|опис)\b",
r"\bwrite\s+a\s+(blog|article|post|email|description|letter)\b",
r"\bdraft\s+(a\s+)?(doc|email|message|proposal)\b",
r"\breadme\b", r"\bchangelog\b", r"\bdocumentation\b",
], 1.0),
("quick_answer", [
r"\о\s+таке\b", r"\bwhat\s+is\s+(a|an|the)?\b",
r"\bhow\s+to\s+\w+\b", r"\bdefinition\s+of\b",
r"\bшвидко\b", r"\bсинтаксис\s+\w+\b",
r"\bgive\s+me\s+an?\s+example\b", r"\bexample\s+of\b",
], 0.9),
("vision", [
r"\ображен\w+\b", r"\ото\b", r"\bimage\s+(analysis|recognition|detect)\b",
r"\bскріншот\b", r"\bscreenshot\b",
r"\ізуальн\w+\s+аналіз\b", r"\ідео\s+(аналіз|розпізна)\b",
], 1.0),
("math_code", [
r"\bалгоритм\s+\w+\b", r"\balgorithm\s+(for|to)\b",
r"\bсортуван\w+\b", r"\bsort(ing)?\s+algorithm\b",
r"\bdynamic\s+programming\b", r"\bgraph\s+(algorithm|traversal|search)\b",
r"\bmatrix\s+(mult|inver|decomp)\b",
r"\bcalculate\s+\w+\b", r"\bcompute\s+\w+\b",
r"\bformula\s+(for|to)\b", r"\ейкстр\b", r"\bDijkstra\b",
], 1.0),
# Chatbot / conversational — greetings, small talk, acknowledgements
("chatbot", [
r"^(привіт|вітаю|добрий|доброго|hi|hello|hey)\b",
r"^(дякую|спасибі|thank|thanks)\b",
r"^(ок|добре|зрозумів|зрозуміло|so?|ok|yes|no|ні|так)\s*[,!.]?\s*$",
r"\bяк\s+(справи|діла|ся маєш)\b", r"\bhow\s+are\s+you\b",
], 0.8),
]
# Pre-compile patterns once for performance
_COMPILED_PATTERNS: Optional[List[Tuple[str, List[re.Pattern], float]]] = None
def _get_compiled_patterns() -> List[Tuple[str, List[re.Pattern], float]]:
global _COMPILED_PATTERNS
if _COMPILED_PATTERNS is None:
_COMPILED_PATTERNS = [
(task_type, [re.compile(p, re.IGNORECASE) for p in patterns], weight)
for task_type, patterns, weight in TASK_PATTERNS
]
return _COMPILED_PATTERNS
# ── Model catalog ──────────────────────────────────────────────────────────────
@dataclass
class ModelSpec:
    """Catalog entry describing one selectable model (cloud API or local Ollama)."""

    profile_name: str
    provider: str
    model_id: str
    api_key_env: str = ""
    strengths: List[str] = field(default_factory=list)
    cost_tier: int = 1  # 0=free(local), 1=cheap, 2=mid, 3=expensive
    speed_tier: int = 1  # 1=fast, 2=medium, 3=slow
    context_k: int = 8  # context window in thousands
    local: bool = False
    max_tokens: int = 4096
    vram_gb: float = 0.0
    description: str = ""

    @property
    def available(self) -> bool:
        """Whether the model is usable right now.

        Local models are checked against the (cached) Ollama tag list; cloud
        models are considered available when their API key env var is set to
        a non-blank value.
        """
        if self.local:
            return _is_ollama_model_available(self.model_id)
        key = os.getenv(self.api_key_env, "")
        return len(key.strip()) > 0

    @property
    def has_credits(self) -> bool:
        """Whether this model's provider is not currently budget-exhausted."""
        return ProviderBudget.is_available(self.provider)
# ── Ollama model availability cache ───────────────────────────────────────────
_ollama_available_models: Optional[List[str]] = None
_ollama_cache_ts: float = 0.0
_OLLAMA_CACHE_TTL = 60.0
def _is_ollama_model_available(model_id: str) -> bool:
global _ollama_available_models, _ollama_cache_ts
now = time.time()
if _ollama_available_models is None or (now - _ollama_cache_ts) > _OLLAMA_CACHE_TTL:
_refresh_ollama_models_sync()
if _ollama_available_models is None:
return False
model_lower = model_id.lower()
model_base = model_lower.split(":")[0]
for m in _ollama_available_models:
ml = m.lower()
if ml == model_lower or ml.split(":")[0] == model_base:
return True
return False
def _refresh_ollama_models_sync() -> None:
global _ollama_available_models, _ollama_cache_ts
import urllib.request
import json as _json
ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
try:
with urllib.request.urlopen(f"{ollama_url}/api/tags", timeout=2) as resp:
data = _json.loads(resp.read())
_ollama_available_models = [m["name"] for m in data.get("models", [])]
_ollama_cache_ts = time.time()
except Exception:
_ollama_available_models = []
_ollama_cache_ts = time.time()
async def refresh_ollama_models_async() -> List[str]:
    """Async refresh of the Ollama tag cache; returns the resulting list.

    Uses httpx if installed; on any failure (missing httpx, daemon down,
    bad response) the previously cached list is kept, with ``None``
    normalised to an empty list.
    """
    global _ollama_available_models, _ollama_cache_ts
    try:
        import httpx

        base_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
        async with httpx.AsyncClient(timeout=2.0) as client:
            response = await client.get(f"{base_url}/api/tags")
        payload = response.json()
        _ollama_available_models = [entry["name"] for entry in payload.get("models", [])]
        _ollama_cache_ts = time.time()
    except Exception:
        _ollama_available_models = _ollama_available_models or []
    return _ollama_available_models
# ── Full model catalog ─────────────────────────────────────────────────────────
# Static registry of every model the router may select.  Availability is
# resolved lazily via ModelSpec.available (API key present for cloud entries,
# model pulled in Ollama for local ones); nothing here performs I/O at import.
SOFIIA_MODEL_CATALOG: List[ModelSpec] = [
    # ── Anthropic Claude ─────────────────────────────────────────────────────
    ModelSpec(
        profile_name="cloud_claude_sonnet",
        provider="anthropic", model_id="claude-sonnet-4-5",
        api_key_env="ANTHROPIC_API_KEY",
        strengths=["code_gen", "code_debug", "code_refactor", "architecture", "security", "reasoning"],
        cost_tier=2, speed_tier=2, context_k=200, max_tokens=8192,
        description="Claude Sonnet 4.5 — найкращий для коду та архітектури",
    ),
    ModelSpec(
        profile_name="cloud_claude_haiku",
        provider="anthropic", model_id="claude-haiku-3-5",
        api_key_env="ANTHROPIC_API_KEY",
        strengths=["quick_answer", "code_review", "creative", "analysis", "chatbot"],
        cost_tier=1, speed_tier=1, context_k=200, max_tokens=4096,
        description="Claude Haiku 3.5 — швидкий та дешевий",
    ),
    # ── xAI Grok ─────────────────────────────────────────────────────────────
    ModelSpec(
        profile_name="cloud_grok",
        provider="grok", model_id="grok-4-1-fast-reasoning",
        api_key_env="GROK_API_KEY",
        strengths=["reasoning", "architecture", "analysis", "code_gen"],
        cost_tier=2, speed_tier=1, context_k=2000, max_tokens=8192,
        description="Grok 4.1 Fast — 2M контекст, кращий для reasoning",
    ),
    # ── DeepSeek API ─────────────────────────────────────────────────────────
    ModelSpec(
        profile_name="cloud_deepseek",
        provider="deepseek", model_id="deepseek-chat",
        api_key_env="DEEPSEEK_API_KEY",
        strengths=["code_gen", "code_debug", "code_refactor", "devops", "quick_answer"],
        cost_tier=1, speed_tier=2, context_k=64, max_tokens=4096,
        description="DeepSeek Chat — дешевий і добре знає код/devops",
    ),
    # ── GLM-5 / Z.AI (API) ───────────────────────────────────────────────────
    # NOTE(review): profile names say "glm5" but model_id is glm-4-plus/flash —
    # presumably intentional (the env var is GLM5_API_KEY); confirm against
    # the provider's current model naming.
    ModelSpec(
        profile_name="cloud_glm5",
        provider="glm", model_id="glm-4-plus",
        api_key_env="GLM5_API_KEY",
        strengths=["quick_answer", "creative", "analysis", "code_gen", "chatbot"],
        cost_tier=1, speed_tier=1, context_k=128, max_tokens=4096,
        description="GLM-4 Plus (Z.AI) — швидкий, дешевий, гарно знає українську/CJK",
    ),
    ModelSpec(
        profile_name="cloud_glm5_flash",
        provider="glm", model_id="glm-4-flash",
        api_key_env="GLM5_API_KEY",
        strengths=["quick_answer", "creative", "chatbot"],
        cost_tier=0, speed_tier=1, context_k=128, max_tokens=2048,
        description="GLM-4 Flash (Z.AI) — безкоштовний, найшвидший",
    ),
    # ── Mistral AI (API) ─────────────────────────────────────────────────────
    ModelSpec(
        profile_name="cloud_mistral",
        provider="mistral", model_id="mistral-large-latest",
        api_key_env="MISTRAL_API_KEY",
        strengths=["analysis", "creative", "reasoning", "architecture"],
        cost_tier=2, speed_tier=2, context_k=128, max_tokens=4096,
        description="Mistral Large — добрий для аналізу та creative",
    ),
    # ── Local: qwen3.5:35b-a3b (FLAGSHIP) ────────────────────────────────────
    ModelSpec(
        profile_name="local_qwen35_35b",
        provider="ollama", model_id="qwen3.5:35b-a3b",
        strengths=["code_gen", "code_debug", "code_refactor", "reasoning", "architecture",
                   "analysis", "devops", "security", "chatbot"],
        cost_tier=0, speed_tier=2, context_k=32, max_tokens=4096,
        local=True, vram_gb=24.0,
        description="Qwen3.5 35B MoE (NODA2) — флагман локально, якість ≈ cloud",
    ),
    # ── Local: qwen3:14b ─────────────────────────────────────────────────────
    ModelSpec(
        profile_name="local_qwen3_14b",
        provider="ollama", model_id="qwen3:14b",
        strengths=["code_gen", "code_debug", "quick_answer", "devops", "analysis", "chatbot"],
        cost_tier=0, speed_tier=2, context_k=32, max_tokens=2048,
        local=True, vram_gb=10.0,
        description="Qwen3 14B (NODA2) — швидкий локальний загальний",
    ),
    # ── Local: glm-4.7-flash:32k ─────────────────────────────────────────────
    ModelSpec(
        profile_name="local_glm47_32k",
        provider="ollama", model_id="glm-4.7-flash:32k",
        strengths=["quick_answer", "creative", "analysis", "code_review", "chatbot"],
        cost_tier=0, speed_tier=2, context_k=32, max_tokens=2048,
        local=True, vram_gb=20.0,
        description="GLM-4.7 Flash 32K (NODA2) — локальний GLM, великий контекст",
    ),
    # ── Local: deepseek-r1:70b ────────────────────────────────────────────────
    ModelSpec(
        profile_name="local_deepseek_r1_70b",
        provider="ollama", model_id="deepseek-r1:70b",
        strengths=["reasoning", "math_code", "architecture", "analysis"],
        cost_tier=0, speed_tier=3, context_k=64, max_tokens=4096,
        local=True, vram_gb=48.0,
        description="DeepSeek-R1 70B (NODA2) — локальний reasoning як o1",
    ),
    # ── Local: deepseek-coder:33b ─────────────────────────────────────────────
    ModelSpec(
        profile_name="local_deepseek_coder_33b",
        provider="ollama", model_id="deepseek-coder:33b",
        strengths=["code_gen", "code_debug", "code_refactor", "math_code"],
        cost_tier=0, speed_tier=2, context_k=16, max_tokens=2048,
        local=True, vram_gb=20.0,
        description="DeepSeek Coder 33B (NODA2) — спеціаліст по коду",
    ),
    # ── Local: gemma3:latest ──────────────────────────────────────────────────
    ModelSpec(
        profile_name="local_gemma3",
        provider="ollama", model_id="gemma3:latest",
        strengths=["quick_answer", "analysis", "creative", "chatbot"],
        cost_tier=0, speed_tier=2, context_k=8, max_tokens=2048,
        local=True, vram_gb=8.0,
        description="Gemma3 (NODA2) — Google's ефективна модель",
    ),
    # ── Local: mistral-nemo:12b ───────────────────────────────────────────────
    ModelSpec(
        profile_name="local_mistral_nemo",
        provider="ollama", model_id="mistral-nemo:12b",
        strengths=["creative", "quick_answer", "analysis", "chatbot"],
        cost_tier=0, speed_tier=2, context_k=128, max_tokens=2048,
        local=True, vram_gb=8.0,
        description="Mistral Nemo 12B (NODA2) — 128K контекст локально",
    ),
    # ── Local: starcoder2:3b ──────────────────────────────────────────────────
    ModelSpec(
        profile_name="local_starcoder2",
        provider="ollama", model_id="starcoder2:3b",
        strengths=["code_gen", "code_review"],
        cost_tier=0, speed_tier=1, context_k=16, max_tokens=2048,
        local=True, vram_gb=2.0,
        description="StarCoder2 3B (NODA2) — мікро-модель для code completion",
    ),
    # ── Local: phi3:latest ────────────────────────────────────────────────────
    ModelSpec(
        profile_name="local_phi3",
        provider="ollama", model_id="phi3:latest",
        strengths=["quick_answer", "analysis", "chatbot"],
        cost_tier=0, speed_tier=1, context_k=128, max_tokens=2048,
        local=True, vram_gb=4.0,
        description="Phi-3 (NODA2) — Microsoft мала ефективна модель",
    ),
    # ── Local: llava:13b (vision) ─────────────────────────────────────────────
    ModelSpec(
        profile_name="local_llava_13b",
        provider="ollama", model_id="llava:13b",
        strengths=["vision"],
        cost_tier=0, speed_tier=2, context_k=4, max_tokens=2048,
        local=True, vram_gb=10.0,
        description="LLaVA 13B (NODA2) — vision модель для зображень",
    ),
    # ── Local: gpt-oss:latest ─────────────────────────────────────────────────
    ModelSpec(
        profile_name="local_gpt_oss",
        provider="ollama", model_id="gpt-oss:latest",
        strengths=["code_gen", "quick_answer"],
        cost_tier=0, speed_tier=2, context_k=8, max_tokens=2048,
        local=True, vram_gb=8.0,
        description="GPT-OSS (NODA2) — відкрита OSS GPT-like модель",
    ),
]
# ── Task → preferred model matrix ─────────────────────────────────────────────
# Maps each task type to an ordered list of catalog profile_names; earlier
# entries are preferred.  select_model_auto uses list position as the
# strongest scoring signal and falls back to the "unknown" key for tasks
# not listed here.
TASK_MODEL_PRIORITY: Dict[str, List[str]] = {
    # Principle: local-first for tasks where local quality is sufficient.
    # Cloud only when the task genuinely needs it (complex code, deep reasoning,
    # very long context, security audits).
    #
    # qwen3.5:35b-a3b is the flagship local — MoE with cloud-level quality.
    # It should be preferred over cloud APIs for most routine tasks.
    "code_gen": [
        "local_qwen35_35b", "cloud_claude_sonnet", "local_deepseek_coder_33b",
        "cloud_deepseek", "local_qwen3_14b", "cloud_grok",
    ],
    "code_debug": [
        "local_qwen35_35b", "local_deepseek_coder_33b", "cloud_claude_sonnet",
        "cloud_deepseek", "local_qwen3_14b",
    ],
    "code_review": [
        "local_qwen35_35b", "cloud_claude_haiku", "local_deepseek_coder_33b",
        "cloud_claude_sonnet", "cloud_deepseek",
    ],
    "code_refactor": [
        "local_qwen35_35b", "local_deepseek_coder_33b", "cloud_claude_sonnet",
        "cloud_deepseek", "local_qwen3_14b",
    ],
    "math_code": [
        "local_deepseek_r1_70b", "local_qwen35_35b", "cloud_grok",
        "cloud_claude_sonnet", "local_deepseek_coder_33b",
    ],
    "architecture": [
        "local_qwen35_35b", "cloud_grok", "cloud_claude_sonnet",
        "local_deepseek_r1_70b", "cloud_mistral",
    ],
    "devops": [
        "local_qwen35_35b", "local_qwen3_14b", "cloud_deepseek",
        "cloud_claude_sonnet", "local_glm47_32k",
    ],
    # Security reviews prefer the strongest cloud model first.
    "security": [
        "cloud_claude_sonnet", "local_qwen35_35b", "cloud_grok", "cloud_mistral",
    ],
    "reasoning": [
        "local_deepseek_r1_70b", "local_qwen35_35b", "cloud_grok",
        "cloud_claude_sonnet", "cloud_mistral",
    ],
    "analysis": [
        "local_qwen35_35b", "local_glm47_32k", "cloud_grok",
        "cloud_claude_haiku", "local_mistral_nemo", "cloud_mistral",
    ],
    "creative": [
        "local_qwen35_35b", "local_mistral_nemo", "cloud_claude_haiku",
        "local_glm47_32k", "cloud_mistral",
    ],
    "quick_answer": [
        "local_qwen3_14b", "local_qwen35_35b", "local_phi3",
        "local_gemma3", "cloud_deepseek", "cloud_glm5_flash",
    ],
    "chatbot": [
        "local_qwen3_14b", "local_qwen35_35b", "local_gemma3",
        "local_phi3", "local_mistral_nemo",
    ],
    # Only one vision-capable model is in the catalog.
    "vision": [
        "local_llava_13b",
    ],
    # Fallback priority when classification found no matching task type.
    "unknown": [
        "local_qwen35_35b", "local_qwen3_14b", "cloud_claude_sonnet",
        "cloud_grok", "cloud_deepseek",
    ],
}
# ── Budget integration ─────────────────────────────────────────────────────────
class ProviderBudget:
    """In-memory budget gate: marks providers exhausted until TTL expires."""

    # provider name -> unix timestamp when it was marked exhausted
    _exhausted: Dict[str, float] = {}
    # seconds a provider stays blocked after being marked
    _exhausted_ttl: int = 3600

    @classmethod
    def mark_exhausted(cls, provider: str) -> None:
        """Block *provider* for the next TTL window."""
        cls._exhausted[provider] = time.time()
        logger.warning("💸 Provider %s marked as budget-exhausted", provider)

    @classmethod
    def is_available(cls, provider: str) -> bool:
        """Return True when *provider* is not blocked; purge expired marks."""
        marked_at = cls._exhausted.get(provider)
        if marked_at is None:
            return True
        if time.time() - marked_at > cls._exhausted_ttl:
            cls._exhausted.pop(provider, None)
            return True
        return False

    @classmethod
    def reset(cls, provider: str) -> None:
        """Manually unblock *provider* (e.g. after credits were topped up)."""
        cls._exhausted.pop(provider, None)
# ── Task classification ────────────────────────────────────────────────────────
@dataclass
class ClassificationResult:
    """Outcome of classify_task_detailed for a single prompt."""
    task_type: str  # winning label from TASK_PATTERNS, or "unknown"/"chatbot"
    confidence: float  # heuristic confidence in task_type, 0..1
    all_scores: Dict[str, float]  # top task scores, highest first (max 5)
    ambiguous: bool = False  # True when runner-up scored within 30% of the winner
    runner_up: Optional[str] = None  # second-best task type when ambiguous
def classify_task(prompt: str, context_len: int = 0) -> Tuple[str, float]:
    """Classify prompt into a task type. Returns (task_type, confidence)."""
    detailed = classify_task_detailed(prompt, context_len)
    return (detailed.task_type, detailed.confidence)
def classify_task_detailed(prompt: str, context_len: int = 0) -> ClassificationResult:
    """Classify *prompt* into a task type, reporting ambiguity and top scores."""
    stripped = prompt.strip() if prompt else ""
    if not stripped:
        # Empty input: treat as small talk with middling confidence.
        return ClassificationResult("chatbot", 0.5, {}, ambiguous=False)
    scores: Dict[str, float] = {}
    for task_type, regexes, weight in _get_compiled_patterns():
        hit_count = sum(1 for rx in regexes if rx.search(stripped))
        if hit_count:
            # Fraction of the group's patterns that matched, scaled by weight.
            scores[task_type] = (hit_count / len(regexes)) * weight
    if not scores:
        return ClassificationResult("unknown", 0.3, {}, ambiguous=False)
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    top_task, top_score = ranked[0]
    confidence = min(top_score * 10, 1.0)
    # Short prompts carry fewer signals — scale confidence down.
    n_words = len(stripped.split())
    if n_words <= 3:
        confidence *= 0.6
    elif n_words <= 8:
        confidence *= 0.85
    # Ambiguous when the runner-up scores within 30% of the winner.
    ambiguous = False
    runner_up: Optional[str] = None
    if len(ranked) >= 2:
        second_score = ranked[1][1]
        if second_score > 0 and second_score / top_score > 0.7:
            ambiguous = True
            runner_up = ranked[1][0]
    # Long conversations: floor the confidence (a scoring nudge only —
    # the classification itself is unchanged).
    if context_len > 50:
        confidence = max(confidence, 0.5)
    return ClassificationResult(
        task_type=top_task,
        confidence=round(confidence, 3),
        all_scores={k: round(v, 4) for k, v in ranked[:5]},
        ambiguous=ambiguous,
        runner_up=runner_up,
    )
def _prompt_complexity(prompt: str) -> str:
"""Estimate prompt complexity: simple | medium | complex"""
words = len(prompt.split())
lines = prompt.count("\n")
code_blocks = prompt.count("```")
if words < 20 and lines < 3 and code_blocks == 0:
return "simple"
if words > 200 or code_blocks >= 2 or lines > 20:
return "complex"
return "medium"
# ── Main selection function ────────────────────────────────────────────────────
@dataclass
class AutoRouteResult:
    """Outcome of select_model_auto: the chosen model plus routing diagnostics."""
    profile_name: str  # chosen catalog profile (key into SOFIIA_MODEL_CATALOG)
    model_id: str  # provider-specific model identifier
    provider: str  # e.g. "anthropic", "ollama"
    task_type: str  # classified task label
    confidence: float  # classifier confidence, 0..1
    complexity: str  # "simple" | "medium" | "complex"
    reason: str  # human-readable " | "-joined explanation of the choice
    fallback_used: bool = False  # True when neither top-2 priority pick won
    all_candidates: List[str] = field(default_factory=list)  # top-5 scored profiles
    ambiguous: bool = False  # classification ambiguity flag
    runner_up: Optional[str] = None  # second-best task type when ambiguous
    all_scores: Dict[str, float] = field(default_factory=dict)  # top task scores
def select_model_auto(
    prompt: str,
    force_fast: bool = False,
    force_capable: bool = False,
    prefer_local: bool = False,
    prefer_cheap: bool = False,
    budget_aware: bool = True,
    context_messages_len: int = 0,
) -> AutoRouteResult:
    """
    Cursor-style auto model selection for Sofiia.

    Logic:
    1. Classify task type from prompt (with ambiguity detection)
    2. Estimate complexity (simple/medium/complex)
    3. Apply modifiers (force_fast, force_capable, prefer_local, prefer_cheap)
    4. Score candidates from priority list factoring availability, budget, speed, cost
    5. For long conversations, prefer large-context models

    Args:
        prompt: user prompt to route.
        force_fast: bias towards fast models; non-code tasks reroute to quick_answer.
        force_capable: bias towards large-context models.
        prefer_local: restrict to local candidates when any exist, and penalize cloud.
        prefer_cheap: subtract cost from the score instead of adding it.
        budget_aware: penalize providers marked exhausted by ProviderBudget.
        context_messages_len: number of prior messages in the conversation.

    Returns:
        AutoRouteResult describing the chosen profile and why it won.
    """
    classification = classify_task_detailed(prompt, context_messages_len)
    task_type = classification.task_type
    confidence = classification.confidence
    complexity = _prompt_complexity(prompt)
    effective_task = task_type
    # Modifier overrides (parentheses fix for operator precedence)
    if force_fast and task_type not in ("code_gen", "code_debug", "math_code"):
        effective_task = "quick_answer"
    if (prefer_cheap or complexity == "simple") and task_type in ("quick_answer", "creative", "chatbot"):
        effective_task = "quick_answer"
    priority_list = TASK_MODEL_PRIORITY.get(effective_task, TASK_MODEL_PRIORITY["unknown"])
    catalog_map = {m.profile_name: m for m in SOFIIA_MODEL_CATALOG}
    # Keep only priority entries that actually exist in the catalog.
    candidates = [p for p in priority_list if p in catalog_map]
    if prefer_local:
        local_cands = [p for p in candidates if catalog_map[p].local]
        if local_cands:
            candidates = local_cands

    def _score(profile_name: str) -> float:
        # Lower score wins (sorted ascending below); penalties are additive.
        spec = catalog_map[profile_name]
        score = 0.0
        # Hard penalties: model not usable or provider out of budget.
        if not spec.available:
            score += 1000
        if budget_aware and not spec.has_credits:
            score += 500
        # Priority-list position is the strongest signal
        try:
            pos = priority_list.index(profile_name)
            score += pos * 20
        except ValueError:
            score += 200
        if prefer_local and not spec.local:
            score += 200
        if force_fast:
            score += spec.speed_tier * 15
        if prefer_cheap or prefer_local:
            score -= spec.cost_tier * 20
        else:
            score += spec.cost_tier * 2
        if force_capable:
            # Larger context window lowers (improves) the score.
            score -= spec.context_k / 100
        if complexity == "complex" and spec.context_k < 32:
            score += 40
        # Long conversation bonus for large-context models
        if context_messages_len > 30 and spec.context_k >= 128:
            score -= 15
        elif context_messages_len > 50 and spec.context_k < 32:
            score += 25
        return score

    scored = sorted([c for c in candidates if c in catalog_map], key=_score)
    if not scored:
        # No candidate survived: fall back to the first known local profile.
        for fallback in ["local_qwen35_35b", "local_qwen3_14b", "local_phi3"]:
            if fallback in catalog_map:
                scored = [fallback]
                break
    best = scored[0] if scored else "local_qwen3_14b"
    spec = catalog_map.get(best)
    # "Fallback" means the winner was not one of the top-2 preferred profiles.
    fallback_used = best not in priority_list[:2]
    reasons: List[str] = [f"task={task_type} ({confidence:.0%})", f"complexity={complexity}"]
    if classification.ambiguous:
        reasons.append(f"ambiguous (runner_up={classification.runner_up})")
    if force_fast:
        reasons.append("force_fast")
    if prefer_local:
        reasons.append("prefer_local")
    if prefer_cheap:
        reasons.append("prefer_cheap")
    if force_capable:
        reasons.append("force_capable")
    if context_messages_len > 30:
        reasons.append(f"long_conversation({context_messages_len})")
    if fallback_used:
        reasons.append("fallback (top unavailable)")
    return AutoRouteResult(
        profile_name=best,
        model_id=spec.model_id if spec else best,
        provider=spec.provider if spec else "unknown",
        task_type=task_type,
        confidence=confidence,
        complexity=complexity,
        reason=" | ".join(reasons),
        fallback_used=fallback_used,
        all_candidates=scored[:5],
        ambiguous=classification.ambiguous,
        runner_up=classification.runner_up,
        all_scores=classification.all_scores,
    )
def explain_selection(result: AutoRouteResult) -> str:
    """Human-readable explanation of model selection (for debug/UI)."""
    parts: List[str] = []
    parts.append(f"Auto-selected **{result.model_id}** ({result.provider})")
    parts.append(
        f"Task: `{result.task_type}` | Complexity: `{result.complexity}` | "
        f"Confidence: {result.confidence:.0%}"
    )
    parts.append(f"Reason: {result.reason}")
    if result.ambiguous:
        parts.append(f"Ambiguous: runner-up was `{result.runner_up}`")
    if result.all_scores:
        shown = list(result.all_scores.items())[:3]
        parts.append("Scores: " + ", ".join(f"{k}={v:.3f}" for k, v in shown))
    return "\n".join(parts)
def get_full_catalog() -> List[Dict[str, Any]]:
    """Return full model catalog with availability status for dashboard."""
    snapshot: List[Dict[str, Any]] = []
    for spec in SOFIIA_MODEL_CATALOG:
        snapshot.append(
            {
                "profile_name": spec.profile_name,
                "provider": spec.provider,
                "model_id": spec.model_id,
                "description": spec.description,
                "strengths": spec.strengths,
                "cost_tier": spec.cost_tier,
                "speed_tier": spec.speed_tier,
                "context_k": spec.context_k,
                "local": spec.local,
                "vram_gb": spec.vram_gb,
                # Live status: key/model presence and provider budget state.
                "available": spec.available,
                "has_credits": spec.has_credits,
            }
        )
    return snapshot