P1: NCS-first model selection + NATS capabilities + Grok 4.1
Router model selection:
- New model_select.py: resolve_effective_profile → profile_requirements → select_best_model pipeline. NCS-first with graceful static fallback.
- selection_policies in router-config.node2.yml define prefer order per profile without hardcoding models (e.g. local_default_coder prefers qwen3:14b, then qwen3.5:35b-a3b).
- Cloud profiles (cloud_grok, cloud_deepseek) skip NCS; on cloud failure use fallback_profile via NCS for local selection.
- Structured logs: selected_profile, required_type, runtime, model, caps_age_s, fallback_reason on every infer request.

Grok model fix:
- grok-2-1212 no longer exists on xAI API → updated to grok-4-1-fast-reasoning across all 3 hardcoded locations in main.py and router-config.node2.yml.

NCS NATS request/reply:
- node-capabilities subscribes to node.noda2.capabilities.get (NATS request/reply). Enabled via ENABLE_NATS_CAPS=true in compose.
- NODA1 router can query NODA2 capabilities over NATS leafnode without HTTP connectivity.

Verified:
- NCS: 14 served models from Ollama+Swapper+llama-server
- NATS: request/reply returns full capabilities JSON
- Sofiia: cloud_grok → grok-4-1-fast-reasoning (tested, 200 OK)
- Helion: NCS → qwen3:14b via Ollama (caps_age=23.7s cache hit)
- Router health: ok

Made-with: Cursor
This commit is contained in:
@@ -123,8 +123,11 @@ services:
|
|||||||
- SWAPPER_URL=http://swapper-service:8890
|
- SWAPPER_URL=http://swapper-service:8890
|
||||||
- LLAMA_SERVER_URL=http://host.docker.internal:11435
|
- LLAMA_SERVER_URL=http://host.docker.internal:11435
|
||||||
- CACHE_TTL_SEC=15
|
- CACHE_TTL_SEC=15
|
||||||
|
- ENABLE_NATS_CAPS=true
|
||||||
|
- NATS_URL=nats://dagi-nats:4222
|
||||||
depends_on:
|
depends_on:
|
||||||
- swapper-service
|
- swapper-service
|
||||||
|
- dagi-nats
|
||||||
networks:
|
networks:
|
||||||
- dagi-network
|
- dagi-network
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
@@ -240,6 +240,55 @@ async def capabilities_refresh():
|
|||||||
return JSONResponse(content={"refreshed": True, "served_count": data["served_count"]})
|
return JSONResponse(content={"refreshed": True, "served_count": data["served_count"]})
|
||||||
|
|
||||||
|
|
||||||
|
# ── NATS request/reply (optional) ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
ENABLE_NATS = os.getenv("ENABLE_NATS_CAPS", "false").lower() in ("true", "1", "yes")
|
||||||
|
NATS_URL = os.getenv("NATS_URL", "nats://dagi-nats:4222")
|
||||||
|
NATS_SUBJECT = f"node.{NODE_ID.lower()}.capabilities.get"
|
||||||
|
|
||||||
|
_nats_client = None
|
||||||
|
|
||||||
|
|
||||||
|
async def _nats_capabilities_handler(msg):
    """Answer a NATS capabilities request with the capabilities JSON.

    Builds the capabilities document with ``_build_capabilities()`` and, when
    the incoming message carries a reply subject, publishes the JSON-encoded
    document to it. If anything fails, a small ``{"error":"internal"}``
    payload is published to the reply subject instead, so the requester
    still receives a reply.

    Args:
        msg: The incoming NATS message; only ``msg.reply`` is read.
    """
    import json as _json

    try:
        data = await _build_capabilities()
        payload = _json.dumps(data).encode()
        if msg.reply:
            await _nats_client.publish(msg.reply, payload)
            logger.debug(f"NATS reply sent to {msg.reply} ({len(payload)} bytes)")
    except Exception as e:
        logger.warning(f"NATS handler error: {e}")
        if msg.reply and _nats_client:
            try:
                await _nats_client.publish(msg.reply, b'{"error":"internal"}')
            except Exception as reply_err:
                # The publish itself may be what failed above; a second
                # failure must not escape the handler and hide the first one.
                logger.warning(f"NATS error reply failed: {reply_err}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")
async def startup_nats():
    """Connect to NATS and subscribe the capabilities handler, if enabled.

    Does nothing except log when ``ENABLE_NATS`` is false. Otherwise connects
    to ``NATS_URL`` and subscribes ``_nats_capabilities_handler`` to
    ``NATS_SUBJECT``. Any failure is logged and treated as non-fatal: the
    module-level client is reset to ``None`` and a connection that was
    already opened is closed again so it is not leaked.
    """
    global _nats_client
    if not ENABLE_NATS:
        logger.info(f"NATS capabilities disabled (ENABLE_NATS_CAPS={ENABLE_NATS})")
        return

    client = None
    try:
        import nats as nats_lib

        client = await nats_lib.connect(NATS_URL)
        # Publish the client globally before subscribing: the handler replies
        # through ``_nats_client``.
        _nats_client = client
        await _nats_client.subscribe(NATS_SUBJECT, cb=_nats_capabilities_handler)
        logger.info(f"✅ NATS subscribed: {NATS_SUBJECT} on {NATS_URL}")
    except Exception as e:
        logger.warning(f"⚠️ NATS init failed (non-fatal): {e}")
        _nats_client = None
        if client is not None:
            # connect() succeeded but a later step failed; release the socket.
            try:
                await client.close()
            except Exception as close_err:
                logger.debug(f"NATS close after failed init also failed: {close_err}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("shutdown")
async def shutdown_nats():
    """Close the NATS connection on application shutdown, if one is open.

    Closing is best effort: a failure is logged and never raised, so it
    cannot interrupt shutdown.
    """
    if not _nats_client:
        return
    try:
        await _nats_client.close()
    except Exception as e:
        # Previously swallowed silently; keep it non-fatal but leave a trace.
        logger.warning(f"NATS close failed during shutdown: {e}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8099")))
|
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8099")))
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
fastapi>=0.110.0
|
fastapi>=0.110.0
|
||||||
uvicorn>=0.29.0
|
uvicorn>=0.29.0
|
||||||
httpx>=0.27.0
|
httpx>=0.27.0
|
||||||
|
nats-py>=2.7.0
|
||||||
|
|||||||
@@ -46,6 +46,15 @@ except ImportError:
|
|||||||
RUNTIME_GUARD_AVAILABLE = False
|
RUNTIME_GUARD_AVAILABLE = False
|
||||||
RuntimeGuard = None
|
RuntimeGuard = None
|
||||||
|
|
||||||
|
# NCS-first model selection
|
||||||
|
try:
|
||||||
|
import capabilities_client
|
||||||
|
from model_select import select_model_for_agent, ModelSelection, CLOUD_PROVIDERS as NCS_CLOUD_PROVIDERS
|
||||||
|
NCS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
NCS_AVAILABLE = False
|
||||||
|
capabilities_client = None # type: ignore[assignment]
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -756,6 +765,23 @@ async def startup_event():
|
|||||||
else:
|
else:
|
||||||
tool_manager = None
|
tool_manager = None
|
||||||
|
|
||||||
|
# Initialize Node Capabilities client
|
||||||
|
if NCS_AVAILABLE and capabilities_client:
|
||||||
|
ncs_cfg = router_config.get("node_capabilities", {})
|
||||||
|
ncs_url = ncs_cfg.get("url", "") or os.getenv("NODE_CAPABILITIES_URL", "")
|
||||||
|
ncs_ttl = ncs_cfg.get("cache_ttl_sec", 30)
|
||||||
|
if ncs_url:
|
||||||
|
capabilities_client.configure(url=ncs_url, ttl=ncs_ttl)
|
||||||
|
caps = await capabilities_client.fetch_capabilities()
|
||||||
|
served = caps.get("served_count", 0)
|
||||||
|
logger.info(f"✅ NCS configured: url={ncs_url} ttl={ncs_ttl}s served={served} models")
|
||||||
|
else:
|
||||||
|
logger.warning("⚠️ NCS url not configured; model selection will use static config only")
|
||||||
|
elif NCS_AVAILABLE:
|
||||||
|
logger.info("ℹ️ NCS modules loaded but capabilities_client is None")
|
||||||
|
else:
|
||||||
|
logger.warning("⚠️ NCS modules not available (model_select / capabilities_client import failed)")
|
||||||
|
|
||||||
# Initialize CLAN runtime guard
|
# Initialize CLAN runtime guard
|
||||||
if RUNTIME_GUARD_AVAILABLE and RuntimeGuard and CLAN_RUNTIME_GUARD_ENABLED:
|
if RUNTIME_GUARD_AVAILABLE and RuntimeGuard and CLAN_RUNTIME_GUARD_ENABLED:
|
||||||
try:
|
try:
|
||||||
@@ -1279,7 +1305,7 @@ async def internal_llm_complete(request: InternalLLMRequest):
|
|||||||
cloud_providers = [
|
cloud_providers = [
|
||||||
{"name": "deepseek", "api_key_env": "DEEPSEEK_API_KEY", "base_url": "https://api.deepseek.com", "model": "deepseek-chat", "timeout": 60},
|
{"name": "deepseek", "api_key_env": "DEEPSEEK_API_KEY", "base_url": "https://api.deepseek.com", "model": "deepseek-chat", "timeout": 60},
|
||||||
{"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
|
{"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
|
||||||
{"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-2-1212", "timeout": 60}
|
{"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-4-1-fast-reasoning", "timeout": 60}
|
||||||
]
|
]
|
||||||
|
|
||||||
# Respect configured provider: local profiles should stay local.
|
# Respect configured provider: local profiles should stay local.
|
||||||
@@ -1603,38 +1629,68 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
|||||||
|
|
||||||
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"}
|
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"}
|
||||||
|
|
||||||
llm_profiles = router_config.get("llm_profiles", {})
|
# ── NCS-first model selection ────────────────────────────────────────
|
||||||
llm_profile = llm_profiles.get(default_llm, {})
|
ncs_selection = None
|
||||||
|
if NCS_AVAILABLE and capabilities_client:
|
||||||
if not llm_profile:
|
try:
|
||||||
fallback_llm = agent_config.get("fallback_llm", "local_default_coder")
|
caps = await capabilities_client.fetch_capabilities()
|
||||||
llm_profile = llm_profiles.get(fallback_llm, {})
|
if caps:
|
||||||
logger.warning(
|
caps["_fetch_ts"] = capabilities_client._cache_ts
|
||||||
f"⚠️ Profile '{default_llm}' not found for agent={agent_id} "
|
ncs_selection = await select_model_for_agent(
|
||||||
f"→ fallback to '{fallback_llm}' (local). "
|
agent_id, agent_config, router_config, caps, request.model,
|
||||||
f"NOT defaulting to cloud silently."
|
)
|
||||||
)
|
except Exception as e:
|
||||||
default_llm = fallback_llm
|
logger.warning(f"⚠️ NCS selection error: {e}; falling back to static config")
|
||||||
|
|
||||||
provider = llm_profile.get("provider", "ollama")
|
|
||||||
logger.info(f"🎯 Agent={agent_id}: profile={default_llm} provider={provider} model={llm_profile.get('model', '?')}")
|
|
||||||
|
|
||||||
# If explicit model is requested, try to resolve it to configured cloud profile.
|
llm_profiles = router_config.get("llm_profiles", {})
|
||||||
if request.model:
|
|
||||||
for profile_name, profile in llm_profiles.items():
|
if ncs_selection and ncs_selection.name:
|
||||||
if profile.get("model") == request.model and profile.get("provider") in cloud_provider_names:
|
provider = ncs_selection.provider
|
||||||
llm_profile = profile
|
model = ncs_selection.name
|
||||||
provider = profile.get("provider", provider)
|
llm_profile = llm_profiles.get(default_llm, {})
|
||||||
default_llm = profile_name
|
if ncs_selection.base_url and provider == "ollama":
|
||||||
logger.info(f"🎛️ Matched request.model={request.model} to profile={profile_name} provider={provider}")
|
llm_profile = {**llm_profile, "base_url": ncs_selection.base_url}
|
||||||
break
|
logger.info(
|
||||||
|
f"🎯 NCS select: agent={agent_id} profile={default_llm} "
|
||||||
# Determine model name
|
f"→ runtime={ncs_selection.runtime} model={model} "
|
||||||
if provider in ["deepseek", "openai", "anthropic", "mistral"]:
|
f"provider={provider} via_ncs={ncs_selection.via_ncs} "
|
||||||
model = llm_profile.get("model", "deepseek-chat")
|
f"caps_age={ncs_selection.caps_age_s}s "
|
||||||
|
f"fallback={ncs_selection.fallback_reason or 'none'}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# For local ollama, use swapper model name format
|
llm_profile = llm_profiles.get(default_llm, {})
|
||||||
model = request.model or "qwen3:8b"
|
if not llm_profile:
|
||||||
|
fallback_llm = agent_config.get("fallback_llm", "local_default_coder")
|
||||||
|
llm_profile = llm_profiles.get(fallback_llm, {})
|
||||||
|
logger.warning(
|
||||||
|
f"⚠️ Profile '{default_llm}' not found for agent={agent_id} "
|
||||||
|
f"→ fallback to '{fallback_llm}' (local). "
|
||||||
|
f"NOT defaulting to cloud silently."
|
||||||
|
)
|
||||||
|
default_llm = fallback_llm
|
||||||
|
|
||||||
|
provider = llm_profile.get("provider", "ollama")
|
||||||
|
|
||||||
|
if request.model:
|
||||||
|
for profile_name, profile in llm_profiles.items():
|
||||||
|
if profile.get("model") == request.model and profile.get("provider") in cloud_provider_names:
|
||||||
|
llm_profile = profile
|
||||||
|
provider = profile.get("provider", provider)
|
||||||
|
default_llm = profile_name
|
||||||
|
logger.info(f"🎛️ Matched request.model={request.model} to profile={profile_name} provider={provider}")
|
||||||
|
break
|
||||||
|
|
||||||
|
if provider in ["deepseek", "openai", "anthropic", "mistral"]:
|
||||||
|
model = llm_profile.get("model", "deepseek-chat")
|
||||||
|
elif provider == "grok":
|
||||||
|
model = llm_profile.get("model", "grok-4-1-fast-reasoning")
|
||||||
|
else:
|
||||||
|
model = request.model or llm_profile.get("model", "qwen3:14b")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"🎯 Static select: agent={agent_id} profile={default_llm} "
|
||||||
|
f"provider={provider} model={model}"
|
||||||
|
)
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# VISION PROCESSING (if images present)
|
# VISION PROCESSING (if images present)
|
||||||
@@ -1863,7 +1919,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
|||||||
"name": "grok",
|
"name": "grok",
|
||||||
"api_key_env": "GROK_API_KEY",
|
"api_key_env": "GROK_API_KEY",
|
||||||
"base_url": "https://api.x.ai",
|
"base_url": "https://api.x.ai",
|
||||||
"model": "grok-2-1212",
|
"model": "grok-4-1-fast-reasoning",
|
||||||
"timeout": 60
|
"timeout": 60
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
280
services/router/model_select.py
Normal file
280
services/router/model_select.py
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
"""NCS-first model selection for DAGI Router.
|
||||||
|
|
||||||
|
Resolves an agent's LLM profile into a concrete model+provider using live
|
||||||
|
capabilities from the Node Capabilities Service (NCS). Falls back to static
|
||||||
|
router-config.yml when NCS is unavailable.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger("model_select")
|
||||||
|
|
||||||
|
CLOUD_PROVIDERS = {"deepseek", "mistral", "grok", "openai", "anthropic"}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ProfileRequirements:
    """Selection requirements derived from one LLM profile."""

    # Name of the profile these requirements were built for.
    profile_name: str
    # One of: llm | vision | code | stt | tts | cloud_llm
    required_type: str
    # Model names in order of preference; empty means no preference.
    prefer: List[str] = field(default_factory=list)
    provider: Optional[str] = None
    # Profile to try when this one cannot be satisfied.
    fallback_profile: Optional[str] = None
    constraints: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ModelSelection:
    """The concrete model chosen for a request and how it was chosen."""

    # ollama | swapper | llama_server | cloud
    runtime: str
    # Model name as the runtime knows it.
    name: str
    # llm | vision | code | …
    model_type: str
    base_url: str = ""
    # Cloud provider name if applicable.
    provider: str = ""
    # True when the choice came from live NCS capabilities.
    via_ncs: bool = False
    fallback_reason: str = ""
    caps_age_s: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# ── Profile resolution ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def resolve_effective_profile(
    agent_id: str,
    agent_cfg: Dict[str, Any],
    router_cfg: Dict[str, Any],
    request_model: Optional[str] = None,
) -> str:
    """Return the name of the LLM profile that applies to this request.

    When ``request_model`` is given, the first profile in
    ``router_cfg["llm_profiles"]`` (config order) whose ``model`` equals it
    wins. Otherwise, or when nothing matches, the agent's ``default_llm`` is
    returned, defaulting to ``"local_default_coder"``. ``agent_id`` is not
    used by the lookup.
    """
    if request_model:
        no_match = object()
        matching_names = (
            name
            for name, cfg in router_cfg.get("llm_profiles", {}).items()
            if cfg.get("model") == request_model
        )
        hit = next(matching_names, no_match)
        if hit is not no_match:
            return hit

    return agent_cfg.get("default_llm", "local_default_coder")
|
||||||
|
|
||||||
|
|
||||||
|
def profile_requirements(
    profile_name: str,
    agent_cfg: Dict[str, Any],
    router_cfg: Dict[str, Any],
) -> ProfileRequirements:
    """Translate a profile into the requirements used for model selection.

    An entry for ``profile_name`` under ``selection_policies`` takes
    precedence and is used as-is. Without one, requirements are inferred from
    the legacy ``provider``/``model`` fields of the ``llm_profiles`` entry:
    cloud providers yield ``cloud_llm``; local profiles yield ``vision``,
    ``code`` or ``llm`` depending on the profile name and model name.
    """
    llm_profiles = router_cfg.get("llm_profiles", {})
    selection_policies = router_cfg.get("selection_policies", {})
    legacy_cfg = llm_profiles.get(profile_name, {})
    agent_fallback = agent_cfg.get("fallback_llm")

    # 1) Explicit selection policy.
    policy = selection_policies.get(profile_name, {})
    if policy:
        return ProfileRequirements(
            profile_name=profile_name,
            required_type=policy.get("required_type", "llm"),
            prefer=policy.get("prefer", []),
            provider=policy.get("provider"),
            fallback_profile=policy.get("fallback_profile") or agent_fallback,
            constraints=policy.get("constraints", {}),
        )

    provider = legacy_cfg.get("provider", "ollama")
    model = legacy_cfg.get("model", "")

    # 2) Legacy cloud profile: no model preference, local fallback by default.
    if provider in CLOUD_PROVIDERS:
        return ProfileRequirements(
            profile_name=profile_name,
            required_type="cloud_llm",
            prefer=[],
            provider=provider,
            fallback_profile=agent_cfg.get("fallback_llm", "local_default_coder"),
        )

    # 3) Legacy local profile: guess the type from the names.
    if "vision" in profile_name or "vl" in model.lower():
        inferred_type = "vision"
    elif "coder" in profile_name or "code" in model.lower():
        inferred_type = "code"
    else:
        inferred_type = "llm"

    preferred = [model] if model else []
    return ProfileRequirements(
        profile_name=profile_name,
        required_type=inferred_type,
        prefer=preferred,
        provider=provider,
        fallback_profile=agent_fallback,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── NCS-based selection ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def select_best_model(
    reqs: ProfileRequirements,
    capabilities: Dict[str, Any],
) -> Optional[ModelSelection]:
    """Choose the best served model from NCS capabilities.

    Candidates are the served models whose ``type`` matches the required
    type; ``llm`` and ``code`` are treated as interchangeable. Preferences
    are tried in order, and a preference matches a model when it is a
    substring of the model name. A ``"*"`` entry ends the preference scan.
    If no preference matches, the best remaining candidate is chosen by
    ``_pick_best_candidate``.

    Returns:
        The selection, or ``None`` when nothing is served or no served model
        has a suitable type (caller should try static fallback).
    """
    served = capabilities.get("served_models", [])
    if not served:
        return None

    # Read the clock once so a missing ``_fetch_ts`` gives exactly 0.0 rather
    # than a tiny negative age; clamp in case the timestamp is in the future.
    now = time.time()
    caps_age = max(0.0, now - capabilities.get("_fetch_ts", now))

    search_types = [reqs.required_type]
    if reqs.required_type == "code":
        search_types.append("llm")
    if reqs.required_type == "llm":
        search_types.append("code")

    candidates = [m for m in served if m.get("type") in search_types]
    if not candidates:
        return None

    for pref in reqs.prefer or []:
        if pref == "*":
            break
        if not pref:
            # An empty preference is a substring of every name and would
            # select an arbitrary model; ignore it.
            continue
        for m in candidates:
            # ``or ""`` tolerates an explicit ``name: null`` entry.
            if pref in (m.get("name") or ""):
                return _make_selection(m, capabilities, caps_age, reqs)

    # ``candidates`` is known to be non-empty here.
    best = _pick_best_candidate(candidates)
    return _make_selection(best, capabilities, caps_age, reqs)
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_best_candidate(candidates: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||||
|
"""Prefer running models, then largest by size_gb."""
|
||||||
|
running = [m for m in candidates if m.get("running")]
|
||||||
|
pool = running if running else candidates
|
||||||
|
return max(pool, key=lambda m: m.get("size_gb", 0))
|
||||||
|
|
||||||
|
|
||||||
|
def _make_selection(
    model: Dict[str, Any],
    capabilities: Dict[str, Any],
    caps_age: float,
    reqs: ProfileRequirements,
) -> ModelSelection:
    """Wrap a served-model entry from NCS in a ``ModelSelection``.

    The model's own ``base_url`` is used when set; otherwise the ``base_url``
    of its runtime under ``capabilities["runtimes"]``. The ``ollama`` and
    ``llama_server`` runtimes are reported with provider ``"ollama"``; any
    other runtime is its own provider. ``reqs`` is not read.
    """
    runtime = model.get("runtime", "ollama")

    # Model-level URL wins; fall back to the runtime-level one.
    base_url = model.get("base_url", "") or (
        capabilities.get("runtimes", {}).get(runtime, {}).get("base_url", "")
    )

    if runtime in ("ollama", "llama_server"):
        provider = "ollama"
    else:
        provider = runtime

    return ModelSelection(
        runtime=runtime,
        name=model.get("name", ""),
        model_type=model.get("type", "llm"),
        base_url=base_url,
        provider=provider,
        via_ncs=True,
        caps_age_s=round(caps_age, 1),
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── Static fallback (from router-config profiles) ────────────────────────────
|
||||||
|
|
||||||
|
def static_fallback(
    profile_name: str,
    router_cfg: Dict[str, Any],
) -> Optional[ModelSelection]:
    """Describe a profile from static ``llm_profiles`` config alone.

    Returns ``None`` when the profile is absent or empty. Otherwise returns a
    selection that is not NCS-backed, marked as ``cloud`` for cloud providers
    and ``ollama`` for everything else.
    """
    cfg = router_cfg.get("llm_profiles", {}).get(profile_name, {})
    if not cfg:
        return None

    provider = cfg.get("provider", "ollama")
    is_cloud = provider in CLOUD_PROVIDERS

    return ModelSelection(
        runtime="cloud" if is_cloud else "ollama",
        name=cfg.get("model", ""),
        model_type="cloud_llm" if is_cloud else "llm",
        base_url=cfg.get("base_url", ""),
        provider=provider,
        via_ncs=False,
        fallback_reason="NCS unavailable or no match; using static config",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── Top-level orchestrator ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def select_model_for_agent(
    agent_id: str,
    agent_cfg: Dict[str, Any],
    router_cfg: Dict[str, Any],
    capabilities: Optional[Dict[str, Any]],
    request_model: Optional[str] = None,
) -> ModelSelection:
    """Full selection pipeline: resolve profile → NCS → static fallback.

    This is the single entry point the router calls for each request. For the
    resolved profile the steps are, in order:

    1. Cloud profiles (``cloud_llm``) are answered from static config.
    2. If ``capabilities`` lists served models, the best NCS match is used.
    3. Otherwise the static ``llm_profiles`` entry is used.
    4. If the profile has no static entry, the same steps are repeated for
       its ``fallback_profile``. Each profile is tried at most once.
    5. If everything fails, a hard default (``qwen3:14b`` on Ollama) is
       returned.

    Args:
        agent_id: Agent identifier, used for logging and profile resolution.
        agent_cfg: The agent's config (``default_llm``, ``fallback_llm``).
        router_cfg: Router config with ``llm_profiles`` and policies.
        capabilities: NCS capabilities document, or ``None`` if unavailable.
        request_model: Model explicitly requested by the caller, if any.

    Returns:
        A ``ModelSelection``; never ``None``.
    """
    profile = resolve_effective_profile(
        agent_id, agent_cfg, router_cfg, request_model,
    )

    # Profiles already attempted. The fallback used to be handled by calling
    # this function again with unchanged arguments, which resolved the same
    # missing profile and recursed without end; iterating over the fallback
    # chain with a visited set is what makes the fallback take effect.
    tried = set()

    while True:
        tried.add(profile)
        reqs = profile_requirements(profile, agent_cfg, router_cfg)

        if reqs.required_type == "cloud_llm":
            static = static_fallback(profile, router_cfg)
            if static:
                # A cloud profile served from static config is the normal
                # path, not a fallback.
                static.fallback_reason = ""
                logger.info(
                    f"[select] agent={agent_id} profile={profile} → cloud "
                    f"provider={static.provider} model={static.name}"
                )
                return static

        if capabilities and capabilities.get("served_models"):
            sel = select_best_model(reqs, capabilities)
            if sel:
                logger.info(
                    f"[select] agent={agent_id} profile={profile} → NCS "
                    f"runtime={sel.runtime} model={sel.name} caps_age={sel.caps_age_s}s"
                )
                return sel
            logger.warning(
                f"[select] agent={agent_id} profile={profile} → NCS had no match "
                f"for type={reqs.required_type}; trying static"
            )

        static = static_fallback(profile, router_cfg)
        if static:
            logger.info(
                f"[select] agent={agent_id} profile={profile} → static "
                f"provider={static.provider} model={static.name} "
                f"reason={static.fallback_reason}"
            )
            return static

        fallback = reqs.fallback_profile
        if not fallback or fallback in tried:
            break
        logger.warning(
            f"[select] agent={agent_id} profile={profile} not found → "
            f"trying fallback_profile={fallback}"
        )
        profile = fallback

    logger.error(
        f"[select] agent={agent_id} profile={profile} → ALL selection "
        f"methods failed. Using hard default qwen3:14b"
    )
    return ModelSelection(
        runtime="ollama",
        name="qwen3:14b",
        model_type="llm",
        base_url="http://host.docker.internal:11434",
        provider="ollama",
        via_ncs=False,
        fallback_reason="all methods failed; hard default",
    )
|
||||||
@@ -128,11 +128,11 @@ llm_profiles:
|
|||||||
provider: grok
|
provider: grok
|
||||||
base_url: https://api.x.ai
|
base_url: https://api.x.ai
|
||||||
api_key_env: GROK_API_KEY
|
api_key_env: GROK_API_KEY
|
||||||
model: grok-2-1212
|
model: grok-4-1-fast-reasoning
|
||||||
max_tokens: 2048
|
max_tokens: 2048
|
||||||
temperature: 0.2
|
temperature: 0.2
|
||||||
timeout_ms: 60000
|
timeout_ms: 60000
|
||||||
description: "Grok API для SOFIIA (Chief AI Architect)"
|
description: "Grok 4.1 Fast Reasoning для SOFIIA (Chief AI Architect)"
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Node Capabilities
|
# Node Capabilities
|
||||||
@@ -141,6 +141,72 @@ node_capabilities:
|
|||||||
url: http://node-capabilities:8099/capabilities
|
url: http://node-capabilities:8099/capabilities
|
||||||
cache_ttl_sec: 30
|
cache_ttl_sec: 30
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Selection Policies (NCS-first model selection)
|
||||||
|
# ============================================================================
|
||||||
|
# Router uses these to map profile → required_type + prefer order.
|
||||||
|
# NCS picks the best served model matching these requirements.
|
||||||
|
# Cloud profiles skip NCS; if cloud fails, fallback_profile is used via NCS.
|
||||||
|
selection_policies:
|
||||||
|
local_default_coder:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3:14b", "qwen3.5:35b-a3b", "*"]
|
||||||
|
|
||||||
|
local_default_reasoner:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3.5:35b-a3b", "deepseek-r1:70b", "*"]
|
||||||
|
|
||||||
|
qwen3_strategist_8b:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3:14b", "qwen3.5:35b-a3b", "*"]
|
||||||
|
|
||||||
|
qwen3_support_8b:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3:14b", "gemma3:latest", "*"]
|
||||||
|
|
||||||
|
qwen3_science_8b:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3:14b", "qwen3.5:35b-a3b", "*"]
|
||||||
|
|
||||||
|
qwen3_creative_8b:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3:14b", "*"]
|
||||||
|
|
||||||
|
qwen3_5_35b_a3b:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["qwen3.5:35b-a3b", "*"]
|
||||||
|
|
||||||
|
qwen3_vision_8b:
|
||||||
|
required_type: vision
|
||||||
|
prefer: ["llava:13b", "*"]
|
||||||
|
|
||||||
|
qwen2_5_3b_service:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["phi3:latest", "gemma3:latest", "qwen3:14b"]
|
||||||
|
|
||||||
|
mistral_community_12b:
|
||||||
|
required_type: llm
|
||||||
|
prefer: ["mistral-nemo:12b", "qwen3:14b", "*"]
|
||||||
|
|
||||||
|
cloud_deepseek:
|
||||||
|
required_type: cloud_llm
|
||||||
|
provider: deepseek
|
||||||
|
fallback_profile: local_default_coder
|
||||||
|
|
||||||
|
cloud_grok:
|
||||||
|
required_type: cloud_llm
|
||||||
|
provider: grok
|
||||||
|
fallback_profile: local_default_coder
|
||||||
|
|
||||||
|
cloud_mistral:
|
||||||
|
required_type: cloud_llm
|
||||||
|
provider: mistral
|
||||||
|
fallback_profile: local_default_coder
|
||||||
|
|
||||||
|
vision_default:
|
||||||
|
required_type: vision
|
||||||
|
prefer: ["llava:13b", "*"]
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Orchestrator Providers
|
# Orchestrator Providers
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user