P1: NCS-first model selection + NATS capabilities + Grok 4.1

Router model selection:
- New model_select.py: resolve_effective_profile → profile_requirements → select_best_model pipeline. NCS-first with graceful static fallback.
- selection_policies in router-config.node2.yml define prefer order per profile without hardcoding models (e.g. local_default_coder prefers qwen3:14b then qwen3.5:35b-a3b).
- Cloud profiles (cloud_grok, cloud_deepseek) skip NCS; on cloud failure use fallback_profile via NCS for local selection.
- Structured logs: selected_profile, required_type, runtime, model, caps_age_s, fallback_reason on every infer request.

Grok model fix:
- grok-2-1212 no longer exists on the xAI API → updated to grok-4-1-fast-reasoning across all 3 hardcoded locations in main.py and router-config.node2.yml.

NCS NATS request/reply:
- node-capabilities subscribes to node.noda2.capabilities.get (NATS request/reply). Enabled via ENABLE_NATS_CAPS=true in compose.
- NODA1 router can query NODA2 capabilities over a NATS leafnode without HTTP connectivity.

Verified:
- NCS: 14 served models from Ollama+Swapper+llama-server
- NATS: request/reply returns full capabilities JSON
- Sofiia: cloud_grok → grok-4-1-fast-reasoning (tested, 200 OK)
- Helion: NCS → qwen3:14b via Ollama (caps_age=23.7s cache hit)
- Router health: ok

Made-with: Cursor
This commit is contained in:
@@ -46,6 +46,15 @@ except ImportError:
|
||||
RUNTIME_GUARD_AVAILABLE = False
|
||||
RuntimeGuard = None
|
||||
|
||||
# NCS-first model selection
|
||||
try:
|
||||
import capabilities_client
|
||||
from model_select import select_model_for_agent, ModelSelection, CLOUD_PROVIDERS as NCS_CLOUD_PROVIDERS
|
||||
NCS_AVAILABLE = True
|
||||
except ImportError:
|
||||
NCS_AVAILABLE = False
|
||||
capabilities_client = None # type: ignore[assignment]
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -756,6 +765,23 @@ async def startup_event():
|
||||
else:
|
||||
tool_manager = None
|
||||
|
||||
# Initialize Node Capabilities client
|
||||
if NCS_AVAILABLE and capabilities_client:
|
||||
ncs_cfg = router_config.get("node_capabilities", {})
|
||||
ncs_url = ncs_cfg.get("url", "") or os.getenv("NODE_CAPABILITIES_URL", "")
|
||||
ncs_ttl = ncs_cfg.get("cache_ttl_sec", 30)
|
||||
if ncs_url:
|
||||
capabilities_client.configure(url=ncs_url, ttl=ncs_ttl)
|
||||
caps = await capabilities_client.fetch_capabilities()
|
||||
served = caps.get("served_count", 0)
|
||||
logger.info(f"✅ NCS configured: url={ncs_url} ttl={ncs_ttl}s served={served} models")
|
||||
else:
|
||||
logger.warning("⚠️ NCS url not configured; model selection will use static config only")
|
||||
elif NCS_AVAILABLE:
|
||||
logger.info("ℹ️ NCS modules loaded but capabilities_client is None")
|
||||
else:
|
||||
logger.warning("⚠️ NCS modules not available (model_select / capabilities_client import failed)")
|
||||
|
||||
# Initialize CLAN runtime guard
|
||||
if RUNTIME_GUARD_AVAILABLE and RuntimeGuard and CLAN_RUNTIME_GUARD_ENABLED:
|
||||
try:
|
||||
@@ -1279,7 +1305,7 @@ async def internal_llm_complete(request: InternalLLMRequest):
|
||||
cloud_providers = [
|
||||
{"name": "deepseek", "api_key_env": "DEEPSEEK_API_KEY", "base_url": "https://api.deepseek.com", "model": "deepseek-chat", "timeout": 60},
|
||||
{"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
|
||||
{"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-2-1212", "timeout": 60}
|
||||
{"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-4-1-fast-reasoning", "timeout": 60}
|
||||
]
|
||||
|
||||
# Respect configured provider: local profiles should stay local.
|
||||
@@ -1603,38 +1629,68 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"}
|
||||
|
||||
llm_profiles = router_config.get("llm_profiles", {})
|
||||
llm_profile = llm_profiles.get(default_llm, {})
|
||||
|
||||
if not llm_profile:
|
||||
fallback_llm = agent_config.get("fallback_llm", "local_default_coder")
|
||||
llm_profile = llm_profiles.get(fallback_llm, {})
|
||||
logger.warning(
|
||||
f"⚠️ Profile '{default_llm}' not found for agent={agent_id} "
|
||||
f"→ fallback to '{fallback_llm}' (local). "
|
||||
f"NOT defaulting to cloud silently."
|
||||
)
|
||||
default_llm = fallback_llm
|
||||
|
||||
provider = llm_profile.get("provider", "ollama")
|
||||
logger.info(f"🎯 Agent={agent_id}: profile={default_llm} provider={provider} model={llm_profile.get('model', '?')}")
|
||||
# ── NCS-first model selection ────────────────────────────────────────
|
||||
ncs_selection = None
|
||||
if NCS_AVAILABLE and capabilities_client:
|
||||
try:
|
||||
caps = await capabilities_client.fetch_capabilities()
|
||||
if caps:
|
||||
caps["_fetch_ts"] = capabilities_client._cache_ts
|
||||
ncs_selection = await select_model_for_agent(
|
||||
agent_id, agent_config, router_config, caps, request.model,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ NCS selection error: {e}; falling back to static config")
|
||||
|
||||
# If explicit model is requested, try to resolve it to configured cloud profile.
|
||||
if request.model:
|
||||
for profile_name, profile in llm_profiles.items():
|
||||
if profile.get("model") == request.model and profile.get("provider") in cloud_provider_names:
|
||||
llm_profile = profile
|
||||
provider = profile.get("provider", provider)
|
||||
default_llm = profile_name
|
||||
logger.info(f"🎛️ Matched request.model={request.model} to profile={profile_name} provider={provider}")
|
||||
break
|
||||
|
||||
# Determine model name
|
||||
if provider in ["deepseek", "openai", "anthropic", "mistral"]:
|
||||
model = llm_profile.get("model", "deepseek-chat")
|
||||
llm_profiles = router_config.get("llm_profiles", {})
|
||||
|
||||
if ncs_selection and ncs_selection.name:
|
||||
provider = ncs_selection.provider
|
||||
model = ncs_selection.name
|
||||
llm_profile = llm_profiles.get(default_llm, {})
|
||||
if ncs_selection.base_url and provider == "ollama":
|
||||
llm_profile = {**llm_profile, "base_url": ncs_selection.base_url}
|
||||
logger.info(
|
||||
f"🎯 NCS select: agent={agent_id} profile={default_llm} "
|
||||
f"→ runtime={ncs_selection.runtime} model={model} "
|
||||
f"provider={provider} via_ncs={ncs_selection.via_ncs} "
|
||||
f"caps_age={ncs_selection.caps_age_s}s "
|
||||
f"fallback={ncs_selection.fallback_reason or 'none'}"
|
||||
)
|
||||
else:
|
||||
# For local ollama, use swapper model name format
|
||||
model = request.model or "qwen3:8b"
|
||||
llm_profile = llm_profiles.get(default_llm, {})
|
||||
if not llm_profile:
|
||||
fallback_llm = agent_config.get("fallback_llm", "local_default_coder")
|
||||
llm_profile = llm_profiles.get(fallback_llm, {})
|
||||
logger.warning(
|
||||
f"⚠️ Profile '{default_llm}' not found for agent={agent_id} "
|
||||
f"→ fallback to '{fallback_llm}' (local). "
|
||||
f"NOT defaulting to cloud silently."
|
||||
)
|
||||
default_llm = fallback_llm
|
||||
|
||||
provider = llm_profile.get("provider", "ollama")
|
||||
|
||||
if request.model:
|
||||
for profile_name, profile in llm_profiles.items():
|
||||
if profile.get("model") == request.model and profile.get("provider") in cloud_provider_names:
|
||||
llm_profile = profile
|
||||
provider = profile.get("provider", provider)
|
||||
default_llm = profile_name
|
||||
logger.info(f"🎛️ Matched request.model={request.model} to profile={profile_name} provider={provider}")
|
||||
break
|
||||
|
||||
if provider in ["deepseek", "openai", "anthropic", "mistral"]:
|
||||
model = llm_profile.get("model", "deepseek-chat")
|
||||
elif provider == "grok":
|
||||
model = llm_profile.get("model", "grok-4-1-fast-reasoning")
|
||||
else:
|
||||
model = request.model or llm_profile.get("model", "qwen3:14b")
|
||||
|
||||
logger.info(
|
||||
f"🎯 Static select: agent={agent_id} profile={default_llm} "
|
||||
f"provider={provider} model={model}"
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# VISION PROCESSING (if images present)
|
||||
@@ -1863,7 +1919,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
"name": "grok",
|
||||
"api_key_env": "GROK_API_KEY",
|
||||
"base_url": "https://api.x.ai",
|
||||
"model": "grok-2-1212",
|
||||
"model": "grok-4-1-fast-reasoning",
|
||||
"timeout": 60
|
||||
}
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user