From de8bb36462e3292fc495a1f209c0491a0b9bb467 Mon Sep 17 00:00:00 2001 From: Apple Date: Wed, 18 Feb 2026 10:40:40 -0800 Subject: [PATCH] docs+router: formalize runtime policy and remove temporary cloud-first code override --- docs/AGENT_RUNTIME_POLICY.md | 31 +++++++++++++++++++++ docs/runbooks/CONFIG_GENERATION_WORKFLOW.md | 28 +++++++++++++++++++ services/router/main.py | 16 ----------- 3 files changed, 59 insertions(+), 16 deletions(-) create mode 100644 docs/AGENT_RUNTIME_POLICY.md create mode 100644 docs/runbooks/CONFIG_GENERATION_WORKFLOW.md diff --git a/docs/AGENT_RUNTIME_POLICY.md b/docs/AGENT_RUNTIME_POLICY.md new file mode 100644 index 00000000..ce0f5511 --- /dev/null +++ b/docs/AGENT_RUNTIME_POLICY.md @@ -0,0 +1,31 @@ +# Agent Runtime Policy (NODE1) + +## Purpose +Single policy for runtime model selection and orchestration behavior. + +## Agent Classes +- `top_level`: Telegram-facing orchestrators with optional CrewAI teams. +- `internal`: infrastructure/service agents (not Telegram-facing by default). + +## Model Policy +- Top-level agents: `cloud_deepseek` primary + `cloud_mistral` fallback. +- Exception: `sofiia` uses `cloud_grok` primary + `cloud_deepseek` fallback. +- Internal agents: local-first (`ollama` profiles). + - `monitor`: `qwen2_5_3b_service`. + - `devtools`: local by default; cloud override only for explicitly heavy tasks. + - `comfy`: no chat LLM profile (tool/service execution path). + +## Orchestration Policy (CrewAI) +- Top-level agents are direct-LLM first for simple requests. +- CrewAI is on-demand for complex/detailed requests (`force_detailed` / `requires_complex_reasoning`). +- Fast path must cap active subagents to 2-3 roles. +- Final user response is always formed by the top-level agent. + +## Routing Constraints +- Do not use code-level hard overrides for cloud/local policy if it can be expressed in `router-config.yml`. +- Prefer deterministic routing rules per agent over generic fallbacks. 
+ +## Source of Truth +- Canonical intent: `config/agent_registry.yml`. +- Runtime implementation: `services/router/router-config.yml`. +- Any policy change must be reflected in both until router config generation is fully automated. diff --git a/docs/runbooks/CONFIG_GENERATION_WORKFLOW.md b/docs/runbooks/CONFIG_GENERATION_WORKFLOW.md new file mode 100644 index 00000000..fd9eecc6 --- /dev/null +++ b/docs/runbooks/CONFIG_GENERATION_WORKFLOW.md @@ -0,0 +1,28 @@ +# Config Generation Workflow + +## Goal +Keep agent policy consistent and avoid drift between registry and runtime router config. + +## Current Reality +- `tools/agents generate` updates: + - `config/router_agents.json` + - `config/crewai_agents.json` + - `gateway-bot/agent_registry.json` +- `services/router/router-config.yml` is still runtime-critical and currently not fully regenerated from the registry. + +## Update Procedure +1. Edit canonical policy in `config/agent_registry.yml`. +2. Run `python3 tools/agents generate`. +3. Manually sync runtime-critical rules in `services/router/router-config.yml`: + - llm profiles (`cloud_deepseek`, `cloud_mistral`, `cloud_grok`, local profiles) + - per-agent routing rules (`*_agent` rules) + - infra agents' local-first constraints +4. Deploy router and run smoke checks: + - infer: all top-level agents + `monitor` + - webhook: `/telegram/webhook` + selected agent webhook + - canary: `ops/monitor_canary_summary.sh` +5. Commit policy docs + config updates in one PR/commit set. + +## Hard Rules +- No temporary code-level model policy hacks in `services/router/main.py`. +- If an emergency hotfix is needed, remove it in the next stabilization commit. 
diff --git a/services/router/main.py b/services/router/main.py index b95b1f7c..212d5a34 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -48,14 +48,6 @@ except ImportError: logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -_CLOUD_FIRST_AGENT_SET = { - x.strip().lower() - for x in os.getenv( - "CLOUD_FIRST_AGENTS", - "daarwizz,helion,alateya,druid,nutra,agromatrix,greenfood,clan,eonarch,yaromir,soul,senpai,oneok,sofiia,monitor", - ).split(",") - if x.strip() -} TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml") _trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}} @@ -1459,16 +1451,8 @@ async def agent_infer(agent_id: str, request: InferRequest): logger.exception(f"❌ CrewAI error: {e}, falling back to direct LLM") default_llm = agent_config.get("default_llm", "qwen3:8b") - if agent_id.lower() in _CLOUD_FIRST_AGENT_SET and not (metadata or {}).get("force_local_model"): - default_llm = "cloud_deepseek" routing_rules = router_config.get("routing", []) - if agent_id.lower() in _CLOUD_FIRST_AGENT_SET and not (metadata or {}).get("force_local_model"): - # For cloud-first agents, ignore local-only routing rules unless explicitly forced. - routing_rules = [ - rule for rule in routing_rules - if not (isinstance(rule.get("use_llm"), str) and not rule.get("use_llm", "").startswith("cloud_")) - ] default_llm = _select_default_llm(agent_id, metadata, default_llm, routing_rules) # Get LLM profile configuration