From de8bb36462e3292fc495a1f209c0491a0b9bb467 Mon Sep 17 00:00:00 2001 From: Apple Date: Wed, 18 Feb 2026 10:40:40 -0800 Subject: [PATCH] docs+router: formalize runtime policy and remove temporary cloud-first code override --- docs/AGENT_RUNTIME_POLICY.md | 31 +++++++++++++++++++++ docs/runbooks/CONFIG_GENERATION_WORKFLOW.md | 28 +++++++++++++++++++ services/router/main.py | 16 ----------- 3 files changed, 59 insertions(+), 16 deletions(-) create mode 100644 docs/AGENT_RUNTIME_POLICY.md create mode 100644 docs/runbooks/CONFIG_GENERATION_WORKFLOW.md diff --git a/docs/AGENT_RUNTIME_POLICY.md b/docs/AGENT_RUNTIME_POLICY.md new file mode 100644 index 00000000..ce0f5511 --- /dev/null +++ b/docs/AGENT_RUNTIME_POLICY.md @@ -0,0 +1,31 @@ +# Agent Runtime Policy (NODE1) + +## Purpose +Single policy for runtime model selection and orchestration behavior. + +## Agent Classes +- `top_level`: Telegram-facing orchestrators with optional CrewAI teams. +- `internal`: infrastructure/service agents (not Telegram-facing by default). + +## Model Policy +- Top-level agents: `cloud_deepseek` primary + `cloud_mistral` fallback. +- Exception: `sofiia` uses `cloud_grok` primary + `cloud_deepseek` fallback. +- Internal agents: local-first (`ollama` profiles). + - `monitor`: `qwen2_5_3b_service`. + - `devtools`: local by default; cloud override only for explicitly heavy tasks. + - `comfy`: no chat LLM profile (tool/service execution path). + +## Orchestration Policy (CrewAI) +- Top-level agents are direct-LLM first for simple requests. +- CrewAI is on-demand for complex/detailed requests (`force_detailed` / `requires_complex_reasoning`). +- Fast path must cap active subagents to 2-3 roles. +- Final user response is always formed by the top-level agent. + +## Routing Constraints +- Do not use code-level hard overrides for cloud/local policy if it can be expressed in `router-config.yml`. +- Prefer deterministic routing rules per agent over generic fallbacks. 
+ +## Source of Truth +- Canonical intent: `config/agent_registry.yml`. +- Runtime implementation: `services/router/router-config.yml`. +- Any policy change must be reflected in both until router config generation is fully automated. diff --git a/docs/runbooks/CONFIG_GENERATION_WORKFLOW.md b/docs/runbooks/CONFIG_GENERATION_WORKFLOW.md new file mode 100644 index 00000000..fd9eecc6 --- /dev/null +++ b/docs/runbooks/CONFIG_GENERATION_WORKFLOW.md @@ -0,0 +1,28 @@ +# Config Generation Workflow + +## Goal +Keep agent policy consistent and avoid drift between registry and runtime router config. + +## Current Reality +- `tools/agents generate` updates: + - `config/router_agents.json` + - `config/crewai_agents.json` + - `gateway-bot/agent_registry.json` +- `services/router/router-config.yml` is still runtime-critical and currently not fully regenerated from the registry. + +## Update Procedure +1. Edit canonical policy in `config/agent_registry.yml`. +2. Run `python3 tools/agents generate`. +3. Manually sync runtime-critical rules in `services/router/router-config.yml`: + - llm profiles (`cloud_deepseek`, `cloud_mistral`, `cloud_grok`, local profiles) + - per-agent routing rules (`*_agent` rules) + - infra agents' local-first constraints +4. Deploy router and run smoke checks: + - infer: all top-level agents + `monitor` + - webhook: `/telegram/webhook` + selected agent webhook + - canary: `ops/monitor_canary_summary.sh` +5. Commit policy docs + config updates in one PR/commit set. + +## Hard Rules +- No temporary code-level model policy hacks in `services/router/main.py`. +- If an emergency hotfix is needed, remove it in the next stabilization commit. 
diff --git a/services/router/main.py b/services/router/main.py index b95b1f7c..212d5a34 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -48,14 +48,6 @@ except ImportError: logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -_CLOUD_FIRST_AGENT_SET = { - x.strip().lower() - for x in os.getenv( - "CLOUD_FIRST_AGENTS", - "daarwizz,helion,alateya,druid,nutra,agromatrix,greenfood,clan,eonarch,yaromir,soul,senpai,oneok,sofiia,monitor", - ).split(",") - if x.strip() -} TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml") _trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}} @@ -1459,16 +1451,8 @@ async def agent_infer(agent_id: str, request: InferRequest): logger.exception(f"❌ CrewAI error: {e}, falling back to direct LLM") default_llm = agent_config.get("default_llm", "qwen3:8b") - if agent_id.lower() in _CLOUD_FIRST_AGENT_SET and not (metadata or {}).get("force_local_model"): - default_llm = "cloud_deepseek" routing_rules = router_config.get("routing", []) - if agent_id.lower() in _CLOUD_FIRST_AGENT_SET and not (metadata or {}).get("force_local_model"): - # For cloud-first agents, ignore local-only routing rules unless explicitly forced. - routing_rules = [ - rule for rule in routing_rules - if not (isinstance(rule.get("use_llm"), str) and not rule.get("use_llm", "").startswith("cloud_")) - ] default_llm = _select_default_llm(agent_id, metadata, default_llm, routing_rules) # Get LLM profile configuration