snapshot: NODE1 production state 2026-02-09
Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.
Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles
Excluded from snapshot: venv/, .env, data/, backups, .tgz archives
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
from typing import Literal, Optional, Dict, Any, List
|
||||
import asyncio
|
||||
@@ -7,6 +8,16 @@ import os
|
||||
import yaml
|
||||
import httpx
|
||||
import logging
|
||||
import time # For latency metrics
|
||||
|
||||
# CrewAI Integration
# crewai_client is an optional project module: when it is absent the
# gateway degrades to direct LLM routing. Every imported name is stubbed
# to None so callers can check CREWAI_CLIENT_AVAILABLE (or the stubs)
# before invoking them.
try:
    from crewai_client import should_use_crewai, call_crewai, get_crewai_health
    CREWAI_CLIENT_AVAILABLE = True
except ImportError:
    CREWAI_CLIENT_AVAILABLE = False
    should_use_crewai = None
    call_crewai = None
    # Fix: the original left this name undefined on ImportError even
    # though it is imported in the try-branch; any later reference would
    # raise NameError instead of being feature-gated.
    get_crewai_health = None
|
||||
from neo4j import AsyncGraphDatabase
|
||||
|
||||
# Memory Retrieval Pipeline v3.0
|
||||
@@ -41,6 +52,10 @@ OCR_URL = os.getenv("OCR_URL", "http://swapper-service:8890") # Swapper /ocr en
|
||||
DOCUMENT_URL = os.getenv("DOCUMENT_URL", "http://swapper-service:8890") # Swapper /document endpoint
|
||||
CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
|
||||
|
||||
# CrewAI Routing Configuration
|
||||
CREWAI_ROUTING_ENABLED = os.getenv("CREWAI_ROUTING_ENABLED", "true").lower() == "true"
|
||||
CREWAI_URL = os.getenv("CREWAI_URL", "http://dagi-staging-crewai-service:9010")
|
||||
|
||||
# Neo4j Configuration
|
||||
NEO4J_URI = os.getenv("NEO4J_BOLT_URL", "bolt://neo4j:7687")
|
||||
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
|
||||
@@ -269,6 +284,21 @@ async def publish_agent_invocation(invocation: AgentInvocation):
|
||||
else:
|
||||
print(f"⚠️ NATS not available, invocation not published: {invocation.json()}")
|
||||
|
||||
|
||||
|
||||
# ==============================================================
# PROMETHEUS METRICS ENDPOINT
# ==============================================================
@app.get("/metrics")
async def prometheus_metrics():
    """Serve agent metrics in Prometheus exposition format.

    The agent_metrics module is imported lazily so that a missing or
    broken metrics module degrades to a plain-text error payload rather
    than breaking application startup.
    """
    try:
        from agent_metrics import get_metrics, get_content_type
        payload = get_metrics()
        content_type = get_content_type()
        return Response(content=payload, media_type=content_type)
    except Exception as e:
        # Best-effort endpoint: log and emit a comment line Prometheus
        # scrapers will ignore.
        logger.error(f"Metrics error: {e}")
        return Response(content=b"# Error generating metrics", media_type="text/plain")
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""Health check endpoint"""
|
||||
@@ -346,6 +376,31 @@ class InferResponse(BaseModel):
|
||||
image_base64: Optional[str] = None # Generated image in base64 format
|
||||
|
||||
|
||||
|
||||
|
||||
# =========================================================================
# INTERNAL LLM API (for CrewAI and internal services)
# =========================================================================

class InternalLLMRequest(BaseModel):
    """Request payload for the /internal/llm/complete endpoint.

    Consumed by the CrewAI service for multi-role orchestration; the
    endpoint applies no agent routing to these requests.
    """
    prompt: str  # user prompt forwarded to the LLM as the user message
    system_prompt: Optional[str] = None  # optional system message content
    llm_profile: Optional[str] = "reasoning"  # key into router config "llm_profiles"
    model: Optional[str] = None  # explicit model override; profile model used when None
    max_tokens: Optional[int] = 2048  # completion token budget
    temperature: Optional[float] = 0.2  # sampling temperature
    role_context: Optional[str] = None  # role name prepended to the system message
    metadata: Optional[Dict[str, Any]] = None  # opaque caller-supplied metadata
||||
|
||||
|
||||
class InternalLLMResponse(BaseModel):
    """Response payload returned by /internal/llm/complete."""
    text: str  # generated completion text
    model: str  # model that produced the completion
    provider: str  # backend that served the request (e.g. "deepseek", "ollama")
    tokens_used: int = 0  # total tokens reported by the provider; 0 when unknown
    latency_ms: int = 0  # end-to-end completion latency in milliseconds
|
||||
|
||||
|
||||
class BackendStatus(BaseModel):
|
||||
"""Status of a backend service"""
|
||||
name: str
|
||||
@@ -447,6 +502,100 @@ async def get_backends_status():
|
||||
return backends
|
||||
|
||||
|
||||
|
||||
|
||||
# =========================================================================
# INTERNAL LLM COMPLETE ENDPOINT (for CrewAI)
# =========================================================================

@app.post("/internal/llm/complete", response_model=InternalLLMResponse)
async def internal_llm_complete(request: InternalLLMRequest):
    """
    Internal LLM completion endpoint.

    NO routing, NO CrewAI decision, NO agent selection.
    Used by CrewAI service for multi-role orchestration.

    Tries cloud providers (the profile's preferred provider first, when
    its API key is configured), then falls back to a local Ollama
    instance; raises 503 when every backend fails.
    """
    # Module-level `time` is already imported; the original re-imported
    # it locally as time_module, which was redundant.
    t0 = time.time()

    logger.info(f"Internal LLM: profile={request.llm_profile}, role={request.role_context}")

    # Resolve the LLM profile from router config; an unknown profile name
    # yields an empty dict so the hard-coded defaults below apply.
    llm_profiles = router_config.get("llm_profiles", {})
    profile_name = request.llm_profile or "reasoning"
    llm_profile = llm_profiles.get(profile_name, {})

    provider = llm_profile.get("provider", "deepseek")
    # NOTE(review): `model` is resolved here but the cloud loop below
    # always sends cloud["model"] and the Ollama fallback hard-codes
    # qwen3:8b, so request.model is effectively ignored — confirm intent.
    model = request.model or llm_profile.get("model", "deepseek-chat")
    # Fix: use explicit `is not None` checks so legitimate falsy values
    # (max_tokens=0, temperature=0.0) are honored instead of being
    # silently replaced by the profile defaults (the original `or` chain
    # dropped them).
    max_tokens = request.max_tokens if request.max_tokens is not None else llm_profile.get("max_tokens", 2048)
    temperature = request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2)

    # Build the OpenAI-style message list; role_context is prepended to
    # the system prompt, or substitutes for it when none was given.
    messages = []
    if request.system_prompt:
        system_content = request.system_prompt
        if request.role_context:
            system_content = f"[Role: {request.role_context}]\n\n{system_content}"
        messages.append({"role": "system", "content": system_content})
    elif request.role_context:
        messages.append({"role": "system", "content": f"You are acting as {request.role_context}. Respond professionally."})

    messages.append({"role": "user", "content": request.prompt})

    # Cloud providers in default priority order; a provider is attempted
    # only when its API key is present in the environment.
    cloud_providers = [
        {"name": "deepseek", "api_key_env": "DEEPSEEK_API_KEY", "base_url": "https://api.deepseek.com", "model": "deepseek-chat", "timeout": 60},
        {"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
        {"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-2-1212", "timeout": 60}
    ]

    # Move the profile's preferred provider to the front of the list.
    if provider in ["deepseek", "mistral", "grok"]:
        cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)

    # Try cloud providers in order; any failure falls through to the next.
    for cloud in cloud_providers:
        api_key = os.getenv(cloud["api_key_env"])
        if not api_key:
            continue

        try:
            logger.debug(f"Internal LLM trying {cloud['name']}")
            cloud_resp = await http_client.post(
                f"{cloud['base_url']}/v1/chat/completions",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json={"model": cloud["model"], "messages": messages, "max_tokens": max_tokens, "temperature": temperature, "stream": False},
                timeout=cloud["timeout"]
            )

            if cloud_resp.status_code == 200:
                data = cloud_resp.json()
                response_text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
                tokens = data.get("usage", {}).get("total_tokens", 0)
                latency = int((time.time() - t0) * 1000)
                logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms")
                return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens, latency_ms=latency)
        except Exception as e:
            logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
            continue

    # Fallback to a local Ollama instance (Docker host bridge address).
    try:
        logger.info("Internal LLM fallback to Ollama")
        ollama_resp = await http_client.post(
            "http://172.18.0.1:11434/api/generate",
            json={"model": "qwen3:8b", "prompt": request.prompt, "system": request.system_prompt or "", "stream": False, "options": {"num_predict": max_tokens, "temperature": temperature}},
            timeout=120.0
        )
        if ollama_resp.status_code == 200:
            data = ollama_resp.json()
            latency = int((time.time() - t0) * 1000)
            return InternalLLMResponse(text=data.get("response", ""), model="qwen3:8b", provider="ollama", tokens_used=0, latency_ms=latency)
    except Exception as e:
        logger.error(f"Internal LLM Ollama failed: {e}")

    raise HTTPException(status_code=503, detail="All LLM providers unavailable")
|
||||
|
||||
|
||||
@app.post("/v1/agents/{agent_id}/infer", response_model=InferResponse)
|
||||
async def agent_infer(agent_id: str, request: InferRequest):
|
||||
"""
|
||||
@@ -519,9 +668,73 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
system_prompt = agent_config.get("system_prompt")
|
||||
|
||||
# Determine which backend to use
|
||||
# Use router config to get default model for agent, fallback to qwen3-8b
|
||||
# Use router config to get default model for agent, fallback to qwen3:8b
|
||||
agent_config = router_config.get("agents", {}).get(agent_id, {})
|
||||
default_llm = agent_config.get("default_llm", "qwen3-8b")
|
||||
|
||||
# =========================================================================
|
||||
# CREWAI DECISION: Use orchestration or direct LLM?
|
||||
# =========================================================================
|
||||
if CREWAI_ROUTING_ENABLED and CREWAI_CLIENT_AVAILABLE:
|
||||
try:
|
||||
# Get agent CrewAI config from registry (or router_config fallback)
|
||||
crewai_cfg = agent_config.get("crewai", {})
|
||||
|
||||
use_crewai, crewai_reason = should_use_crewai(
|
||||
agent_id=agent_id,
|
||||
prompt=request.prompt,
|
||||
agent_config=agent_config,
|
||||
force_crewai=request.metadata.get("force_crewai", False) if request.metadata else False,
|
||||
|
||||
)
|
||||
|
||||
logger.info(f"🎭 CrewAI decision for {agent_id}: {use_crewai} ({crewai_reason})")
|
||||
|
||||
if use_crewai:
|
||||
t0 = time.time()
|
||||
crew_result = await call_crewai(
|
||||
agent_id=agent_id,
|
||||
task=request.prompt,
|
||||
context={
|
||||
"memory_brief": memory_brief_text,
|
||||
"system_prompt": system_prompt,
|
||||
"metadata": metadata,
|
||||
},
|
||||
team=crewai_cfg.get("team")
|
||||
)
|
||||
|
||||
latency = time.time() - t0
|
||||
|
||||
if crew_result.get("success") and crew_result.get("result"):
|
||||
logger.info(f"✅ CrewAI success for {agent_id}: {latency:.2f}s")
|
||||
|
||||
# Store interaction in memory
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
|
||||
try:
|
||||
await memory_retrieval.store_interaction(
|
||||
channel=channel,
|
||||
chat_id=chat_id,
|
||||
user_id=user_id,
|
||||
agent_id=request_agent_id,
|
||||
username=username,
|
||||
user_message=request.prompt,
|
||||
assistant_response=crew_result["result"]
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Memory storage failed: {e}")
|
||||
|
||||
return InferResponse(
|
||||
response=crew_result["result"],
|
||||
model="crewai-" + agent_id,
|
||||
provider="crewai",
|
||||
tokens_used=0,
|
||||
latency_ms=int(latency * 1000)
|
||||
)
|
||||
else:
|
||||
logger.warning(f"⚠️ CrewAI failed, falling back to direct LLM")
|
||||
except Exception as e:
|
||||
logger.exception(f"❌ CrewAI error: {e}, falling back to direct LLM")
|
||||
|
||||
default_llm = agent_config.get("default_llm", "qwen3:8b")
|
||||
|
||||
# Check if there's a routing rule for this agent
|
||||
routing_rules = router_config.get("routing", [])
|
||||
@@ -542,7 +755,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
model = llm_profile.get("model", "deepseek-chat")
|
||||
else:
|
||||
# For local ollama, use swapper model name format
|
||||
model = request.model or "qwen3-8b"
|
||||
model = request.model or "qwen3:8b"
|
||||
|
||||
# =========================================================================
|
||||
# VISION PROCESSING (if images present)
|
||||
@@ -929,9 +1142,9 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
# Check if default_llm is local
|
||||
if llm_profile.get("provider") == "ollama":
|
||||
# Extract model name and convert format (qwen3:8b → qwen3-8b for Swapper)
|
||||
# Extract model name and convert format (qwen3:8b → qwen3:8b for Swapper)
|
||||
ollama_model = llm_profile.get("model", "qwen3:8b")
|
||||
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3-8b
|
||||
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3:8b
|
||||
logger.debug(f"✅ Using agent's default local model: {local_model}")
|
||||
else:
|
||||
# Find first local model from config
|
||||
@@ -944,7 +1157,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
# Final fallback if no local model found
|
||||
if not local_model:
|
||||
local_model = "qwen3-8b"
|
||||
local_model = "qwen3:8b"
|
||||
logger.warning(f"⚠️ No local model in config, using hardcoded fallback: {local_model}")
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user