feat(runtime): sync experience bus and learner stack into main
This commit is contained in:
@@ -1,10 +1,12 @@
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from typing import Literal, Optional, Dict, Any, List
|
||||
from typing import Literal, Optional, Dict, Any, List, Tuple
|
||||
import asyncio
|
||||
from collections import OrderedDict
|
||||
import json
|
||||
import os
|
||||
import random as random_module
|
||||
import re
|
||||
import yaml
|
||||
import httpx
|
||||
@@ -12,6 +14,8 @@ import logging
|
||||
import hashlib
|
||||
import hmac
|
||||
import time # For latency metrics
|
||||
import uuid
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
# CrewAI Integration
|
||||
@@ -62,6 +66,34 @@ except ImportError:
|
||||
global_capabilities_client = None # type: ignore[assignment]
|
||||
offload_client = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
from experience_bus import ExperienceBus, normalize_input_for_hash, redact_error_message
|
||||
EXPERIENCE_BUS_AVAILABLE = True
|
||||
except ImportError:
|
||||
EXPERIENCE_BUS_AVAILABLE = False
|
||||
ExperienceBus = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
import asyncpg
|
||||
except ImportError:
|
||||
asyncpg = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
from agent_metrics import (
|
||||
inc_lessons_retrieved,
|
||||
inc_lessons_attached,
|
||||
observe_lessons_attach_latency,
|
||||
)
|
||||
except Exception:
|
||||
def inc_lessons_retrieved(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
def inc_lessons_attached(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
def observe_lessons_attach_latency(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
NEO4J_NOTIFICATIONS_LOG_LEVEL = os.getenv("NEO4J_NOTIFICATIONS_LOG_LEVEL", "ERROR").strip().upper()
|
||||
@@ -71,6 +103,29 @@ logging.getLogger("neo4j.notifications").setLevel(_neo4j_notifications_level)
|
||||
# If auto-router module is unavailable (or loaded later), inference must still work.
|
||||
SOFIIA_AUTO_ROUTER_AVAILABLE = False
|
||||
|
||||
|
||||
def _parse_agent_id_set(raw_value: Optional[str], default_csv: str = "") -> set[str]:
|
||||
source = raw_value if (raw_value is not None and str(raw_value).strip() != "") else default_csv
|
||||
out: set[str] = set()
|
||||
for part in str(source or "").split(","):
|
||||
token = part.strip().lower()
|
||||
if token:
|
||||
out.add(token)
|
||||
return out
|
||||
|
||||
|
||||
PLANNED_AGENT_IDS = _parse_agent_id_set(os.getenv("PLANNED_AGENT_IDS"), "aistalk")
|
||||
DISABLED_AGENT_IDS = _parse_agent_id_set(os.getenv("DISABLED_AGENT_IDS"), "devtools")
|
||||
|
||||
|
||||
def _inactive_agent_state(agent_id: str) -> Optional[str]:
    """Return the lifecycle state ("planned"/"disabled") that makes an agent
    unavailable, or None when the agent is active."""
    normalized = str(agent_id or "").strip().lower()
    if normalized in PLANNED_AGENT_IDS:
        return "planned"
    return "disabled" if normalized in DISABLED_AGENT_IDS else None
|
||||
|
||||
TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml")
|
||||
_trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}}
|
||||
|
||||
@@ -894,6 +949,287 @@ def _select_default_llm(agent_id: str, metadata: Dict[str, Any], base_llm: str,
|
||||
return use_llm
|
||||
return base_llm
|
||||
|
||||
|
||||
def _safe_json_from_bytes(payload: bytes) -> Dict[str, Any]:
|
||||
if not payload:
|
||||
return {}
|
||||
try:
|
||||
decoded = payload.decode("utf-8", errors="ignore").strip()
|
||||
if not decoded:
|
||||
return {}
|
||||
value = json.loads(decoded)
|
||||
if isinstance(value, dict):
|
||||
return value
|
||||
except Exception:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_infer_agent_id(path: str) -> Optional[str]:
    """Extract the agent id from a /v1/agents/{id}/infer URL path.

    Returns the lowercase id, or None when the path does not match or the
    captured id is empty after trimming.
    """
    match = _INFER_PATH_RE.match(path or "")
    if match is None:
        return None
    agent = (match.group(1) or "").strip().lower()
    return agent or None
|
||||
|
||||
|
||||
def _infer_channel_from_metadata(metadata: Dict[str, Any]) -> str:
|
||||
channel = str(
|
||||
metadata.get("channel")
|
||||
or metadata.get("channel_type")
|
||||
or metadata.get("source")
|
||||
or metadata.get("entrypoint")
|
||||
or "unknown"
|
||||
).strip().lower()
|
||||
if channel in {"telegram", "web", "api"}:
|
||||
return channel
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _derive_provider_from_backend_model(backend: str, model: str, profile: Optional[str]) -> str:
    """Classify the LLM provider from profile config, backend name, or model name.

    Resolution order: explicit provider on the configured llm profile, then
    substring markers in the backend id, then well-known local model-name
    prefixes; "other" when nothing matches.
    """
    # An explicit provider on the configured llm profile wins outright.
    profiles = (router_config or {}).get("llm_profiles", {}) if isinstance(router_config, dict) else {}
    if profile and isinstance(profiles, dict):
        profile_cfg = profiles.get(profile, {})
        if isinstance(profile_cfg, dict) and profile_cfg.get("provider"):
            return str(profile_cfg.get("provider"))

    backend_l = str(backend or "").lower()
    model_l = str(model or "").lower()
    # Ordered substring markers: first hit wins (mirrors the original chain).
    for marker, provider in (
        ("mistral", "mistral"),
        ("deepseek", "deepseek"),
        ("grok", "grok"),
        ("anthropic", "anthropic"),
        ("claude", "anthropic"),
        ("openai", "openai"),
        ("glm", "glm"),
        ("nats-offload", "remote"),
        ("ollama", "local"),
        ("local", "local"),
    ):
        if marker in backend_l:
            return provider

    # Well-known local model families imply a local backend.
    if model_l.startswith(("qwen", "gemma", "mistral", "deepseek", "glm")):
        return "local"
    return "other"
|
||||
|
||||
|
||||
def _resolve_profile_for_event(agent_id: str, req_payload: Dict[str, Any]) -> Optional[str]:
    """Resolve which llm profile an /infer request would route to.

    Mirrors the router's selection: agent default_llm plus any routing rules.
    Returns None when the router config or agent default is unusable.
    """
    if not isinstance(router_config, dict):
        return None

    metadata = req_payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    agent_cfg = (router_config.get("agents") or {}).get(agent_id, {})
    if not isinstance(agent_cfg, dict):
        return None

    base_llm = str(agent_cfg.get("default_llm") or "").strip()
    if not base_llm:
        return None

    rules = router_config.get("routing") or []
    if not isinstance(rules, list):
        return base_llm
    return _select_default_llm(agent_id, metadata, base_llm, rules)
|
||||
|
||||
|
||||
def _lesson_guarded_text(value: Any, max_len: int = 220) -> str:
    """Sanitize lesson text for prompt injection: collapse whitespace, reject
    text containing injection markers, and clamp to *max_len* characters.

    Returns "" when the text is empty or trips an injection guard.
    """
    collapsed = re.sub(r"\s+", " ", str(value or "")).strip()
    if not collapsed:
        return ""
    lowered = collapsed.lower()
    for marker in LESSONS_INJECTION_GUARDS:
        if marker in lowered:
            return ""
    return collapsed if len(collapsed) <= max_len else collapsed[:max_len].rstrip()
|
||||
|
||||
|
||||
def _decode_lesson_signals(raw: Any) -> Dict[str, Any]:
|
||||
if isinstance(raw, dict):
|
||||
return dict(raw)
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except Exception:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _score_lesson_record(
    row: Dict[str, Any],
    *,
    agent_id: str,
    provider: str,
    model: str,
    profile: str,
    last_error_class: Optional[str],
) -> float:
    """Score a lesson row's relevance to the current inference context.

    Additive scoring: +3.0 for an exact agent match, +2.0 when the lesson's
    recorded error class matches the most recent error, +1.0 each for
    matching provider, model, and profile, minus an age penalty (capped at
    2.0) so stale lessons rank below fresh ones. Callers are expected to
    pass provider/model/profile already lower-cased (see _fetch_ranked_lessons).
    """
    score = 0.0
    row_agent_id = str(row.get("agent_id") or "").strip().lower()
    if row_agent_id and row_agent_id == agent_id:
        score += 3.0

    # Signals may arrive as a dict or a JSON string; decode defensively.
    signals = _decode_lesson_signals(row.get("signals"))
    signal_error = str(signals.get("error_class") or "").strip().lower()
    if last_error_class and signal_error and signal_error == last_error_class.lower():
        score += 2.0

    signal_provider = str(signals.get("provider") or "").strip().lower()
    signal_model = str(signals.get("model") or "").strip().lower()
    signal_profile = str(signals.get("profile") or "").strip().lower()
    if provider and signal_provider and signal_provider == provider:
        score += 1.0
    if model and signal_model and signal_model == model:
        score += 1.0
    if profile and signal_profile and signal_profile == profile:
        score += 1.0

    row_ts = row.get("ts")
    if isinstance(row_ts, datetime):
        # Treat naive timestamps as UTC before measuring age.
        dt = row_ts if row_ts.tzinfo else row_ts.replace(tzinfo=timezone.utc)
        age_hours = max(0.0, (datetime.now(timezone.utc) - dt).total_seconds() / 3600.0)
        score -= min(2.0, age_hours / 168.0)  # down-rank lessons older than ~7 days

    return score
|
||||
|
||||
|
||||
def _render_operational_lessons(lessons: List[Dict[str, Any]], max_chars: int) -> str:
    """Render lessons into a compact numbered block for system-prompt injection.

    Lessons with an empty (or injection-guarded) trigger/action/avoid are
    skipped; rendering stops once adding another entry would push the block
    past *max_chars*. Returns "" when no usable lesson fits.
    """
    if not lessons:
        return ""

    lines = ["Operational Lessons (apply if relevant):"]
    # Bug fix: number only the lessons actually rendered. The previous
    # enumerate(lessons, start=1) counted skipped lessons too, producing
    # gapped numbering like "1) ... 3) ..." after filtering.
    rendered = 0
    for lesson in lessons:
        trigger = _lesson_guarded_text(lesson.get("trigger"), max_len=220)
        action = _lesson_guarded_text(lesson.get("action"), max_len=220)
        avoid = _lesson_guarded_text(lesson.get("avoid"), max_len=220)
        if not trigger or not action or not avoid:
            continue
        chunk = f"{rendered + 1}) Trigger: {trigger}\n Do: {action}\n Avoid: {avoid}"
        # Stop before exceeding the character budget.
        if len("\n".join(lines + [chunk])) > max_chars:
            break
        lines.append(chunk)
        rendered += 1

    if len(lines) <= 1:
        return ""
    return "\n".join(lines)
|
||||
|
||||
|
||||
async def _update_last_infer_signal(agent_id: str, *, ok: bool, error_class: Optional[str], latency_ms: int) -> None:
    """Record the most recent /infer outcome for an agent in the signal cache.

    The cache is an OrderedDict used as an LRU with a TTL: writing refreshes
    recency, expired entries are purged, and the size is hard-capped.
    """
    key = str(agent_id or "").strip().lower()
    if not key:
        return

    now = time.monotonic()
    async with _lessons_signal_lock:
        _lessons_signal_cache[key] = {
            "ok": bool(ok),
            "error_class": str(error_class or "").strip() or None,
            "latency_ms": int(max(0, latency_ms)),
            "seen_at": now,
        }
        _lessons_signal_cache.move_to_end(key, last=True)

        # Purge entries older than the TTL (floor of 30s), then enforce a
        # hard size cap by evicting the least recently used entries.
        cutoff = now - max(30, LESSONS_SIGNAL_CACHE_TTL_SECONDS)
        expired = [k for k, v in _lessons_signal_cache.items() if float(v.get("seen_at", 0.0)) < cutoff]
        for expired_key in expired:
            _lessons_signal_cache.pop(expired_key, None)
        while len(_lessons_signal_cache) > 4000:
            _lessons_signal_cache.popitem(last=False)
|
||||
|
||||
|
||||
async def _get_last_infer_signal(agent_id: str) -> Optional[Dict[str, Any]]:
    """Return a copy of the cached last /infer signal for an agent.

    Returns None for a blank agent id, a cache miss, or an entry older than
    the TTL (which is evicted lazily on read).
    """
    key = str(agent_id or "").strip().lower()
    if not key:
        return None

    now = time.monotonic()
    async with _lessons_signal_lock:
        entry = _lessons_signal_cache.get(key)
        if not entry:
            return None
        if (now - float(entry.get("seen_at", 0.0))) > LESSONS_SIGNAL_CACHE_TTL_SECONDS:
            # Expired — drop it so the cache does not serve stale signals.
            _lessons_signal_cache.pop(key, None)
            return None
        return dict(entry)
|
||||
|
||||
|
||||
async def _fetch_ranked_lessons(
    *,
    agent_id: str,
    provider: str,
    model: str,
    profile: str,
    last_error_class: Optional[str],
    limit: int,
) -> Tuple[List[Dict[str, Any]], str, int]:
    """Fetch, filter, and rank candidate lessons for prompt attachment.

    Returns (lessons, status, elapsed_ms) where status is "ok", "timeout",
    or "err". Pulls a candidate pool of up to 50 recent 'infer' lessons for
    this agent (or agent-agnostic rows), drops any whose trigger/action/avoid
    fail the injection guard, scores them via _score_lesson_record, and
    returns the top *limit* (at least 1).
    """
    # No pool means lessons retrieval was never initialized — report "err".
    if lessons_db_pool is None:
        return [], "err", 0

    # Candidate pool: agent-specific rows ordered first, newest first.
    query = """
        SELECT lesson_key, ts, scope, agent_id, task_type, trigger, action, avoid, signals
        FROM agent_lessons
        WHERE (agent_id = $1 OR agent_id IS NULL)
          AND task_type = 'infer'
        ORDER BY (agent_id = $1) DESC, ts DESC
        LIMIT 50
    """

    started = time.time()
    try:
        async with lessons_db_pool.acquire() as conn:
            # Bound the query by the attach-latency budget so a slow DB
            # cannot stall the /infer hot path.
            rows = await asyncio.wait_for(
                conn.fetch(query, str(agent_id).strip().lower()),
                timeout=LESSONS_ATTACH_TIMEOUT_MS / 1000.0,
            )
    except asyncio.TimeoutError:
        elapsed = max(0, int((time.time() - started) * 1000))
        return [], "timeout", elapsed
    except Exception as e:
        logger.debug("Lessons retrieval failed: %s", e)
        elapsed = max(0, int((time.time() - started) * 1000))
        return [], "err", elapsed

    ranked: List[Tuple[float, datetime, Dict[str, Any]]] = []
    for row in rows:
        row_data = dict(row)
        lesson = {
            "lesson_key": row_data.get("lesson_key"),
            "ts": row_data.get("ts"),
            "scope": row_data.get("scope"),
            "agent_id": row_data.get("agent_id"),
            "task_type": row_data.get("task_type"),
            "trigger": row_data.get("trigger"),
            "action": row_data.get("action"),
            "avoid": row_data.get("avoid"),
            "signals": _decode_lesson_signals(row_data.get("signals")),
        }

        # Skip lessons whose core fields are empty or tripped the guard —
        # they would be filtered out again at render time anyway.
        if not (
            _lesson_guarded_text(lesson.get("trigger"))
            and _lesson_guarded_text(lesson.get("action"))
            and _lesson_guarded_text(lesson.get("avoid"))
        ):
            continue

        score = _score_lesson_record(
            lesson,
            agent_id=agent_id,
            provider=(provider or "").strip().lower(),
            model=(model or "").strip().lower(),
            profile=(profile or "").strip().lower(),
            last_error_class=last_error_class,
        )
        ts = lesson.get("ts")
        if not isinstance(ts, datetime):
            # Missing/invalid timestamp: pretend it is a year old so it
            # loses score ties against rows with real timestamps.
            ts = datetime.now(timezone.utc) - timedelta(days=365)
        ranked.append((score, ts, lesson))

    # NOTE(review): score ties fall through to comparing datetimes; if the DB
    # returns naive timestamps this mixes naive and aware values and raises
    # TypeError — confirm agent_lessons.ts is a timezone-aware column.
    ranked.sort(key=lambda item: (item[0], item[1]), reverse=True)
    selected = [item[2] for item in ranked[: max(1, limit)]]
    elapsed = max(0, int((time.time() - started) * 1000))
    return selected, "ok", elapsed
|
||||
|
||||
app = FastAPI(title="DAARION Router", version="2.0.0")
|
||||
|
||||
# Configuration
|
||||
@@ -907,6 +1243,27 @@ VISION_URL = os.getenv("VISION_URL", "http://host.docker.internal:11434")
|
||||
OCR_URL = os.getenv("OCR_URL", "http://swapper-service:8890")
|
||||
DOCUMENT_URL = os.getenv("DOCUMENT_URL", "http://swapper-service:8890")
|
||||
CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
|
||||
LESSONS_ATTACH_ENABLED = os.getenv("LESSONS_ATTACH_ENABLED", "true").lower() in {"1", "true", "yes"}
|
||||
LESSONS_ATTACH_MIN = max(1, int(os.getenv("LESSONS_ATTACH_MIN", "3")))
|
||||
LESSONS_ATTACH_MAX = max(LESSONS_ATTACH_MIN, int(os.getenv("LESSONS_ATTACH_MAX", "7")))
|
||||
LESSONS_ATTACH_TIMEOUT_MS = max(5, int(os.getenv("LESSONS_ATTACH_TIMEOUT_MS", "25")))
|
||||
LESSONS_ATTACH_SAMPLE_PCT = max(0.0, min(100.0, float(os.getenv("LESSONS_ATTACH_SAMPLE_PCT", "10"))))
|
||||
LESSONS_ATTACH_MAX_CHARS = max(400, int(os.getenv("LESSONS_ATTACH_MAX_CHARS", "1200")))
|
||||
LESSONS_SIGNAL_CACHE_TTL_SECONDS = max(30, int(os.getenv("LESSONS_SIGNAL_CACHE_TTL_SECONDS", "300")))
|
||||
LESSONS_LATENCY_SPIKE_MS = max(250, int(os.getenv("EXPERIENCE_LATENCY_SPIKE_MS", "5000")))
|
||||
LESSONS_DATABASE_URL = (
|
||||
os.getenv("LESSONS_DATABASE_URL")
|
||||
or os.getenv("EXPERIENCE_DATABASE_URL")
|
||||
or os.getenv("DATABASE_URL")
|
||||
)
|
||||
|
||||
LESSONS_INJECTION_GUARDS = (
|
||||
"ignore previous",
|
||||
"ignore all previous",
|
||||
"system:",
|
||||
"developer:",
|
||||
"```",
|
||||
)
|
||||
|
||||
# CrewAI Routing Configuration
|
||||
CREWAI_ROUTING_ENABLED = os.getenv("CREWAI_ROUTING_ENABLED", "true").lower() == "true"
|
||||
@@ -947,6 +1304,12 @@ nats_available = False
|
||||
# Tool Manager
|
||||
tool_manager = None
|
||||
runtime_guard_engine = None
|
||||
experience_bus = None
|
||||
lessons_db_pool = None
|
||||
_lessons_signal_cache: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
|
||||
_lessons_signal_lock = asyncio.Lock()
|
||||
|
||||
_INFER_PATH_RE = re.compile(r"^/v1/agents/([^/]+)/infer/?$")
|
||||
|
||||
# Models
|
||||
class FilterDecision(BaseModel):
|
||||
@@ -999,10 +1362,146 @@ def load_router_config():
|
||||
config = load_config()
|
||||
router_config = load_router_config()
|
||||
|
||||
|
||||
@app.middleware("http")
async def experience_capture_middleware(request: Request, call_next):
    """Capture /infer outcomes and emit ExperienceEvent asynchronously."""
    # Only POST /v1/agents/{id}/infer requests are captured, and only when
    # the experience bus module loaded and its instance was initialized.
    infer_agent_id = _extract_infer_agent_id(request.url.path)
    if (
        not infer_agent_id
        or request.method.upper() != "POST"
        or not EXPERIENCE_BUS_AVAILABLE
        or experience_bus is None
    ):
        return await call_next(request)

    started_at = time.time()
    # Consume the request body up front so we can both inspect it here and
    # replay it to the downstream handler.
    req_body = await request.body()

    async def _receive() -> Dict[str, Any]:
        # Replay receive callable: hands the buffered body back to the
        # downstream app exactly once.
        return {"type": "http.request", "body": req_body, "more_body": False}

    wrapped_request = Request(request.scope, _receive)

    response = None
    response_body = b""
    status_code = 500
    caught_exc: Optional[Exception] = None

    try:
        response = await call_next(wrapped_request)
        status_code = int(response.status_code)
        # Drain the streaming body iterator so the payload can be parsed;
        # it is re-emitted via a fresh Response at the end.
        chunks: List[bytes] = []
        async for chunk in response.body_iterator:
            chunks.append(chunk)
        response_body = b"".join(chunks)
    except Exception as exc:  # pragma: no cover - defensive capture path
        # Hold the exception so the event is still emitted; re-raised below.
        caught_exc = exc
        status_code = 500

    latency_ms = max(0, int((time.time() - started_at) * 1000))

    # Event construction is strictly best-effort: any failure here must not
    # affect the client-visible response.
    try:
        req_payload = _safe_json_from_bytes(req_body)
        resp_payload = _safe_json_from_bytes(response_body)
        metadata = req_payload.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        # Hash the normalized prompt instead of storing raw user input.
        prompt = str(req_payload.get("prompt") or "")
        normalized_input = normalize_input_for_hash(prompt)
        inputs_hash = hashlib.sha256(normalized_input.encode("utf-8")).hexdigest()

        profile = _resolve_profile_for_event(infer_agent_id, req_payload)
        profile_cfg = {}
        if profile and isinstance(router_config, dict):
            profile_cfg = (router_config.get("llm_profiles") or {}).get(profile, {}) or {}
        if not isinstance(profile_cfg, dict):
            profile_cfg = {}

        # Prefer what the response reports; fall back to the profile config.
        model = str(resp_payload.get("model") or profile_cfg.get("model") or "unknown")
        backend = str(resp_payload.get("backend") or "")
        provider = _derive_provider_from_backend_model(backend, model, profile)

        tokens_total = resp_payload.get("tokens_used")
        tokens_out = int(tokens_total) if isinstance(tokens_total, int) else None
        request_id = str(
            metadata.get("request_id")
            or metadata.get("trace_id")
            or request.headers.get("x-request-id")
            or ""
        ).strip() or None

        # Derive an error classification from the exception (if any) or the
        # HTTP error payload shape (dict detail, string detail, or neither).
        err_class: Optional[str] = None
        err_msg: Optional[str] = None
        detail_obj = resp_payload.get("detail")
        if caught_exc is not None:
            err_class = type(caught_exc).__name__
            err_msg = str(caught_exc)
        elif status_code >= 400:
            if isinstance(detail_obj, dict):
                err_class = str(detail_obj.get("code") or detail_obj.get("error_class") or f"http_{status_code}")
                err_msg = str(detail_obj.get("message") or detail_obj.get("detail") or json.dumps(detail_obj))
            elif isinstance(detail_obj, str):
                err_class = f"http_{status_code}"
                err_msg = detail_obj
            else:
                err_class = f"http_{status_code}"
                err_msg = f"http_status={status_code}"

        # Feed the per-agent signal cache that drives lessons retrieval.
        await _update_last_infer_signal(
            infer_agent_id,
            ok=status_code < 400,
            error_class=err_class,
            latency_ms=latency_ms,
        )

        event = {
            "event_id": str(uuid.uuid4()),
            "ts": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "node_id": os.getenv("NODE_ID", "NODA1"),
            "source": "router",
            "agent_id": infer_agent_id,
            "request_id": request_id,
            "channel": _infer_channel_from_metadata(metadata),
            "task_type": "infer",
            "inputs_hash": inputs_hash,
            "llm": {
                "provider": provider,
                "model": model,
                "profile": profile,
                "latency_ms": latency_ms,
                "tokens_in": None,
                "tokens_out": tokens_out,
            },
            "result": {
                "ok": status_code < 400,
                "error_class": err_class,
                # Redact before persisting — error strings may embed user data.
                "error_msg_redacted": redact_error_message(err_msg),
                "http_status": status_code,
            },
        }
        await experience_bus.capture(event)
    except Exception as exp_err:
        logger.debug("Experience capture skipped: %s", exp_err)

    # Propagate the handler's exception only after capture was attempted.
    if caught_exc is not None:
        raise caught_exc

    # Rebuild the response: the body iterator was consumed above, and the
    # stale content-length header must be dropped so it is recomputed.
    headers = dict(response.headers) if response is not None else {}
    headers.pop("content-length", None)
    return Response(
        content=response_body,
        status_code=status_code,
        headers=headers,
        media_type=response.media_type if response is not None else "application/json",
        background=response.background if response is not None else None,
    )
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Initialize NATS connection and subscriptions"""
|
||||
global nc, nats_available, http_client, neo4j_driver, neo4j_available, runtime_guard_engine
|
||||
global nc, nats_available, http_client, neo4j_driver, neo4j_available, runtime_guard_engine, experience_bus, lessons_db_pool
|
||||
logger.info("🚀 DAGI Router v2.0.0 starting up...")
|
||||
|
||||
# Initialize HTTP client
|
||||
@@ -1041,6 +1540,34 @@ async def startup_event():
|
||||
logger.warning(f"⚠️ NATS not available: {e}")
|
||||
logger.warning("⚠️ Running in test mode (HTTP only)")
|
||||
nats_available = False
|
||||
|
||||
# Initialize Experience Bus (Phase-1)
|
||||
if EXPERIENCE_BUS_AVAILABLE and ExperienceBus is not None:
|
||||
try:
|
||||
experience_bus = ExperienceBus()
|
||||
await experience_bus.start(nats_client=nc if nats_available else None)
|
||||
logger.info("✅ Experience Bus initialized")
|
||||
except Exception as e:
|
||||
experience_bus = None
|
||||
logger.warning(f"⚠️ Experience Bus init failed: {e}")
|
||||
|
||||
# Initialize lessons retrieval pool (Phase-3 read path)
|
||||
if LESSONS_ATTACH_ENABLED:
|
||||
if asyncpg is None:
|
||||
logger.warning("⚠️ Lessons attach enabled but asyncpg is unavailable")
|
||||
elif not LESSONS_DATABASE_URL:
|
||||
logger.warning("⚠️ Lessons attach enabled but LESSONS_DATABASE_URL is missing")
|
||||
else:
|
||||
try:
|
||||
lessons_db_pool = await asyncpg.create_pool(
|
||||
LESSONS_DATABASE_URL,
|
||||
min_size=1,
|
||||
max_size=3,
|
||||
)
|
||||
logger.info("✅ Lessons DB pool initialized")
|
||||
except Exception as e:
|
||||
lessons_db_pool = None
|
||||
logger.warning(f"⚠️ Lessons DB pool init failed: {e}")
|
||||
|
||||
# Initialize Memory Retrieval Pipeline
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
@@ -1765,6 +2292,24 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
"""
|
||||
logger.info(f"🔀 Inference request for agent: {agent_id}")
|
||||
logger.info(f"📝 Prompt: {request.prompt[:100]}...")
|
||||
|
||||
inactive_state = _inactive_agent_state(agent_id)
|
||||
if inactive_state is not None:
|
||||
status_code = 410 if inactive_state == "planned" else 404
|
||||
logger.info(
|
||||
"⛔ Agent unavailable by lifecycle state: agent=%s state=%s",
|
||||
agent_id,
|
||||
inactive_state,
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status_code,
|
||||
detail={
|
||||
"code": f"agent_{inactive_state}",
|
||||
"agent_id": str(agent_id).strip().lower(),
|
||||
"state": inactive_state,
|
||||
"message": "Agent is not active in this environment",
|
||||
},
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# MEMORY RETRIEVAL (v4.0 - Universal for all agents)
|
||||
@@ -2682,23 +3227,77 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
# SMART LLM ROUTER WITH AUTO-FALLBACK
|
||||
# Priority: DeepSeek → Mistral → Grok → Local Ollama
|
||||
# =========================================================================
|
||||
|
||||
|
||||
lessons_block = ""
|
||||
lessons_attached_count = 0
|
||||
if LESSONS_ATTACH_ENABLED and not request.images:
|
||||
retrieval_always_on = False
|
||||
retrieval_limit = LESSONS_ATTACH_MIN
|
||||
last_signal = await _get_last_infer_signal(request_agent_id)
|
||||
last_error_class = None
|
||||
if last_signal:
|
||||
last_error_class = last_signal.get("error_class")
|
||||
if (not bool(last_signal.get("ok", True))) or int(last_signal.get("latency_ms", 0) or 0) >= LESSONS_LATENCY_SPIKE_MS:
|
||||
retrieval_always_on = True
|
||||
retrieval_limit = LESSONS_ATTACH_MAX
|
||||
|
||||
should_retrieve = retrieval_always_on or (random_module.random() * 100.0 < LESSONS_ATTACH_SAMPLE_PCT)
|
||||
if should_retrieve:
|
||||
lessons_rows, retrieval_status, retrieval_latency_ms = await _fetch_ranked_lessons(
|
||||
agent_id=request_agent_id,
|
||||
provider=str(provider or "").strip().lower(),
|
||||
model=str(model or "").strip().lower(),
|
||||
profile=str(default_llm or "").strip().lower(),
|
||||
last_error_class=str(last_error_class or "").strip() or None,
|
||||
limit=retrieval_limit,
|
||||
)
|
||||
inc_lessons_retrieved(status=retrieval_status)
|
||||
observe_lessons_attach_latency(latency_ms=float(retrieval_latency_ms))
|
||||
|
||||
if retrieval_status == "ok" and lessons_rows:
|
||||
selected_lessons = lessons_rows[:retrieval_limit]
|
||||
lessons_block = _render_operational_lessons(selected_lessons, LESSONS_ATTACH_MAX_CHARS)
|
||||
if lessons_block:
|
||||
lessons_attached_count = len(selected_lessons)
|
||||
logger.info(
|
||||
"🧠 lessons_attached=%s agent=%s mode=%s",
|
||||
lessons_attached_count,
|
||||
request_agent_id,
|
||||
"always_on" if retrieval_always_on else "sampled",
|
||||
)
|
||||
inc_lessons_attached(count=lessons_attached_count)
|
||||
|
||||
# Build messages array once for all providers
|
||||
messages = []
|
||||
if system_prompt:
|
||||
combined_parts: List[str] = [system_prompt]
|
||||
if memory_brief_text:
|
||||
enhanced_prompt = f"{system_prompt}\n\n[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}"
|
||||
messages.append({"role": "system", "content": enhanced_prompt})
|
||||
logger.info(f"📝 Added system message with prompt ({len(system_prompt)} chars) + memory ({len(memory_brief_text)} chars)")
|
||||
else:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
logger.info(f"📝 Added system message with prompt ({len(system_prompt)} chars)")
|
||||
elif memory_brief_text:
|
||||
messages.append({"role": "system", "content": f"[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}"})
|
||||
logger.warning(f"⚠️ No system_prompt! Using only memory brief ({len(memory_brief_text)} chars)")
|
||||
combined_parts.append(f"[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}")
|
||||
if lessons_block:
|
||||
combined_parts.append(f"[OPERATIONAL LESSONS - INTERNAL]\n{lessons_block}")
|
||||
enhanced_prompt = "\n\n".join(combined_parts)
|
||||
messages.append({"role": "system", "content": enhanced_prompt})
|
||||
logger.info(
|
||||
"📝 Added system message prompt=%s memory=%s lessons=%s",
|
||||
len(system_prompt),
|
||||
len(memory_brief_text or ""),
|
||||
lessons_attached_count,
|
||||
)
|
||||
elif memory_brief_text or lessons_block:
|
||||
fallback_parts: List[str] = []
|
||||
if memory_brief_text:
|
||||
fallback_parts.append(f"[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}")
|
||||
if lessons_block:
|
||||
fallback_parts.append(f"[OPERATIONAL LESSONS - INTERNAL]\n{lessons_block}")
|
||||
messages.append({"role": "system", "content": "\n\n".join(fallback_parts)})
|
||||
logger.warning(
|
||||
"⚠️ No system_prompt! Using fallback context memory=%s lessons=%s",
|
||||
len(memory_brief_text or ""),
|
||||
lessons_attached_count,
|
||||
)
|
||||
else:
|
||||
logger.error(f"❌ No system_prompt AND no memory_brief! LLM will have no context!")
|
||||
|
||||
logger.error("❌ No system_prompt, memory_brief, or lessons; LLM will have no context")
|
||||
|
||||
messages.append({"role": "user", "content": request.prompt})
|
||||
logger.debug(f"📨 Messages array: {len(messages)} messages, system={len(messages[0].get('content', '')) if messages else 0} chars")
|
||||
|
||||
@@ -4555,7 +5154,7 @@ async def sofiia_model_catalog(refresh_ollama: bool = False):
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
"""Cleanup connections on shutdown"""
|
||||
global neo4j_driver, http_client, nc
|
||||
global neo4j_driver, http_client, nc, experience_bus, lessons_db_pool
|
||||
|
||||
# Close Memory Retrieval
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
@@ -4576,3 +5175,17 @@ async def shutdown_event():
|
||||
if nc:
|
||||
await nc.close()
|
||||
logger.info("🔌 NATS connection closed")
|
||||
|
||||
if EXPERIENCE_BUS_AVAILABLE and experience_bus:
|
||||
try:
|
||||
await experience_bus.stop()
|
||||
logger.info("🔌 Experience Bus closed")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Experience Bus close error: {e}")
|
||||
|
||||
if lessons_db_pool is not None:
|
||||
try:
|
||||
await lessons_db_pool.close()
|
||||
logger.info("🔌 Lessons DB pool closed")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Lessons DB pool close error: {e}")
|
||||
|
||||
Reference in New Issue
Block a user