feat(runtime): sync experience bus and learner stack into main
This commit is contained in:
@@ -1,10 +1,12 @@
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from typing import Literal, Optional, Dict, Any, List
|
||||
from typing import Literal, Optional, Dict, Any, List, Tuple
|
||||
import asyncio
|
||||
from collections import OrderedDict
|
||||
import json
|
||||
import os
|
||||
import random as random_module
|
||||
import re
|
||||
import yaml
|
||||
import httpx
|
||||
@@ -12,6 +14,8 @@ import logging
|
||||
import hashlib
|
||||
import hmac
|
||||
import time # For latency metrics
|
||||
import uuid
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
# CrewAI Integration
|
||||
@@ -62,6 +66,34 @@ except ImportError:
|
||||
global_capabilities_client = None # type: ignore[assignment]
|
||||
offload_client = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
from experience_bus import ExperienceBus, normalize_input_for_hash, redact_error_message
|
||||
EXPERIENCE_BUS_AVAILABLE = True
|
||||
except ImportError:
|
||||
EXPERIENCE_BUS_AVAILABLE = False
|
||||
ExperienceBus = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
import asyncpg
|
||||
except ImportError:
|
||||
asyncpg = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
from agent_metrics import (
|
||||
inc_lessons_retrieved,
|
||||
inc_lessons_attached,
|
||||
observe_lessons_attach_latency,
|
||||
)
|
||||
except Exception:
|
||||
def inc_lessons_retrieved(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
def inc_lessons_attached(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
def observe_lessons_attach_latency(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
NEO4J_NOTIFICATIONS_LOG_LEVEL = os.getenv("NEO4J_NOTIFICATIONS_LOG_LEVEL", "ERROR").strip().upper()
|
||||
@@ -71,6 +103,29 @@ logging.getLogger("neo4j.notifications").setLevel(_neo4j_notifications_level)
|
||||
# If auto-router module is unavailable (or loaded later), inference must still work.
|
||||
SOFIIA_AUTO_ROUTER_AVAILABLE = False
|
||||
|
||||
|
||||
def _parse_agent_id_set(raw_value: Optional[str], default_csv: str = "") -> set[str]:
|
||||
source = raw_value if (raw_value is not None and str(raw_value).strip() != "") else default_csv
|
||||
out: set[str] = set()
|
||||
for part in str(source or "").split(","):
|
||||
token = part.strip().lower()
|
||||
if token:
|
||||
out.add(token)
|
||||
return out
|
||||
|
||||
|
||||
PLANNED_AGENT_IDS = _parse_agent_id_set(os.getenv("PLANNED_AGENT_IDS"), "aistalk")
|
||||
DISABLED_AGENT_IDS = _parse_agent_id_set(os.getenv("DISABLED_AGENT_IDS"), "devtools")
|
||||
|
||||
|
||||
def _inactive_agent_state(agent_id: str) -> Optional[str]:
    """Return the lifecycle state ("planned"/"disabled") that makes an agent
    unavailable, or None when the agent is active."""
    normalized = str(agent_id or "").strip().lower()
    if normalized in PLANNED_AGENT_IDS:
        return "planned"
    return "disabled" if normalized in DISABLED_AGENT_IDS else None
|
||||
|
||||
TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml")
|
||||
_trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}}
|
||||
|
||||
@@ -894,6 +949,287 @@ def _select_default_llm(agent_id: str, metadata: Dict[str, Any], base_llm: str,
|
||||
return use_llm
|
||||
return base_llm
|
||||
|
||||
|
||||
def _safe_json_from_bytes(payload: bytes) -> Dict[str, Any]:
|
||||
if not payload:
|
||||
return {}
|
||||
try:
|
||||
decoded = payload.decode("utf-8", errors="ignore").strip()
|
||||
if not decoded:
|
||||
return {}
|
||||
value = json.loads(decoded)
|
||||
if isinstance(value, dict):
|
||||
return value
|
||||
except Exception:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_infer_agent_id(path: str) -> Optional[str]:
    """Extract the agent id from a /v1/agents/{id}/infer URL path.

    Returns the lowercase id, or None when the path does not match or the
    captured id is empty after trimming.
    """
    match = _INFER_PATH_RE.match(path or "")
    if match is None:
        return None
    agent = (match.group(1) or "").strip().lower()
    return agent or None
|
||||
|
||||
|
||||
def _infer_channel_from_metadata(metadata: Dict[str, Any]) -> str:
|
||||
channel = str(
|
||||
metadata.get("channel")
|
||||
or metadata.get("channel_type")
|
||||
or metadata.get("source")
|
||||
or metadata.get("entrypoint")
|
||||
or "unknown"
|
||||
).strip().lower()
|
||||
if channel in {"telegram", "web", "api"}:
|
||||
return channel
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _derive_provider_from_backend_model(backend: str, model: str, profile: Optional[str]) -> str:
    """Classify the LLM provider from profile config, backend name, or model name.

    Resolution order: explicit provider on the configured llm profile, then
    substring markers in the backend id, then well-known local model-name
    prefixes; "other" when nothing matches.
    """
    # An explicit provider on the configured llm profile wins outright.
    profiles = (router_config or {}).get("llm_profiles", {}) if isinstance(router_config, dict) else {}
    if profile and isinstance(profiles, dict):
        profile_cfg = profiles.get(profile, {})
        if isinstance(profile_cfg, dict) and profile_cfg.get("provider"):
            return str(profile_cfg.get("provider"))

    backend_l = str(backend or "").lower()
    model_l = str(model or "").lower()
    # Ordered substring markers: first hit wins (mirrors the original chain).
    for marker, provider in (
        ("mistral", "mistral"),
        ("deepseek", "deepseek"),
        ("grok", "grok"),
        ("anthropic", "anthropic"),
        ("claude", "anthropic"),
        ("openai", "openai"),
        ("glm", "glm"),
        ("nats-offload", "remote"),
        ("ollama", "local"),
        ("local", "local"),
    ):
        if marker in backend_l:
            return provider

    # Well-known local model families imply a local backend.
    if model_l.startswith(("qwen", "gemma", "mistral", "deepseek", "glm")):
        return "local"
    return "other"
|
||||
|
||||
|
||||
def _resolve_profile_for_event(agent_id: str, req_payload: Dict[str, Any]) -> Optional[str]:
    """Resolve which llm profile an /infer request would route to.

    Mirrors the router's selection: agent default_llm plus any routing rules.
    Returns None when the router config or agent default is unusable.
    """
    if not isinstance(router_config, dict):
        return None

    metadata = req_payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    agent_cfg = (router_config.get("agents") or {}).get(agent_id, {})
    if not isinstance(agent_cfg, dict):
        return None

    base_llm = str(agent_cfg.get("default_llm") or "").strip()
    if not base_llm:
        return None

    rules = router_config.get("routing") or []
    if not isinstance(rules, list):
        return base_llm
    return _select_default_llm(agent_id, metadata, base_llm, rules)
|
||||
|
||||
|
||||
def _lesson_guarded_text(value: Any, max_len: int = 220) -> str:
    """Sanitize lesson text for prompt injection: collapse whitespace, reject
    text containing injection markers, and clamp to *max_len* characters.

    Returns "" when the text is empty or trips an injection guard.
    """
    collapsed = re.sub(r"\s+", " ", str(value or "")).strip()
    if not collapsed:
        return ""
    lowered = collapsed.lower()
    for marker in LESSONS_INJECTION_GUARDS:
        if marker in lowered:
            return ""
    return collapsed if len(collapsed) <= max_len else collapsed[:max_len].rstrip()
|
||||
|
||||
|
||||
def _decode_lesson_signals(raw: Any) -> Dict[str, Any]:
|
||||
if isinstance(raw, dict):
|
||||
return dict(raw)
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except Exception:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _score_lesson_record(
    row: Dict[str, Any],
    *,
    agent_id: str,
    provider: str,
    model: str,
    profile: str,
    last_error_class: Optional[str],
) -> float:
    """Score a lesson row's relevance to the current inference context.

    Additive scoring: +3.0 for an exact agent match, +2.0 when the lesson's
    recorded error class matches the most recent error, +1.0 each for
    matching provider, model, and profile, minus an age penalty (capped at
    2.0) so stale lessons rank below fresh ones. Callers are expected to
    pass provider/model/profile already lower-cased (see _fetch_ranked_lessons).
    """
    score = 0.0
    row_agent_id = str(row.get("agent_id") or "").strip().lower()
    if row_agent_id and row_agent_id == agent_id:
        score += 3.0

    # Signals may arrive as a dict or a JSON string; decode defensively.
    signals = _decode_lesson_signals(row.get("signals"))
    signal_error = str(signals.get("error_class") or "").strip().lower()
    if last_error_class and signal_error and signal_error == last_error_class.lower():
        score += 2.0

    signal_provider = str(signals.get("provider") or "").strip().lower()
    signal_model = str(signals.get("model") or "").strip().lower()
    signal_profile = str(signals.get("profile") or "").strip().lower()
    if provider and signal_provider and signal_provider == provider:
        score += 1.0
    if model and signal_model and signal_model == model:
        score += 1.0
    if profile and signal_profile and signal_profile == profile:
        score += 1.0

    row_ts = row.get("ts")
    if isinstance(row_ts, datetime):
        # Treat naive timestamps as UTC before measuring age.
        dt = row_ts if row_ts.tzinfo else row_ts.replace(tzinfo=timezone.utc)
        age_hours = max(0.0, (datetime.now(timezone.utc) - dt).total_seconds() / 3600.0)
        score -= min(2.0, age_hours / 168.0)  # down-rank lessons older than ~7 days

    return score
|
||||
|
||||
|
||||
def _render_operational_lessons(lessons: List[Dict[str, Any]], max_chars: int) -> str:
    """Render lessons into a compact numbered block for system-prompt injection.

    Lessons with an empty (or injection-guarded) trigger/action/avoid are
    skipped; rendering stops once adding another entry would push the block
    past *max_chars*. Returns "" when no usable lesson fits.
    """
    if not lessons:
        return ""

    lines = ["Operational Lessons (apply if relevant):"]
    # Bug fix: number only the lessons actually rendered. The previous
    # enumerate(lessons, start=1) counted skipped lessons too, producing
    # gapped numbering like "1) ... 3) ..." after filtering.
    rendered = 0
    for lesson in lessons:
        trigger = _lesson_guarded_text(lesson.get("trigger"), max_len=220)
        action = _lesson_guarded_text(lesson.get("action"), max_len=220)
        avoid = _lesson_guarded_text(lesson.get("avoid"), max_len=220)
        if not trigger or not action or not avoid:
            continue
        chunk = f"{rendered + 1}) Trigger: {trigger}\n Do: {action}\n Avoid: {avoid}"
        # Stop before exceeding the character budget.
        if len("\n".join(lines + [chunk])) > max_chars:
            break
        lines.append(chunk)
        rendered += 1

    if len(lines) <= 1:
        return ""
    return "\n".join(lines)
|
||||
|
||||
|
||||
async def _update_last_infer_signal(agent_id: str, *, ok: bool, error_class: Optional[str], latency_ms: int) -> None:
    """Record the most recent /infer outcome for an agent in the signal cache.

    The cache is an OrderedDict used as an LRU with a TTL: writing refreshes
    recency, expired entries are purged, and the size is hard-capped.
    """
    key = str(agent_id or "").strip().lower()
    if not key:
        return

    now = time.monotonic()
    async with _lessons_signal_lock:
        _lessons_signal_cache[key] = {
            "ok": bool(ok),
            "error_class": str(error_class or "").strip() or None,
            "latency_ms": int(max(0, latency_ms)),
            "seen_at": now,
        }
        _lessons_signal_cache.move_to_end(key, last=True)

        # Purge entries older than the TTL (floor of 30s), then enforce a
        # hard size cap by evicting the least recently used entries.
        cutoff = now - max(30, LESSONS_SIGNAL_CACHE_TTL_SECONDS)
        expired = [k for k, v in _lessons_signal_cache.items() if float(v.get("seen_at", 0.0)) < cutoff]
        for expired_key in expired:
            _lessons_signal_cache.pop(expired_key, None)
        while len(_lessons_signal_cache) > 4000:
            _lessons_signal_cache.popitem(last=False)
|
||||
|
||||
|
||||
async def _get_last_infer_signal(agent_id: str) -> Optional[Dict[str, Any]]:
    """Return a copy of the cached last /infer signal for an agent.

    Returns None for a blank agent id, a cache miss, or an entry older than
    the TTL (which is evicted lazily on read).
    """
    key = str(agent_id or "").strip().lower()
    if not key:
        return None

    now = time.monotonic()
    async with _lessons_signal_lock:
        entry = _lessons_signal_cache.get(key)
        if not entry:
            return None
        if (now - float(entry.get("seen_at", 0.0))) > LESSONS_SIGNAL_CACHE_TTL_SECONDS:
            # Expired — drop it so the cache does not serve stale signals.
            _lessons_signal_cache.pop(key, None)
            return None
        return dict(entry)
|
||||
|
||||
|
||||
async def _fetch_ranked_lessons(
    *,
    agent_id: str,
    provider: str,
    model: str,
    profile: str,
    last_error_class: Optional[str],
    limit: int,
) -> Tuple[List[Dict[str, Any]], str, int]:
    """Fetch, filter, and rank candidate lessons for prompt attachment.

    Returns (lessons, status, elapsed_ms) where status is "ok", "timeout",
    or "err". Pulls a candidate pool of up to 50 recent 'infer' lessons for
    this agent (or agent-agnostic rows), drops any whose trigger/action/avoid
    fail the injection guard, scores them via _score_lesson_record, and
    returns the top *limit* (at least 1).
    """
    # No pool means lessons retrieval was never initialized — report "err".
    if lessons_db_pool is None:
        return [], "err", 0

    # Candidate pool: agent-specific rows ordered first, newest first.
    query = """
        SELECT lesson_key, ts, scope, agent_id, task_type, trigger, action, avoid, signals
        FROM agent_lessons
        WHERE (agent_id = $1 OR agent_id IS NULL)
          AND task_type = 'infer'
        ORDER BY (agent_id = $1) DESC, ts DESC
        LIMIT 50
    """

    started = time.time()
    try:
        async with lessons_db_pool.acquire() as conn:
            # Bound the query by the attach-latency budget so a slow DB
            # cannot stall the /infer hot path.
            rows = await asyncio.wait_for(
                conn.fetch(query, str(agent_id).strip().lower()),
                timeout=LESSONS_ATTACH_TIMEOUT_MS / 1000.0,
            )
    except asyncio.TimeoutError:
        elapsed = max(0, int((time.time() - started) * 1000))
        return [], "timeout", elapsed
    except Exception as e:
        logger.debug("Lessons retrieval failed: %s", e)
        elapsed = max(0, int((time.time() - started) * 1000))
        return [], "err", elapsed

    ranked: List[Tuple[float, datetime, Dict[str, Any]]] = []
    for row in rows:
        row_data = dict(row)
        lesson = {
            "lesson_key": row_data.get("lesson_key"),
            "ts": row_data.get("ts"),
            "scope": row_data.get("scope"),
            "agent_id": row_data.get("agent_id"),
            "task_type": row_data.get("task_type"),
            "trigger": row_data.get("trigger"),
            "action": row_data.get("action"),
            "avoid": row_data.get("avoid"),
            "signals": _decode_lesson_signals(row_data.get("signals")),
        }

        # Skip lessons whose core fields are empty or tripped the guard —
        # they would be filtered out again at render time anyway.
        if not (
            _lesson_guarded_text(lesson.get("trigger"))
            and _lesson_guarded_text(lesson.get("action"))
            and _lesson_guarded_text(lesson.get("avoid"))
        ):
            continue

        score = _score_lesson_record(
            lesson,
            agent_id=agent_id,
            provider=(provider or "").strip().lower(),
            model=(model or "").strip().lower(),
            profile=(profile or "").strip().lower(),
            last_error_class=last_error_class,
        )
        ts = lesson.get("ts")
        if not isinstance(ts, datetime):
            # Missing/invalid timestamp: pretend it is a year old so it
            # loses score ties against rows with real timestamps.
            ts = datetime.now(timezone.utc) - timedelta(days=365)
        ranked.append((score, ts, lesson))

    # NOTE(review): score ties fall through to comparing datetimes; if the DB
    # returns naive timestamps this mixes naive and aware values and raises
    # TypeError — confirm agent_lessons.ts is a timezone-aware column.
    ranked.sort(key=lambda item: (item[0], item[1]), reverse=True)
    selected = [item[2] for item in ranked[: max(1, limit)]]
    elapsed = max(0, int((time.time() - started) * 1000))
    return selected, "ok", elapsed
|
||||
|
||||
app = FastAPI(title="DAARION Router", version="2.0.0")
|
||||
|
||||
# Configuration
|
||||
@@ -907,6 +1243,27 @@ VISION_URL = os.getenv("VISION_URL", "http://host.docker.internal:11434")
|
||||
OCR_URL = os.getenv("OCR_URL", "http://swapper-service:8890")
|
||||
DOCUMENT_URL = os.getenv("DOCUMENT_URL", "http://swapper-service:8890")
|
||||
CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
|
||||
LESSONS_ATTACH_ENABLED = os.getenv("LESSONS_ATTACH_ENABLED", "true").lower() in {"1", "true", "yes"}
|
||||
LESSONS_ATTACH_MIN = max(1, int(os.getenv("LESSONS_ATTACH_MIN", "3")))
|
||||
LESSONS_ATTACH_MAX = max(LESSONS_ATTACH_MIN, int(os.getenv("LESSONS_ATTACH_MAX", "7")))
|
||||
LESSONS_ATTACH_TIMEOUT_MS = max(5, int(os.getenv("LESSONS_ATTACH_TIMEOUT_MS", "25")))
|
||||
LESSONS_ATTACH_SAMPLE_PCT = max(0.0, min(100.0, float(os.getenv("LESSONS_ATTACH_SAMPLE_PCT", "10"))))
|
||||
LESSONS_ATTACH_MAX_CHARS = max(400, int(os.getenv("LESSONS_ATTACH_MAX_CHARS", "1200")))
|
||||
LESSONS_SIGNAL_CACHE_TTL_SECONDS = max(30, int(os.getenv("LESSONS_SIGNAL_CACHE_TTL_SECONDS", "300")))
|
||||
LESSONS_LATENCY_SPIKE_MS = max(250, int(os.getenv("EXPERIENCE_LATENCY_SPIKE_MS", "5000")))
|
||||
LESSONS_DATABASE_URL = (
|
||||
os.getenv("LESSONS_DATABASE_URL")
|
||||
or os.getenv("EXPERIENCE_DATABASE_URL")
|
||||
or os.getenv("DATABASE_URL")
|
||||
)
|
||||
|
||||
LESSONS_INJECTION_GUARDS = (
|
||||
"ignore previous",
|
||||
"ignore all previous",
|
||||
"system:",
|
||||
"developer:",
|
||||
"```",
|
||||
)
|
||||
|
||||
# CrewAI Routing Configuration
|
||||
CREWAI_ROUTING_ENABLED = os.getenv("CREWAI_ROUTING_ENABLED", "true").lower() == "true"
|
||||
@@ -947,6 +1304,12 @@ nats_available = False
|
||||
# Tool Manager
|
||||
tool_manager = None
|
||||
runtime_guard_engine = None
|
||||
experience_bus = None
|
||||
lessons_db_pool = None
|
||||
_lessons_signal_cache: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
|
||||
_lessons_signal_lock = asyncio.Lock()
|
||||
|
||||
_INFER_PATH_RE = re.compile(r"^/v1/agents/([^/]+)/infer/?$")
|
||||
|
||||
# Models
|
||||
class FilterDecision(BaseModel):
|
||||
@@ -999,10 +1362,146 @@ def load_router_config():
|
||||
config = load_config()
|
||||
router_config = load_router_config()
|
||||
|
||||
|
||||
@app.middleware("http")
async def experience_capture_middleware(request: Request, call_next):
    """Capture /infer outcomes and emit ExperienceEvent asynchronously."""
    # Only POST /v1/agents/{id}/infer requests are captured, and only when
    # the experience bus module loaded and its instance was initialized.
    infer_agent_id = _extract_infer_agent_id(request.url.path)
    if (
        not infer_agent_id
        or request.method.upper() != "POST"
        or not EXPERIENCE_BUS_AVAILABLE
        or experience_bus is None
    ):
        return await call_next(request)

    started_at = time.time()
    # Consume the request body up front so we can both inspect it here and
    # replay it to the downstream handler.
    req_body = await request.body()

    async def _receive() -> Dict[str, Any]:
        # Replay receive callable: hands the buffered body back to the
        # downstream app exactly once.
        return {"type": "http.request", "body": req_body, "more_body": False}

    wrapped_request = Request(request.scope, _receive)

    response = None
    response_body = b""
    status_code = 500
    caught_exc: Optional[Exception] = None

    try:
        response = await call_next(wrapped_request)
        status_code = int(response.status_code)
        # Drain the streaming body iterator so the payload can be parsed;
        # it is re-emitted via a fresh Response at the end.
        chunks: List[bytes] = []
        async for chunk in response.body_iterator:
            chunks.append(chunk)
        response_body = b"".join(chunks)
    except Exception as exc:  # pragma: no cover - defensive capture path
        # Hold the exception so the event is still emitted; re-raised below.
        caught_exc = exc
        status_code = 500

    latency_ms = max(0, int((time.time() - started_at) * 1000))

    # Event construction is strictly best-effort: any failure here must not
    # affect the client-visible response.
    try:
        req_payload = _safe_json_from_bytes(req_body)
        resp_payload = _safe_json_from_bytes(response_body)
        metadata = req_payload.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        # Hash the normalized prompt instead of storing raw user input.
        prompt = str(req_payload.get("prompt") or "")
        normalized_input = normalize_input_for_hash(prompt)
        inputs_hash = hashlib.sha256(normalized_input.encode("utf-8")).hexdigest()

        profile = _resolve_profile_for_event(infer_agent_id, req_payload)
        profile_cfg = {}
        if profile and isinstance(router_config, dict):
            profile_cfg = (router_config.get("llm_profiles") or {}).get(profile, {}) or {}
        if not isinstance(profile_cfg, dict):
            profile_cfg = {}

        # Prefer what the response reports; fall back to the profile config.
        model = str(resp_payload.get("model") or profile_cfg.get("model") or "unknown")
        backend = str(resp_payload.get("backend") or "")
        provider = _derive_provider_from_backend_model(backend, model, profile)

        tokens_total = resp_payload.get("tokens_used")
        tokens_out = int(tokens_total) if isinstance(tokens_total, int) else None
        request_id = str(
            metadata.get("request_id")
            or metadata.get("trace_id")
            or request.headers.get("x-request-id")
            or ""
        ).strip() or None

        # Derive an error classification from the exception (if any) or the
        # HTTP error payload shape (dict detail, string detail, or neither).
        err_class: Optional[str] = None
        err_msg: Optional[str] = None
        detail_obj = resp_payload.get("detail")
        if caught_exc is not None:
            err_class = type(caught_exc).__name__
            err_msg = str(caught_exc)
        elif status_code >= 400:
            if isinstance(detail_obj, dict):
                err_class = str(detail_obj.get("code") or detail_obj.get("error_class") or f"http_{status_code}")
                err_msg = str(detail_obj.get("message") or detail_obj.get("detail") or json.dumps(detail_obj))
            elif isinstance(detail_obj, str):
                err_class = f"http_{status_code}"
                err_msg = detail_obj
            else:
                err_class = f"http_{status_code}"
                err_msg = f"http_status={status_code}"

        # Feed the per-agent signal cache that drives lessons retrieval.
        await _update_last_infer_signal(
            infer_agent_id,
            ok=status_code < 400,
            error_class=err_class,
            latency_ms=latency_ms,
        )

        event = {
            "event_id": str(uuid.uuid4()),
            "ts": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "node_id": os.getenv("NODE_ID", "NODA1"),
            "source": "router",
            "agent_id": infer_agent_id,
            "request_id": request_id,
            "channel": _infer_channel_from_metadata(metadata),
            "task_type": "infer",
            "inputs_hash": inputs_hash,
            "llm": {
                "provider": provider,
                "model": model,
                "profile": profile,
                "latency_ms": latency_ms,
                "tokens_in": None,
                "tokens_out": tokens_out,
            },
            "result": {
                "ok": status_code < 400,
                "error_class": err_class,
                # Redact before persisting — error strings may embed user data.
                "error_msg_redacted": redact_error_message(err_msg),
                "http_status": status_code,
            },
        }
        await experience_bus.capture(event)
    except Exception as exp_err:
        logger.debug("Experience capture skipped: %s", exp_err)

    # Propagate the handler's exception only after capture was attempted.
    if caught_exc is not None:
        raise caught_exc

    # Rebuild the response: the body iterator was consumed above, and the
    # stale content-length header must be dropped so it is recomputed.
    headers = dict(response.headers) if response is not None else {}
    headers.pop("content-length", None)
    return Response(
        content=response_body,
        status_code=status_code,
        headers=headers,
        media_type=response.media_type if response is not None else "application/json",
        background=response.background if response is not None else None,
    )
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Initialize NATS connection and subscriptions"""
|
||||
global nc, nats_available, http_client, neo4j_driver, neo4j_available, runtime_guard_engine
|
||||
global nc, nats_available, http_client, neo4j_driver, neo4j_available, runtime_guard_engine, experience_bus, lessons_db_pool
|
||||
logger.info("🚀 DAGI Router v2.0.0 starting up...")
|
||||
|
||||
# Initialize HTTP client
|
||||
@@ -1041,6 +1540,34 @@ async def startup_event():
|
||||
logger.warning(f"⚠️ NATS not available: {e}")
|
||||
logger.warning("⚠️ Running in test mode (HTTP only)")
|
||||
nats_available = False
|
||||
|
||||
# Initialize Experience Bus (Phase-1)
|
||||
if EXPERIENCE_BUS_AVAILABLE and ExperienceBus is not None:
|
||||
try:
|
||||
experience_bus = ExperienceBus()
|
||||
await experience_bus.start(nats_client=nc if nats_available else None)
|
||||
logger.info("✅ Experience Bus initialized")
|
||||
except Exception as e:
|
||||
experience_bus = None
|
||||
logger.warning(f"⚠️ Experience Bus init failed: {e}")
|
||||
|
||||
# Initialize lessons retrieval pool (Phase-3 read path)
|
||||
if LESSONS_ATTACH_ENABLED:
|
||||
if asyncpg is None:
|
||||
logger.warning("⚠️ Lessons attach enabled but asyncpg is unavailable")
|
||||
elif not LESSONS_DATABASE_URL:
|
||||
logger.warning("⚠️ Lessons attach enabled but LESSONS_DATABASE_URL is missing")
|
||||
else:
|
||||
try:
|
||||
lessons_db_pool = await asyncpg.create_pool(
|
||||
LESSONS_DATABASE_URL,
|
||||
min_size=1,
|
||||
max_size=3,
|
||||
)
|
||||
logger.info("✅ Lessons DB pool initialized")
|
||||
except Exception as e:
|
||||
lessons_db_pool = None
|
||||
logger.warning(f"⚠️ Lessons DB pool init failed: {e}")
|
||||
|
||||
# Initialize Memory Retrieval Pipeline
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
@@ -1765,6 +2292,24 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
"""
|
||||
logger.info(f"🔀 Inference request for agent: {agent_id}")
|
||||
logger.info(f"📝 Prompt: {request.prompt[:100]}...")
|
||||
|
||||
inactive_state = _inactive_agent_state(agent_id)
|
||||
if inactive_state is not None:
|
||||
status_code = 410 if inactive_state == "planned" else 404
|
||||
logger.info(
|
||||
"⛔ Agent unavailable by lifecycle state: agent=%s state=%s",
|
||||
agent_id,
|
||||
inactive_state,
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status_code,
|
||||
detail={
|
||||
"code": f"agent_{inactive_state}",
|
||||
"agent_id": str(agent_id).strip().lower(),
|
||||
"state": inactive_state,
|
||||
"message": "Agent is not active in this environment",
|
||||
},
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# MEMORY RETRIEVAL (v4.0 - Universal for all agents)
|
||||
@@ -2682,23 +3227,77 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
# SMART LLM ROUTER WITH AUTO-FALLBACK
|
||||
# Priority: DeepSeek → Mistral → Grok → Local Ollama
|
||||
# =========================================================================
|
||||
|
||||
|
||||
lessons_block = ""
|
||||
lessons_attached_count = 0
|
||||
if LESSONS_ATTACH_ENABLED and not request.images:
|
||||
retrieval_always_on = False
|
||||
retrieval_limit = LESSONS_ATTACH_MIN
|
||||
last_signal = await _get_last_infer_signal(request_agent_id)
|
||||
last_error_class = None
|
||||
if last_signal:
|
||||
last_error_class = last_signal.get("error_class")
|
||||
if (not bool(last_signal.get("ok", True))) or int(last_signal.get("latency_ms", 0) or 0) >= LESSONS_LATENCY_SPIKE_MS:
|
||||
retrieval_always_on = True
|
||||
retrieval_limit = LESSONS_ATTACH_MAX
|
||||
|
||||
should_retrieve = retrieval_always_on or (random_module.random() * 100.0 < LESSONS_ATTACH_SAMPLE_PCT)
|
||||
if should_retrieve:
|
||||
lessons_rows, retrieval_status, retrieval_latency_ms = await _fetch_ranked_lessons(
|
||||
agent_id=request_agent_id,
|
||||
provider=str(provider or "").strip().lower(),
|
||||
model=str(model or "").strip().lower(),
|
||||
profile=str(default_llm or "").strip().lower(),
|
||||
last_error_class=str(last_error_class or "").strip() or None,
|
||||
limit=retrieval_limit,
|
||||
)
|
||||
inc_lessons_retrieved(status=retrieval_status)
|
||||
observe_lessons_attach_latency(latency_ms=float(retrieval_latency_ms))
|
||||
|
||||
if retrieval_status == "ok" and lessons_rows:
|
||||
selected_lessons = lessons_rows[:retrieval_limit]
|
||||
lessons_block = _render_operational_lessons(selected_lessons, LESSONS_ATTACH_MAX_CHARS)
|
||||
if lessons_block:
|
||||
lessons_attached_count = len(selected_lessons)
|
||||
logger.info(
|
||||
"🧠 lessons_attached=%s agent=%s mode=%s",
|
||||
lessons_attached_count,
|
||||
request_agent_id,
|
||||
"always_on" if retrieval_always_on else "sampled",
|
||||
)
|
||||
inc_lessons_attached(count=lessons_attached_count)
|
||||
|
||||
# Build messages array once for all providers
|
||||
messages = []
|
||||
if system_prompt:
|
||||
combined_parts: List[str] = [system_prompt]
|
||||
if memory_brief_text:
|
||||
enhanced_prompt = f"{system_prompt}\n\n[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}"
|
||||
messages.append({"role": "system", "content": enhanced_prompt})
|
||||
logger.info(f"📝 Added system message with prompt ({len(system_prompt)} chars) + memory ({len(memory_brief_text)} chars)")
|
||||
else:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
logger.info(f"📝 Added system message with prompt ({len(system_prompt)} chars)")
|
||||
elif memory_brief_text:
|
||||
messages.append({"role": "system", "content": f"[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}"})
|
||||
logger.warning(f"⚠️ No system_prompt! Using only memory brief ({len(memory_brief_text)} chars)")
|
||||
combined_parts.append(f"[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}")
|
||||
if lessons_block:
|
||||
combined_parts.append(f"[OPERATIONAL LESSONS - INTERNAL]\n{lessons_block}")
|
||||
enhanced_prompt = "\n\n".join(combined_parts)
|
||||
messages.append({"role": "system", "content": enhanced_prompt})
|
||||
logger.info(
|
||||
"📝 Added system message prompt=%s memory=%s lessons=%s",
|
||||
len(system_prompt),
|
||||
len(memory_brief_text or ""),
|
||||
lessons_attached_count,
|
||||
)
|
||||
elif memory_brief_text or lessons_block:
|
||||
fallback_parts: List[str] = []
|
||||
if memory_brief_text:
|
||||
fallback_parts.append(f"[INTERNAL MEMORY - do NOT repeat to user]\n{memory_brief_text}")
|
||||
if lessons_block:
|
||||
fallback_parts.append(f"[OPERATIONAL LESSONS - INTERNAL]\n{lessons_block}")
|
||||
messages.append({"role": "system", "content": "\n\n".join(fallback_parts)})
|
||||
logger.warning(
|
||||
"⚠️ No system_prompt! Using fallback context memory=%s lessons=%s",
|
||||
len(memory_brief_text or ""),
|
||||
lessons_attached_count,
|
||||
)
|
||||
else:
|
||||
logger.error(f"❌ No system_prompt AND no memory_brief! LLM will have no context!")
|
||||
|
||||
logger.error("❌ No system_prompt, memory_brief, or lessons; LLM will have no context")
|
||||
|
||||
messages.append({"role": "user", "content": request.prompt})
|
||||
logger.debug(f"📨 Messages array: {len(messages)} messages, system={len(messages[0].get('content', '')) if messages else 0} chars")
|
||||
|
||||
@@ -4555,7 +5154,7 @@ async def sofiia_model_catalog(refresh_ollama: bool = False):
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
"""Cleanup connections on shutdown"""
|
||||
global neo4j_driver, http_client, nc
|
||||
global neo4j_driver, http_client, nc, experience_bus, lessons_db_pool
|
||||
|
||||
# Close Memory Retrieval
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
@@ -4576,3 +5175,17 @@ async def shutdown_event():
|
||||
if nc:
|
||||
await nc.close()
|
||||
logger.info("🔌 NATS connection closed")
|
||||
|
||||
if EXPERIENCE_BUS_AVAILABLE and experience_bus:
|
||||
try:
|
||||
await experience_bus.stop()
|
||||
logger.info("🔌 Experience Bus closed")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Experience Bus close error: {e}")
|
||||
|
||||
if lessons_db_pool is not None:
|
||||
try:
|
||||
await lessons_db_pool.close()
|
||||
logger.info("🔌 Lessons DB pool closed")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Lessons DB pool close error: {e}")
|
||||
|
||||
Reference in New Issue
Block a user