feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
249
services/sofiia-supervisor/app/graphs/release_check_graph.py
Normal file
249
services/sofiia-supervisor/app/graphs/release_check_graph.py
Normal file
@@ -0,0 +1,249 @@
|
||||
"""
|
||||
Graph 1: release_check_graph
|
||||
|
||||
Uses the DAARION job_orchestrator_tool to start a release_check task
|
||||
via the gateway, then polls until completion.
|
||||
|
||||
Node sequence:
|
||||
start_job → poll_job (loop) → finalize → END
|
||||
|
||||
State:
|
||||
job_id str Job ID returned by start_task
|
||||
job_status str "running"|"succeeded"|"failed"|"cancelled"
|
||||
poll_count int Guard against infinite polling
|
||||
result dict|None Final release_check report
|
||||
error str|None Error message if failed
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, Optional, TypedDict
|
||||
|
||||
from langgraph.graph import StateGraph, END
|
||||
|
||||
from ..config import settings
|
||||
from ..gateway_client import GatewayClient
|
||||
|
||||
logger = logging.getLogger(__name__)

# Upper bound on poll_job iterations: the overall wait budget divided by the
# poll interval, plus 5 extra iterations of slack so integer truncation (and
# transient poll errors, which also consume iterations) never cut the budget
# short of JOB_MAX_WAIT_SEC.
MAX_POLL_ITERATIONS = int(settings.JOB_MAX_WAIT_SEC / settings.JOB_POLL_INTERVAL_SEC) + 5
# ─── State ────────────────────────────────────────────────────────────────────
|
||||
|
||||
class ReleaseCheckState(TypedDict, total=False):
    """State dict threaded through the release_check graph nodes.

    total=False: every key is optional — nodes read values via ``.get()``
    with defaults, and each node returns a new merged dict.
    """

    # Context (injected before graph.invoke)
    run_id: str  # graph run identifier, used for log correlation and gateway tracing
    agent_id: str  # falls back to settings.DEFAULT_AGENT_ID when absent
    workspace_id: str  # falls back to settings.DEFAULT_WORKSPACE_ID when absent
    user_id: str
    input: Dict[str, Any]  # raw graph input; mapped to task inputs in start_job_node

    # Intermediate
    job_id: Optional[str]  # job ID returned by start_task (None if start failed)
    job_status: Optional[str]  # "running" | "succeeded" | "failed" | "cancelled"
    poll_count: int  # guard against infinite polling (capped by MAX_POLL_ITERATIONS)

    # Output
    result: Optional[Dict[str, Any]]  # final release_check report
    error: Optional[str]  # error message if failed
    graph_status: str  # "succeeded" | "failed"
||||
|
||||
# ─── Node implementations ────────────────────────────────────────────────────
|
||||
|
||||
async def start_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Call job_orchestrator_tool action=start_task with task_id=release_check.
    Expects response: {"job_id": "...", "status": "queued|running"}.

    Returns the state updated with job_id/job_status and poll_count=0.
    On gateway failure — or when the job completed synchronously — also
    sets graph_status so _after_start routes straight to finalize.
    """
    run_id = state.get("run_id", "")
    inp = state.get("input", {})

    # Build release_check inputs from graph input; every field has a
    # default so callers may pass a partial input dict.
    task_inputs = {
        "service_name": inp.get("service_name", "unknown"),
        "diff": inp.get("diff_text", ""),
        "fail_fast": inp.get("fail_fast", True),
        "run_smoke": inp.get("run_smoke", False),
        "run_drift": inp.get("run_drift", True),
        "run_deps": inp.get("run_deps", True),
        "deps_targets": inp.get("deps_targets", ["python", "node"]),
        "deps_vuln_mode": inp.get("deps_vuln_mode", "offline_cache"),
        "deps_fail_on": inp.get("deps_fail_on", ["CRITICAL", "HIGH"]),
        "drift_categories": inp.get("drift_categories", ["services", "openapi", "nats", "tools"]),
        "risk_profile": inp.get("risk_profile", "default"),
    }
    # Optional OpenAPI spec references are forwarded only when provided.
    if inp.get("openapi_base"):
        task_inputs["openapi_base"] = inp["openapi_base"]
    if inp.get("openapi_head"):
        task_inputs["openapi_head"] = inp["openapi_head"]

    # Overall job timeout in seconds (default 180), passed to the orchestrator.
    overall_timeout = inp.get("timeouts", {}).get("overall_sec", 180)

    async with GatewayClient() as gw:
        result = await gw.call_tool(
            tool="job_orchestrator_tool",
            action="start_task",
            params={"task_id": "release_check", "inputs": task_inputs, "timeout_sec": overall_timeout},
            agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
            workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
            user_id=state.get("user_id", ""),
            graph_run_id=run_id,
            graph_node="start_job",
        )

    # Gateway-level failure: fail the whole graph immediately.
    if not result.success:
        logger.error("release_check: start_job failed run=%s err=%s", run_id, result.error_message)
        return {
            **state,
            "job_id": None,
            "poll_count": 0,
            "graph_status": "failed",
            "error": f"start_task failed: {result.error_message}",
        }

    data = result.data or {}
    # Tolerate either "job_id" or "id" as the identifier key in the response.
    job_id = data.get("job_id") or data.get("id")
    job_status = data.get("status", "running")

    logger.info("release_check: job started run=%s job_id=%s status=%s", run_id, job_id, job_status)

    # If job completed synchronously (no async job system), extract result directly
    if job_status in ("succeeded", "failed") and "result" in data:
        return {
            **state,
            "job_id": job_id,
            "job_status": job_status,
            "poll_count": 0,
            "result": data.get("result"),
            "graph_status": "succeeded" if job_status == "succeeded" else "failed",
            "error": data.get("error") if job_status == "failed" else None,
        }

    # Normal async path: hand off to poll_job with a fresh poll counter.
    return {**state, "job_id": job_id, "job_status": job_status, "poll_count": 0}
|
||||
|
||||
async def poll_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Poll job_orchestrator_tool action=get_job for completion.
    Loops back to itself if still running (via conditional edge).

    Sets result/graph_status on success, error/graph_status on
    failure/cancellation/timeout; otherwise only bumps poll_count.
    """
    run_id = state.get("run_id", "")
    job_id = state.get("job_id")
    # Count this attempt up-front so the timeout guard also bounds
    # transient poll-error retries, not just successful polls.
    poll_count = state.get("poll_count", 0) + 1

    # Defensive: should not happen (_after_start routes start failures to
    # finalize), but never poll without a job ID.
    if not job_id:
        return {**state, "poll_count": poll_count, "job_status": "failed",
                "error": "No job_id to poll", "graph_status": "failed"}

    # Hard cap derived from JOB_MAX_WAIT_SEC — guards against a job that
    # never reaches a terminal status.
    if poll_count > MAX_POLL_ITERATIONS:
        logger.warning("release_check: polling timeout run=%s job=%s", run_id, job_id)
        return {**state, "poll_count": poll_count, "job_status": "failed",
                "error": "Job polling timeout", "graph_status": "failed"}

    # Brief pause before polling
    await asyncio.sleep(settings.JOB_POLL_INTERVAL_SEC)

    async with GatewayClient() as gw:
        result = await gw.call_tool(
            tool="job_orchestrator_tool",
            action="get_job",
            params={"job_id": job_id},
            agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
            workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
            user_id=state.get("user_id", ""),
            graph_run_id=run_id,
            graph_node="poll_job",
        )

    # Transient poll error: leave job_status unchanged so the conditional
    # edge loops back and retries (still bounded by MAX_POLL_ITERATIONS).
    if not result.success:
        logger.warning("release_check: poll error run=%s err=%s", run_id, result.error_message)
        return {**state, "poll_count": poll_count}

    data = result.data or {}
    job_status = data.get("status", "running")

    logger.info("release_check: poll run=%s job=%s status=%s count=%d",
                run_id, job_id, job_status, poll_count)

    update = {**state, "job_id": job_id, "job_status": job_status, "poll_count": poll_count}

    if job_status == "succeeded":
        # Tolerate either "result" or "output" as the payload key.
        update["result"] = data.get("result") or data.get("output")
        update["graph_status"] = "succeeded"
    elif job_status in ("failed", "cancelled"):
        update["error"] = data.get("error") or f"Job {job_status}"
        update["graph_status"] = "failed"

    return update
|
||||
|
||||
async def finalize_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """Ensure result has the expected release_check report structure.

    If no report was produced (start/poll failed, or the job returned an
    empty result), synthesize a failing report so downstream consumers can
    always rely on the same keys: pass/gates/recommendations/summary/elapsed_ms.

    Returns the state with a non-empty "result" dict.
    """
    result = state.get("result")
    if not result:
        # state["error"] may be absent OR present-but-None (the synchronous
        # success path in start_job_node sets "error": None). Use `or` so an
        # explicit None never leaks into the report, which .get(key, default)
        # alone would allow.
        result = {
            "pass": False,
            "gates": [],
            "recommendations": [state.get("error") or "Unknown error"],
            "summary": state.get("error") or "Release check failed",
            "elapsed_ms": 0,
        }
    return {**state, "result": result}
|
||||
|
||||
# ─── Conditional routing ──────────────────────────────────────────────────────
|
||||
|
||||
def _should_continue_polling(state: ReleaseCheckState) -> str:
|
||||
"""Route: back to poll_job if still running, else go to finalize."""
|
||||
job_status = state.get("job_status", "running")
|
||||
graph_status = state.get("graph_status", "")
|
||||
if graph_status in ("succeeded", "failed"):
|
||||
return "finalize"
|
||||
if job_status in ("succeeded", "failed", "cancelled"):
|
||||
return "finalize"
|
||||
return "poll_job"
|
||||
|
||||
|
||||
def _after_start(state: ReleaseCheckState) -> str:
|
||||
"""Route after start_job: go directly to finalize if already done, else poll."""
|
||||
if state.get("graph_status") in ("succeeded", "failed"):
|
||||
return "finalize"
|
||||
return "poll_job"
|
||||
|
||||
|
||||
# ─── Graph builder ────────────────────────────────────────────────────────────
|
||||
|
||||
def build_release_check_graph():
    """
    Build and compile the release_check LangGraph.

    Topology:
        start_job ──(already done)──▶ finalize ──▶ END
        start_job ──(running)──▶ poll_job ──(loop)──▶ poll_job
                                 poll_job ──(done)──▶ finalize ──▶ END
    """
    builder = StateGraph(ReleaseCheckState)

    # Nodes
    builder.add_node("start_job", start_job_node)
    builder.add_node("poll_job", poll_job_node)
    builder.add_node("finalize", finalize_node)

    # Entry + routing
    builder.set_entry_point("start_job")
    builder.add_conditional_edges(
        "start_job",
        _after_start,
        {"poll_job": "poll_job", "finalize": "finalize"},
    )
    builder.add_conditional_edges(
        "poll_job",
        _should_continue_polling,
        {"finalize": "finalize", "poll_job": "poll_job"},
    )
    builder.add_edge("finalize", END)

    return builder.compile()
Reference in New Issue
Block a user