Files
microdao-daarion/services/sofiia-supervisor/app/graphs/release_check_graph.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

250 lines
9.0 KiB
Python

"""
Graph 1: release_check_graph
Uses the DAARION job_orchestrator_tool to start a release_check task
via the gateway, then polls until completion.
Node sequence:
start_job → poll_job (loop) → finalize → END
State keys:
    job_id (str): Job ID returned by start_task
    job_status (str): "running" | "succeeded" | "failed" | "cancelled"
    poll_count (int): Guard against infinite polling
    result (dict | None): Final release_check report
    error (str | None): Error message if failed
"""
from __future__ import annotations
import asyncio
import logging
import time
from typing import Any, Dict, Optional, TypedDict
from langgraph.graph import StateGraph, END
from ..config import settings
from ..gateway_client import GatewayClient
logger = logging.getLogger(__name__)
MAX_POLL_ITERATIONS = int(settings.JOB_MAX_WAIT_SEC / settings.JOB_POLL_INTERVAL_SEC) + 5
# ─── State ────────────────────────────────────────────────────────────────────
class ReleaseCheckState(TypedDict, total=False):
    """State threaded through the release_check graph.

    total=False: every key is optional; each node returns a merged copy of
    the state with its own updates applied.
    """
    # Context (injected before graph.invoke)
    run_id: str                       # run identifier, forwarded to the gateway as graph_run_id
    agent_id: str                     # falls back to settings.DEFAULT_AGENT_ID in the nodes
    workspace_id: str                 # falls back to settings.DEFAULT_WORKSPACE_ID in the nodes
    user_id: str
    input: Dict[str, Any]             # raw graph input; keys consumed in start_job_node
    # Intermediate
    job_id: Optional[str]             # from start_task response ("job_id" or "id")
    job_status: Optional[str]         # "running" | "succeeded" | "failed" | "cancelled"
    poll_count: int                   # guard against infinite polling (capped by MAX_POLL_ITERATIONS)
    # Output
    result: Optional[Dict[str, Any]]  # final release_check report
    error: Optional[str]              # error message if failed
    graph_status: str                 # "succeeded" | "failed"
# ─── Node implementations ────────────────────────────────────────────────────
async def start_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Call job_orchestrator_tool action=start_task with task_id=release_check.

    Expects response: {"job_id": "...", "status": "queued|running"}.
    Some gateways execute the task synchronously and return a terminal
    status inline; that case is resolved here without a poll round-trip.
    (Previously a synchronous "failed" without a "result" key, or a
    "cancelled" status, fell through to polling an already-finished job.)
    """
    run_id = state.get("run_id", "")
    inp = state.get("input", {})
    # Build release_check inputs from graph input, applying defaults.
    task_inputs = {
        "service_name": inp.get("service_name", "unknown"),
        "diff": inp.get("diff_text", ""),
        "fail_fast": inp.get("fail_fast", True),
        "run_smoke": inp.get("run_smoke", False),
        "run_drift": inp.get("run_drift", True),
        "run_deps": inp.get("run_deps", True),
        "deps_targets": inp.get("deps_targets", ["python", "node"]),
        "deps_vuln_mode": inp.get("deps_vuln_mode", "offline_cache"),
        "deps_fail_on": inp.get("deps_fail_on", ["CRITICAL", "HIGH"]),
        "drift_categories": inp.get("drift_categories", ["services", "openapi", "nats", "tools"]),
        "risk_profile": inp.get("risk_profile", "default"),
    }
    # Optional OpenAPI snapshots are forwarded only when provided.
    if inp.get("openapi_base"):
        task_inputs["openapi_base"] = inp["openapi_base"]
    if inp.get("openapi_head"):
        task_inputs["openapi_head"] = inp["openapi_head"]
    overall_timeout = inp.get("timeouts", {}).get("overall_sec", 180)
    async with GatewayClient() as gw:
        result = await gw.call_tool(
            tool="job_orchestrator_tool",
            action="start_task",
            params={"task_id": "release_check", "inputs": task_inputs, "timeout_sec": overall_timeout},
            agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
            workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
            user_id=state.get("user_id", ""),
            graph_run_id=run_id,
            graph_node="start_job",
        )
    if not result.success:
        logger.error("release_check: start_job failed run=%s err=%s", run_id, result.error_message)
        return {
            **state,
            "job_id": None,
            "poll_count": 0,
            "graph_status": "failed",
            "error": f"start_task failed: {result.error_message}",
        }
    data = result.data or {}
    job_id = data.get("job_id") or data.get("id")
    job_status = data.get("status", "running")
    logger.info("release_check: job started run=%s job_id=%s status=%s", run_id, job_id, job_status)
    # Synchronous success with an inline report: finish immediately.
    if job_status == "succeeded" and "result" in data:
        return {
            **state,
            "job_id": job_id,
            "job_status": job_status,
            "poll_count": 0,
            "result": data.get("result"),
            "graph_status": "succeeded",
            "error": None,
        }
    # Synchronous failure/cancellation: no point polling a finished job.
    if job_status in ("failed", "cancelled"):
        return {
            **state,
            "job_id": job_id,
            "job_status": job_status,
            "poll_count": 0,
            "result": data.get("result"),
            "graph_status": "failed",
            # Fall back to a readable message so finalize never reports None.
            "error": data.get("error") or f"Job {job_status}",
        }
    # Queued/running: hand off to poll_job.
    return {**state, "job_id": job_id, "job_status": job_status, "poll_count": 0}
async def poll_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Check the running job once via job_orchestrator_tool action=get_job.

    The conditional edge re-enters this node while the job is still running;
    a terminal status sets graph_status so routing proceeds to finalize.
    """
    run_id = state.get("run_id", "")
    job_id = state.get("job_id")
    attempts = state.get("poll_count", 0) + 1

    # Guard: nothing to poll — treat the run as failed.
    if not job_id:
        return {**state, "poll_count": attempts, "job_status": "failed",
                "error": "No job_id to poll", "graph_status": "failed"}

    # Guard: give up after the configured number of iterations.
    if attempts > MAX_POLL_ITERATIONS:
        logger.warning("release_check: polling timeout run=%s job=%s", run_id, job_id)
        return {**state, "poll_count": attempts, "job_status": "failed",
                "error": "Job polling timeout", "graph_status": "failed"}

    # Wait one interval before hitting the gateway again.
    await asyncio.sleep(settings.JOB_POLL_INTERVAL_SEC)
    async with GatewayClient() as gw:
        outcome = await gw.call_tool(
            tool="job_orchestrator_tool",
            action="get_job",
            params={"job_id": job_id},
            agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
            workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
            user_id=state.get("user_id", ""),
            graph_run_id=run_id,
            graph_node="poll_job",
        )

    # Transient gateway error: keep the loop going and retry next iteration.
    if not outcome.success:
        logger.warning("release_check: poll error run=%s err=%s", run_id, outcome.error_message)
        return {**state, "poll_count": attempts}

    payload = outcome.data or {}
    status = payload.get("status", "running")
    logger.info("release_check: poll run=%s job=%s status=%s count=%d",
                run_id, job_id, status, attempts)

    new_state = {**state, "job_id": job_id, "job_status": status, "poll_count": attempts}
    if status == "succeeded":
        new_state["result"] = payload.get("result") or payload.get("output")
        new_state["graph_status"] = "succeeded"
    elif status in ("failed", "cancelled"):
        new_state["error"] = payload.get("error") or f"Job {status}"
        new_state["graph_status"] = "failed"
    return new_state
async def finalize_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Ensure `result` has the expected release_check report structure.

    When no report was produced (dispatch failure, polling timeout, or a
    succeeded job with an empty payload), synthesize a failing report from
    the recorded error. Uses `or` fallbacks rather than dict-get defaults
    because state may contain an explicit ``error=None`` (set on the
    synchronous-success path of start_job_node), which would otherwise
    leak None into `summary` and `recommendations`.
    """
    result = state.get("result")
    if not result:
        result = {
            "pass": False,
            "gates": [],
            "recommendations": [state.get("error") or "Unknown error"],
            "summary": state.get("error") or "Release check failed",
            "elapsed_ms": 0,
        }
    return {**state, "result": result}
# ─── Conditional routing ──────────────────────────────────────────────────────
def _should_continue_polling(state: ReleaseCheckState) -> str:
"""Route: back to poll_job if still running, else go to finalize."""
job_status = state.get("job_status", "running")
graph_status = state.get("graph_status", "")
if graph_status in ("succeeded", "failed"):
return "finalize"
if job_status in ("succeeded", "failed", "cancelled"):
return "finalize"
return "poll_job"
def _after_start(state: ReleaseCheckState) -> str:
"""Route after start_job: go directly to finalize if already done, else poll."""
if state.get("graph_status") in ("succeeded", "failed"):
return "finalize"
return "poll_job"
# ─── Graph builder ────────────────────────────────────────────────────────────
def build_release_check_graph():
    """
    Build and compile the release_check LangGraph.

    Topology:
        start_job --(already done)--> finalize --> END
        start_job --(running)--> poll_job --(loop while running)--> finalize --> END
    """
    graph = StateGraph(ReleaseCheckState)
    # Register the three nodes, then wire routing.
    for node_name, node_fn in (
        ("start_job", start_job_node),
        ("poll_job", poll_job_node),
        ("finalize", finalize_node),
    ):
        graph.add_node(node_name, node_fn)
    graph.set_entry_point("start_job")
    graph.add_conditional_edges(
        "start_job", _after_start, {"finalize": "finalize", "poll_job": "poll_job"}
    )
    graph.add_conditional_edges(
        "poll_job", _should_continue_polling, {"poll_job": "poll_job", "finalize": "finalize"}
    )
    graph.add_edge("finalize", END)
    return graph.compile()