New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
250 lines
9.0 KiB
Python
"""
|
|
Graph 1: release_check_graph
|
|
|
|
Uses the DAARION job_orchestrator_tool to start a release_check task
|
|
via the gateway, then polls until completion.
|
|
|
|
Node sequence:
|
|
start_job → poll_job (loop) → finalize → END
|
|
|
|
State:
|
|
job_id str Job ID returned by start_task
|
|
job_status str "running"|"succeeded"|"failed"|"cancelled"
|
|
poll_count int Guard against infinite polling
|
|
result dict|None Final release_check report
|
|
error str|None Error message if failed
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
from typing import Any, Dict, Optional, TypedDict
|
|
|
|
from langgraph.graph import StateGraph, END
|
|
|
|
from ..config import settings
|
|
from ..gateway_client import GatewayClient
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
MAX_POLL_ITERATIONS = int(settings.JOB_MAX_WAIT_SEC / settings.JOB_POLL_INTERVAL_SEC) + 5
|
|
|
|
|
|
# ─── State ────────────────────────────────────────────────────────────────────
|
|
|
|
class ReleaseCheckState(TypedDict, total=False):
|
|
# Context (injected before graph.invoke)
|
|
run_id: str
|
|
agent_id: str
|
|
workspace_id: str
|
|
user_id: str
|
|
input: Dict[str, Any]
|
|
|
|
# Intermediate
|
|
job_id: Optional[str]
|
|
job_status: Optional[str]
|
|
poll_count: int
|
|
|
|
# Output
|
|
result: Optional[Dict[str, Any]]
|
|
error: Optional[str]
|
|
graph_status: str # "succeeded" | "failed"
|
|
|
|
|
|
# ─── Node implementations ────────────────────────────────────────────────────
|
|
|
|
async def start_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
|
|
"""
|
|
Call job_orchestrator_tool action=start_task with task_id=release_check.
|
|
Expects response: {"job_id": "...", "status": "queued|running"}.
|
|
"""
|
|
run_id = state.get("run_id", "")
|
|
inp = state.get("input", {})
|
|
|
|
# Build release_check inputs from graph input
|
|
task_inputs = {
|
|
"service_name": inp.get("service_name", "unknown"),
|
|
"diff": inp.get("diff_text", ""),
|
|
"fail_fast": inp.get("fail_fast", True),
|
|
"run_smoke": inp.get("run_smoke", False),
|
|
"run_drift": inp.get("run_drift", True),
|
|
"run_deps": inp.get("run_deps", True),
|
|
"deps_targets": inp.get("deps_targets", ["python", "node"]),
|
|
"deps_vuln_mode": inp.get("deps_vuln_mode", "offline_cache"),
|
|
"deps_fail_on": inp.get("deps_fail_on", ["CRITICAL", "HIGH"]),
|
|
"drift_categories": inp.get("drift_categories", ["services", "openapi", "nats", "tools"]),
|
|
"risk_profile": inp.get("risk_profile", "default"),
|
|
}
|
|
if inp.get("openapi_base"):
|
|
task_inputs["openapi_base"] = inp["openapi_base"]
|
|
if inp.get("openapi_head"):
|
|
task_inputs["openapi_head"] = inp["openapi_head"]
|
|
|
|
overall_timeout = inp.get("timeouts", {}).get("overall_sec", 180)
|
|
|
|
async with GatewayClient() as gw:
|
|
result = await gw.call_tool(
|
|
tool="job_orchestrator_tool",
|
|
action="start_task",
|
|
params={"task_id": "release_check", "inputs": task_inputs, "timeout_sec": overall_timeout},
|
|
agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
|
|
workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
|
|
user_id=state.get("user_id", ""),
|
|
graph_run_id=run_id,
|
|
graph_node="start_job",
|
|
)
|
|
|
|
if not result.success:
|
|
logger.error("release_check: start_job failed run=%s err=%s", run_id, result.error_message)
|
|
return {
|
|
**state,
|
|
"job_id": None,
|
|
"poll_count": 0,
|
|
"graph_status": "failed",
|
|
"error": f"start_task failed: {result.error_message}",
|
|
}
|
|
|
|
data = result.data or {}
|
|
job_id = data.get("job_id") or data.get("id")
|
|
job_status = data.get("status", "running")
|
|
|
|
logger.info("release_check: job started run=%s job_id=%s status=%s", run_id, job_id, job_status)
|
|
|
|
# If job completed synchronously (no async job system), extract result directly
|
|
if job_status in ("succeeded", "failed") and "result" in data:
|
|
return {
|
|
**state,
|
|
"job_id": job_id,
|
|
"job_status": job_status,
|
|
"poll_count": 0,
|
|
"result": data.get("result"),
|
|
"graph_status": "succeeded" if job_status == "succeeded" else "failed",
|
|
"error": data.get("error") if job_status == "failed" else None,
|
|
}
|
|
|
|
return {**state, "job_id": job_id, "job_status": job_status, "poll_count": 0}
|
|
|
|
|
|
async def poll_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
|
|
"""
|
|
Poll job_orchestrator_tool action=get_job for completion.
|
|
Loops back to itself if still running (via conditional edge).
|
|
"""
|
|
run_id = state.get("run_id", "")
|
|
job_id = state.get("job_id")
|
|
poll_count = state.get("poll_count", 0) + 1
|
|
|
|
if not job_id:
|
|
return {**state, "poll_count": poll_count, "job_status": "failed",
|
|
"error": "No job_id to poll", "graph_status": "failed"}
|
|
|
|
if poll_count > MAX_POLL_ITERATIONS:
|
|
logger.warning("release_check: polling timeout run=%s job=%s", run_id, job_id)
|
|
return {**state, "poll_count": poll_count, "job_status": "failed",
|
|
"error": "Job polling timeout", "graph_status": "failed"}
|
|
|
|
# Brief pause before polling
|
|
await asyncio.sleep(settings.JOB_POLL_INTERVAL_SEC)
|
|
|
|
async with GatewayClient() as gw:
|
|
result = await gw.call_tool(
|
|
tool="job_orchestrator_tool",
|
|
action="get_job",
|
|
params={"job_id": job_id},
|
|
agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
|
|
workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
|
|
user_id=state.get("user_id", ""),
|
|
graph_run_id=run_id,
|
|
graph_node="poll_job",
|
|
)
|
|
|
|
if not result.success:
|
|
logger.warning("release_check: poll error run=%s err=%s", run_id, result.error_message)
|
|
return {**state, "poll_count": poll_count}
|
|
|
|
data = result.data or {}
|
|
job_status = data.get("status", "running")
|
|
|
|
logger.info("release_check: poll run=%s job=%s status=%s count=%d",
|
|
run_id, job_id, job_status, poll_count)
|
|
|
|
update = {**state, "job_id": job_id, "job_status": job_status, "poll_count": poll_count}
|
|
|
|
if job_status == "succeeded":
|
|
update["result"] = data.get("result") or data.get("output")
|
|
update["graph_status"] = "succeeded"
|
|
elif job_status in ("failed", "cancelled"):
|
|
update["error"] = data.get("error") or f"Job {job_status}"
|
|
update["graph_status"] = "failed"
|
|
|
|
return update
|
|
|
|
|
|
async def finalize_node(state: ReleaseCheckState) -> ReleaseCheckState:
|
|
"""Ensure result has the expected release_check report structure."""
|
|
result = state.get("result")
|
|
if not result:
|
|
result = {
|
|
"pass": False,
|
|
"gates": [],
|
|
"recommendations": [state.get("error", "Unknown error")],
|
|
"summary": state.get("error", "Release check failed"),
|
|
"elapsed_ms": 0,
|
|
}
|
|
return {**state, "result": result}
|
|
|
|
|
|
# ─── Conditional routing ──────────────────────────────────────────────────────
|
|
|
|
def _should_continue_polling(state: ReleaseCheckState) -> str:
|
|
"""Route: back to poll_job if still running, else go to finalize."""
|
|
job_status = state.get("job_status", "running")
|
|
graph_status = state.get("graph_status", "")
|
|
if graph_status in ("succeeded", "failed"):
|
|
return "finalize"
|
|
if job_status in ("succeeded", "failed", "cancelled"):
|
|
return "finalize"
|
|
return "poll_job"
|
|
|
|
|
|
def _after_start(state: ReleaseCheckState) -> str:
|
|
"""Route after start_job: go directly to finalize if already done, else poll."""
|
|
if state.get("graph_status") in ("succeeded", "failed"):
|
|
return "finalize"
|
|
return "poll_job"
|
|
|
|
|
|
# ─── Graph builder ────────────────────────────────────────────────────────────
|
|
|
|
def build_release_check_graph():
|
|
"""
|
|
Build and compile the release_check LangGraph.
|
|
|
|
Graph:
|
|
start_job → [if done] finalize → END
|
|
→ [if running] poll_job → [loop] → finalize → END
|
|
"""
|
|
graph = StateGraph(ReleaseCheckState)
|
|
|
|
graph.add_node("start_job", start_job_node)
|
|
graph.add_node("poll_job", poll_job_node)
|
|
graph.add_node("finalize", finalize_node)
|
|
|
|
graph.set_entry_point("start_job")
|
|
|
|
graph.add_conditional_edges(
|
|
"start_job",
|
|
_after_start,
|
|
{"finalize": "finalize", "poll_job": "poll_job"},
|
|
)
|
|
graph.add_conditional_edges(
|
|
"poll_job",
|
|
_should_continue_polling,
|
|
{"poll_job": "poll_job", "finalize": "finalize"},
|
|
)
|
|
graph.add_edge("finalize", END)
|
|
|
|
return graph.compile()
|