feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
249
services/sofiia-supervisor/app/graphs/release_check_graph.py
Normal file
249
services/sofiia-supervisor/app/graphs/release_check_graph.py
Normal file
@@ -0,0 +1,249 @@
|
||||
"""
|
||||
Graph 1: release_check_graph
|
||||
|
||||
Uses the DAARION job_orchestrator_tool to start a release_check task
|
||||
via the gateway, then polls until completion.
|
||||
|
||||
Node sequence:
|
||||
start_job → poll_job (loop) → finalize → END
|
||||
|
||||
State:
|
||||
job_id str Job ID returned by start_task
|
||||
job_status str "running"|"succeeded"|"failed"|"cancelled"
|
||||
poll_count int Guard against infinite polling
|
||||
result dict|None Final release_check report
|
||||
error str|None Error message if failed
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, Optional, TypedDict
|
||||
|
||||
from langgraph.graph import StateGraph, END
|
||||
|
||||
from ..config import settings
|
||||
from ..gateway_client import GatewayClient
|
||||
|
||||
logger = logging.getLogger(__name__)

# Upper bound on poll_job iterations: the overall wait budget divided by the
# poll interval, plus 5 extra iterations of slack so integer truncation (and
# transient poll errors, which also consume iterations) never cut the budget
# short of JOB_MAX_WAIT_SEC.
MAX_POLL_ITERATIONS = int(settings.JOB_MAX_WAIT_SEC / settings.JOB_POLL_INTERVAL_SEC) + 5
# ─── State ────────────────────────────────────────────────────────────────────
|
||||
|
||||
class ReleaseCheckState(TypedDict, total=False):
    """State dict threaded through the release_check graph nodes.

    total=False: every key is optional — nodes read values via ``.get()``
    with defaults, and each node returns a new merged dict.
    """

    # Context (injected before graph.invoke)
    run_id: str  # graph run identifier, used for log correlation and gateway tracing
    agent_id: str  # falls back to settings.DEFAULT_AGENT_ID when absent
    workspace_id: str  # falls back to settings.DEFAULT_WORKSPACE_ID when absent
    user_id: str
    input: Dict[str, Any]  # raw graph input; mapped to task inputs in start_job_node

    # Intermediate
    job_id: Optional[str]  # job ID returned by start_task (None if start failed)
    job_status: Optional[str]  # "running" | "succeeded" | "failed" | "cancelled"
    poll_count: int  # guard against infinite polling (capped by MAX_POLL_ITERATIONS)

    # Output
    result: Optional[Dict[str, Any]]  # final release_check report
    error: Optional[str]  # error message if failed
    graph_status: str  # "succeeded" | "failed"
||||
|
||||
# ─── Node implementations ────────────────────────────────────────────────────
|
||||
|
||||
async def start_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Call job_orchestrator_tool action=start_task with task_id=release_check.
    Expects response: {"job_id": "...", "status": "queued|running"}.

    Returns the state updated with job_id/job_status and poll_count=0.
    On gateway failure — or when the job completed synchronously — also
    sets graph_status so _after_start routes straight to finalize.
    """
    run_id = state.get("run_id", "")
    inp = state.get("input", {})

    # Build release_check inputs from graph input; every field has a
    # default so callers may pass a partial input dict.
    task_inputs = {
        "service_name": inp.get("service_name", "unknown"),
        "diff": inp.get("diff_text", ""),
        "fail_fast": inp.get("fail_fast", True),
        "run_smoke": inp.get("run_smoke", False),
        "run_drift": inp.get("run_drift", True),
        "run_deps": inp.get("run_deps", True),
        "deps_targets": inp.get("deps_targets", ["python", "node"]),
        "deps_vuln_mode": inp.get("deps_vuln_mode", "offline_cache"),
        "deps_fail_on": inp.get("deps_fail_on", ["CRITICAL", "HIGH"]),
        "drift_categories": inp.get("drift_categories", ["services", "openapi", "nats", "tools"]),
        "risk_profile": inp.get("risk_profile", "default"),
    }
    # Optional OpenAPI spec references are forwarded only when provided.
    if inp.get("openapi_base"):
        task_inputs["openapi_base"] = inp["openapi_base"]
    if inp.get("openapi_head"):
        task_inputs["openapi_head"] = inp["openapi_head"]

    # Overall job timeout in seconds (default 180), passed to the orchestrator.
    overall_timeout = inp.get("timeouts", {}).get("overall_sec", 180)

    async with GatewayClient() as gw:
        result = await gw.call_tool(
            tool="job_orchestrator_tool",
            action="start_task",
            params={"task_id": "release_check", "inputs": task_inputs, "timeout_sec": overall_timeout},
            agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
            workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
            user_id=state.get("user_id", ""),
            graph_run_id=run_id,
            graph_node="start_job",
        )

    # Gateway-level failure: fail the whole graph immediately.
    if not result.success:
        logger.error("release_check: start_job failed run=%s err=%s", run_id, result.error_message)
        return {
            **state,
            "job_id": None,
            "poll_count": 0,
            "graph_status": "failed",
            "error": f"start_task failed: {result.error_message}",
        }

    data = result.data or {}
    # Tolerate either "job_id" or "id" as the identifier key in the response.
    job_id = data.get("job_id") or data.get("id")
    job_status = data.get("status", "running")

    logger.info("release_check: job started run=%s job_id=%s status=%s", run_id, job_id, job_status)

    # If job completed synchronously (no async job system), extract result directly
    if job_status in ("succeeded", "failed") and "result" in data:
        return {
            **state,
            "job_id": job_id,
            "job_status": job_status,
            "poll_count": 0,
            "result": data.get("result"),
            "graph_status": "succeeded" if job_status == "succeeded" else "failed",
            "error": data.get("error") if job_status == "failed" else None,
        }

    # Normal async path: hand off to poll_job with a fresh poll counter.
    return {**state, "job_id": job_id, "job_status": job_status, "poll_count": 0}
|
||||
|
||||
async def poll_job_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """
    Poll job_orchestrator_tool action=get_job for completion.
    Loops back to itself if still running (via conditional edge).

    Sets result/graph_status on success, error/graph_status on
    failure/cancellation/timeout; otherwise only bumps poll_count.
    """
    run_id = state.get("run_id", "")
    job_id = state.get("job_id")
    # Count this attempt up-front so the timeout guard also bounds
    # transient poll-error retries, not just successful polls.
    poll_count = state.get("poll_count", 0) + 1

    # Defensive: should not happen (_after_start routes start failures to
    # finalize), but never poll without a job ID.
    if not job_id:
        return {**state, "poll_count": poll_count, "job_status": "failed",
                "error": "No job_id to poll", "graph_status": "failed"}

    # Hard cap derived from JOB_MAX_WAIT_SEC — guards against a job that
    # never reaches a terminal status.
    if poll_count > MAX_POLL_ITERATIONS:
        logger.warning("release_check: polling timeout run=%s job=%s", run_id, job_id)
        return {**state, "poll_count": poll_count, "job_status": "failed",
                "error": "Job polling timeout", "graph_status": "failed"}

    # Brief pause before polling
    await asyncio.sleep(settings.JOB_POLL_INTERVAL_SEC)

    async with GatewayClient() as gw:
        result = await gw.call_tool(
            tool="job_orchestrator_tool",
            action="get_job",
            params={"job_id": job_id},
            agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID),
            workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID),
            user_id=state.get("user_id", ""),
            graph_run_id=run_id,
            graph_node="poll_job",
        )

    # Transient poll error: leave job_status unchanged so the conditional
    # edge loops back and retries (still bounded by MAX_POLL_ITERATIONS).
    if not result.success:
        logger.warning("release_check: poll error run=%s err=%s", run_id, result.error_message)
        return {**state, "poll_count": poll_count}

    data = result.data or {}
    job_status = data.get("status", "running")

    logger.info("release_check: poll run=%s job=%s status=%s count=%d",
                run_id, job_id, job_status, poll_count)

    update = {**state, "job_id": job_id, "job_status": job_status, "poll_count": poll_count}

    if job_status == "succeeded":
        # Tolerate either "result" or "output" as the payload key.
        update["result"] = data.get("result") or data.get("output")
        update["graph_status"] = "succeeded"
    elif job_status in ("failed", "cancelled"):
        update["error"] = data.get("error") or f"Job {job_status}"
        update["graph_status"] = "failed"

    return update
|
||||
|
||||
async def finalize_node(state: ReleaseCheckState) -> ReleaseCheckState:
    """Ensure result has the expected release_check report structure.

    If no report was produced (start/poll failed, or the job returned an
    empty result), synthesize a failing report so downstream consumers can
    always rely on the same keys: pass/gates/recommendations/summary/elapsed_ms.

    Returns the state with a non-empty "result" dict.
    """
    result = state.get("result")
    if not result:
        # state["error"] may be absent OR present-but-None (the synchronous
        # success path in start_job_node sets "error": None). Use `or` so an
        # explicit None never leaks into the report, which .get(key, default)
        # alone would allow.
        result = {
            "pass": False,
            "gates": [],
            "recommendations": [state.get("error") or "Unknown error"],
            "summary": state.get("error") or "Release check failed",
            "elapsed_ms": 0,
        }
    return {**state, "result": result}
|
||||
|
||||
# ─── Conditional routing ──────────────────────────────────────────────────────
|
||||
|
||||
def _should_continue_polling(state: ReleaseCheckState) -> str:
|
||||
"""Route: back to poll_job if still running, else go to finalize."""
|
||||
job_status = state.get("job_status", "running")
|
||||
graph_status = state.get("graph_status", "")
|
||||
if graph_status in ("succeeded", "failed"):
|
||||
return "finalize"
|
||||
if job_status in ("succeeded", "failed", "cancelled"):
|
||||
return "finalize"
|
||||
return "poll_job"
|
||||
|
||||
|
||||
def _after_start(state: ReleaseCheckState) -> str:
|
||||
"""Route after start_job: go directly to finalize if already done, else poll."""
|
||||
if state.get("graph_status") in ("succeeded", "failed"):
|
||||
return "finalize"
|
||||
return "poll_job"
|
||||
|
||||
|
||||
# ─── Graph builder ────────────────────────────────────────────────────────────
|
||||
|
||||
def build_release_check_graph():
    """
    Build and compile the release_check LangGraph.

    Topology:
        start_job ──(already done)──▶ finalize ──▶ END
        start_job ──(running)──▶ poll_job ──(loop)──▶ poll_job
                                 poll_job ──(done)──▶ finalize ──▶ END
    """
    builder = StateGraph(ReleaseCheckState)

    # Nodes
    builder.add_node("start_job", start_job_node)
    builder.add_node("poll_job", poll_job_node)
    builder.add_node("finalize", finalize_node)

    # Entry + routing
    builder.set_entry_point("start_job")
    builder.add_conditional_edges(
        "start_job",
        _after_start,
        {"poll_job": "poll_job", "finalize": "finalize"},
    )
    builder.add_conditional_edges(
        "poll_job",
        _should_continue_polling,
        {"finalize": "finalize", "poll_job": "poll_job"},
    )
    builder.add_edge("finalize", END)

    return builder.compile()
Reference in New Issue
Block a user