""" Graph 1: release_check_graph Uses the DAARION job_orchestrator_tool to start a release_check task via the gateway, then polls until completion. Node sequence: start_job → poll_job (loop) → finalize → END State: job_id str Job ID returned by start_task job_status str "running"|"succeeded"|"failed"|"cancelled" poll_count int Guard against infinite polling result dict|None Final release_check report error str|None Error message if failed """ from __future__ import annotations import asyncio import logging import time from typing import Any, Dict, Optional, TypedDict from langgraph.graph import StateGraph, END from ..config import settings from ..gateway_client import GatewayClient logger = logging.getLogger(__name__) MAX_POLL_ITERATIONS = int(settings.JOB_MAX_WAIT_SEC / settings.JOB_POLL_INTERVAL_SEC) + 5 # ─── State ──────────────────────────────────────────────────────────────────── class ReleaseCheckState(TypedDict, total=False): # Context (injected before graph.invoke) run_id: str agent_id: str workspace_id: str user_id: str input: Dict[str, Any] # Intermediate job_id: Optional[str] job_status: Optional[str] poll_count: int # Output result: Optional[Dict[str, Any]] error: Optional[str] graph_status: str # "succeeded" | "failed" # ─── Node implementations ──────────────────────────────────────────────────── async def start_job_node(state: ReleaseCheckState) -> ReleaseCheckState: """ Call job_orchestrator_tool action=start_task with task_id=release_check. Expects response: {"job_id": "...", "status": "queued|running"}. """ run_id = state.get("run_id", "") inp = state.get("input", {}) # Build release_check inputs from graph input task_inputs = { "service_name": inp.get("service_name", "unknown"), "diff": inp.get("diff_text", ""), "fail_fast": inp.get("fail_fast", True), "run_smoke": inp.get("run_smoke", False), "run_drift": inp.get("run_drift", True), "run_deps": inp.get("run_deps", True), "deps_targets": inp.get("deps_targets", ["python", "node"]), "deps_vuln_mode": inp.get("deps_vuln_mode", "offline_cache"), "deps_fail_on": inp.get("deps_fail_on", ["CRITICAL", "HIGH"]), "drift_categories": inp.get("drift_categories", ["services", "openapi", "nats", "tools"]), "risk_profile": inp.get("risk_profile", "default"), } if inp.get("openapi_base"): task_inputs["openapi_base"] = inp["openapi_base"] if inp.get("openapi_head"): task_inputs["openapi_head"] = inp["openapi_head"] overall_timeout = inp.get("timeouts", {}).get("overall_sec", 180) async with GatewayClient() as gw: result = await gw.call_tool( tool="job_orchestrator_tool", action="start_task", params={"task_id": "release_check", "inputs": task_inputs, "timeout_sec": overall_timeout}, agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID), workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID), user_id=state.get("user_id", ""), graph_run_id=run_id, graph_node="start_job", ) if not result.success: logger.error("release_check: start_job failed run=%s err=%s", run_id, result.error_message) return { **state, "job_id": None, "poll_count": 0, "graph_status": "failed", "error": f"start_task failed: {result.error_message}", } data = result.data or {} job_id = data.get("job_id") or data.get("id") job_status = data.get("status", "running") logger.info("release_check: job started run=%s job_id=%s status=%s", run_id, job_id, job_status) # If job completed synchronously (no async job system), extract result directly if job_status in ("succeeded", "failed") and "result" in data: return { **state, "job_id": job_id, "job_status": job_status, "poll_count": 0, "result": data.get("result"), "graph_status": "succeeded" if job_status == "succeeded" else "failed", "error": data.get("error") if job_status == "failed" else None, } return {**state, "job_id": job_id, "job_status": job_status, "poll_count": 0} async def poll_job_node(state: ReleaseCheckState) -> ReleaseCheckState: """ Poll job_orchestrator_tool action=get_job for completion. Loops back to itself if still running (via conditional edge). """ run_id = state.get("run_id", "") job_id = state.get("job_id") poll_count = state.get("poll_count", 0) + 1 if not job_id: return {**state, "poll_count": poll_count, "job_status": "failed", "error": "No job_id to poll", "graph_status": "failed"} if poll_count > MAX_POLL_ITERATIONS: logger.warning("release_check: polling timeout run=%s job=%s", run_id, job_id) return {**state, "poll_count": poll_count, "job_status": "failed", "error": "Job polling timeout", "graph_status": "failed"} # Brief pause before polling await asyncio.sleep(settings.JOB_POLL_INTERVAL_SEC) async with GatewayClient() as gw: result = await gw.call_tool( tool="job_orchestrator_tool", action="get_job", params={"job_id": job_id}, agent_id=state.get("agent_id", settings.DEFAULT_AGENT_ID), workspace_id=state.get("workspace_id", settings.DEFAULT_WORKSPACE_ID), user_id=state.get("user_id", ""), graph_run_id=run_id, graph_node="poll_job", ) if not result.success: logger.warning("release_check: poll error run=%s err=%s", run_id, result.error_message) return {**state, "poll_count": poll_count} data = result.data or {} job_status = data.get("status", "running") logger.info("release_check: poll run=%s job=%s status=%s count=%d", run_id, job_id, job_status, poll_count) update = {**state, "job_id": job_id, "job_status": job_status, "poll_count": poll_count} if job_status == "succeeded": update["result"] = data.get("result") or data.get("output") update["graph_status"] = "succeeded" elif job_status in ("failed", "cancelled"): update["error"] = data.get("error") or f"Job {job_status}" update["graph_status"] = "failed" return update async def finalize_node(state: ReleaseCheckState) -> ReleaseCheckState: """Ensure result has the expected release_check report structure.""" result = state.get("result") if not result: result = { "pass": False, "gates": [], "recommendations": [state.get("error", "Unknown error")], "summary": state.get("error", "Release check failed"), "elapsed_ms": 0, } return {**state, "result": result} # ─── Conditional routing ────────────────────────────────────────────────────── def _should_continue_polling(state: ReleaseCheckState) -> str: """Route: back to poll_job if still running, else go to finalize.""" job_status = state.get("job_status", "running") graph_status = state.get("graph_status", "") if graph_status in ("succeeded", "failed"): return "finalize" if job_status in ("succeeded", "failed", "cancelled"): return "finalize" return "poll_job" def _after_start(state: ReleaseCheckState) -> str: """Route after start_job: go directly to finalize if already done, else poll.""" if state.get("graph_status") in ("succeeded", "failed"): return "finalize" return "poll_job" # ─── Graph builder ──────────────────────────────────────────────────────────── def build_release_check_graph(): """ Build and compile the release_check LangGraph. Graph: start_job → [if done] finalize → END → [if running] poll_job → [loop] → finalize → END """ graph = StateGraph(ReleaseCheckState) graph.add_node("start_job", start_job_node) graph.add_node("poll_job", poll_job_node) graph.add_node("finalize", finalize_node) graph.set_entry_point("start_job") graph.add_conditional_edges( "start_job", _after_start, {"finalize": "finalize", "poll_job": "poll_job"}, ) graph.add_conditional_edges( "poll_job", _should_continue_polling, {"poll_job": "poll_job", "finalize": "finalize"}, ) graph.add_edge("finalize", END) return graph.compile()