feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
284
services/sofiia-supervisor/app/main.py
Normal file
284
services/sofiia-supervisor/app/main.py
Normal file
@@ -0,0 +1,284 @@
|
||||
"""
|
||||
Sofiia Supervisor — FastAPI Application
|
||||
|
||||
HTTP API for launching and monitoring LangGraph runs.
|
||||
|
||||
Endpoints:
|
||||
POST /v1/graphs/{graph_name}/runs — start a new run (async)
|
||||
GET /v1/runs/{run_id} — get run status + result
|
||||
POST /v1/runs/{run_id}/cancel — cancel a running run
|
||||
GET /healthz — health check
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from .config import settings
|
||||
from .graphs import GRAPH_REGISTRY
|
||||
from .models import (
|
||||
CancelRunResponse,
|
||||
EventType,
|
||||
GetRunResponse,
|
||||
RunEvent,
|
||||
RunRecord,
|
||||
RunStatus,
|
||||
StartRunRequest,
|
||||
StartRunResponse,
|
||||
)
|
||||
from .state_backend import StateBackend, create_state_backend
|
||||
|
||||
# Root logging is configured at import time so that every record emitted by
# this process uses the same timestamped "[LEVEL] logger-name: message" layout.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the endpoints and the background graph runner.
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── App ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Application object. Swagger UI is exposed at /docs; ReDoc is switched off.
app = FastAPI(
    title="Sofiia Supervisor",
    description="LangGraph orchestration service for DAARION.city",
    version="1.0.0",
    redoc_url=None,
    docs_url="/docs",
)

# CORS: any origin and any request header are accepted, but only the two HTTP
# verbs used by the endpoints in this module are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["POST", "GET"],
    allow_origins=["*"],
)
|
||||
|
||||
# Process-wide state backend instance; stays None until the first request for it.
_state_backend: Optional[StateBackend] = None


def get_state_backend() -> StateBackend:
    """Return the shared state backend, building it via ``create_state_backend`` on first use."""
    global _state_backend
    if _state_backend is not None:
        # Already built by an earlier call — hand back the same instance.
        return _state_backend
    _state_backend = create_state_backend()
    return _state_backend
|
||||
|
||||
|
||||
# ─── Auth (per-endpoint key check) ────────────────────────────────────────────
|
||||
|
||||
def _check_internal_key(request: Request):
    """
    Reject the request unless it carries the configured internal key.

    The expected key is read from ``settings.SUPERVISOR_INTERNAL_KEY``. The
    candidate is taken from the ``Authorization`` header, with a leading
    ``Bearer `` prefix removed if present and surrounding whitespace stripped.
    When no key is configured the check is skipped and every request passes.

    Args:
        request: Incoming request whose headers are inspected.

    Raises:
        HTTPException: 401 when a key is configured and the header value does
            not match it.
    """
    import hmac

    key = settings.SUPERVISOR_INTERNAL_KEY
    if not key:
        return  # no key configured → open (rely on network-level protection)

    auth = request.headers.get("Authorization", "")
    provided = auth.removeprefix("Bearer ").strip()

    # Compare in constant time so that response timing does not reveal how many
    # leading characters of a guessed key were correct. Both sides are encoded
    # to bytes because compare_digest rejects str arguments containing
    # non-ASCII characters.
    if not hmac.compare_digest(provided.encode("utf-8"), key.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
# ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _new_run_id() -> str:
    """Return a new run identifier: ``gr_`` followed by the first 20 hex digits of a random UUID."""
    hex_digits = uuid.uuid4().hex
    return f"gr_{hex_digits[:20]}"
|
||||
|
||||
|
||||
def _now() -> str:
    """Return the current time in UTC as a timezone-aware ISO-8601 string."""
    utc = datetime.timezone.utc
    current = datetime.datetime.now(utc)
    return current.isoformat()
|
||||
|
||||
|
||||
def _input_hash(inp: Dict) -> str:
    """
    Return a short fingerprint of *inp* suitable for logs and run events.

    The value is serialised to JSON with sorted keys (non-ASCII characters kept
    as-is), hashed with SHA-256, and truncated to the first 12 hex characters.
    If serialisation or hashing fails for any reason, ``"?"`` is returned
    instead of raising.
    """
    import json

    try:
        canonical = json.dumps(inp, sort_keys=True, ensure_ascii=False)
        full_digest = hashlib.sha256(canonical.encode()).hexdigest()
        return full_digest[:12]
    except Exception:
        # Best-effort only: a payload that cannot be fingerprinted must not
        # break the caller.
        return "?"
|
||||
|
||||
|
||||
# ─── Graph runner (background task) ──────────────────────────────────────────
|
||||
|
||||
async def _run_graph(run_id: str, graph_name: str, initial_state: Dict[str, Any]):
    """
    Execute the LangGraph in a background asyncio task.

    Updates run state in the backend as it progresses. The input payload is
    never logged; only a short hash of it is recorded in the start event.

    Args:
        run_id: Identifier of a run record already saved in the state backend.
        graph_name: Key into ``GRAPH_REGISTRY`` selecting the graph factory.
        initial_state: State dict handed to the compiled graph's ``ainvoke``.

    Raises:
        asyncio.CancelledError: Re-raised after the run has been marked
            cancelled, so whoever awaits this coroutine still observes the
            cancellation.
    """
    backend = get_state_backend()

    run = await backend.get_run(run_id)
    if not run:
        logger.error("_run_graph: run %s not found in state backend", run_id)
        return

    # A cancel request can be recorded while the run is still queued. Starting
    # anyway would overwrite CANCELLED with RUNNING, and the later
    # `status != CANCELLED` checks could then no longer see the request.
    if run.status == RunStatus.CANCELLED:
        logger.info("run %s was cancelled before start; skipping execution", run_id)
        return

    # Mark as running
    run.status = RunStatus.RUNNING
    run.started_at = _now()
    await backend.save_run(run)

    await backend.append_event(run_id, RunEvent(
        ts=_now(), type=EventType.NODE_START, node="graph_start",
        details={"input_hash": _input_hash(initial_state.get("input", {}))},
    ))

    try:
        # Registry values are factories: calling one yields the compiled graph.
        compiled = GRAPH_REGISTRY[graph_name]()

        # Run graph asynchronously
        final_state = await compiled.ainvoke(initial_state)

        # A final state without "graph_status" is treated as success.
        graph_status = final_state.get("graph_status", "succeeded")
        result = final_state.get("result")
        error = final_state.get("error")

        await backend.append_event(run_id, RunEvent(
            ts=_now(), type=EventType.NODE_END, node="graph_end",
            details={"graph_status": graph_status},
        ))

        # Re-read the record: it may have been marked CANCELLED while the graph
        # was executing, in which case the cancelled status is left in place.
        run = await backend.get_run(run_id)
        if run and run.status != RunStatus.CANCELLED:
            run.status = RunStatus.SUCCEEDED if graph_status == "succeeded" else RunStatus.FAILED
            run.finished_at = _now()
            run.result = result
            run.error = error
            await backend.save_run(run)

    except asyncio.CancelledError:
        logger.info("run %s cancelled", run_id)
        run = await backend.get_run(run_id)
        if run:
            run.status = RunStatus.CANCELLED
            run.finished_at = _now()
            await backend.save_run(run)
        # Propagate the cancellation once bookkeeping is done; swallowing it
        # would make the task look like it completed normally.
        raise

    except Exception as e:
        logger.exception("run %s graph execution error: %s", run_id, str(e)[:200])
        run = await backend.get_run(run_id)
        if run and run.status != RunStatus.CANCELLED:
            run.status = RunStatus.FAILED
            run.finished_at = _now()
            # Error text is truncated before being stored on the record.
            run.error = str(e)[:500]
            await backend.save_run(run)

        await backend.append_event(run_id, RunEvent(
            ts=_now(), type=EventType.ERROR,
            details={"error": str(e)[:300]},
        ))
|
||||
|
||||
|
||||
# ─── Endpoints ────────────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/healthz")
async def healthz():
    """Return a static "ok" status with the registered graph names and selected settings."""
    payload = {"status": "ok", "service": "sofiia-supervisor"}
    payload["graphs"] = list(GRAPH_REGISTRY.keys())
    payload["state_backend"] = settings.STATE_BACKEND
    payload["gateway_url"] = settings.GATEWAY_BASE_URL
    return payload
|
||||
|
||||
|
||||
@app.post("/v1/graphs/{graph_name}/runs", response_model=StartRunResponse)
async def start_run(
    graph_name: str,
    body: StartRunRequest,
    request: Request,
    background_tasks: BackgroundTasks,
):
    """
    Queue a new run of the named graph and return its id immediately.

    A QUEUED run record is saved first, then graph execution is scheduled as a
    background task. Clients poll GET /v1/runs/{run_id} for status and result.

    Raises:
        HTTPException: 401 from the internal-key check, or 404 when
            ``graph_name`` is not present in ``GRAPH_REGISTRY``.
    """
    _check_internal_key(request)

    if graph_name not in GRAPH_REGISTRY:
        raise HTTPException(
            status_code=404,
            detail=f"Unknown graph '{graph_name}'. Available: {list(GRAPH_REGISTRY.keys())}",
        )

    run_id = _new_run_id()
    queued_at = _now()

    # Caller identity, shared by the stored record and the graph's initial state.
    caller = {
        "agent_id": body.agent_id,
        "workspace_id": body.workspace_id,
        "user_id": body.user_id,
    }

    record = RunRecord(
        run_id=run_id,
        graph=graph_name,
        status=RunStatus.QUEUED,
        started_at=queued_at,
        **caller,
    )
    await get_state_backend().save_run(record)

    # Initial LangGraph state: run id, caller identity, raw input, status marker.
    initial_state = {
        "run_id": run_id,
        **caller,
        "input": body.input,
        "graph_status": "running",
    }
    background_tasks.add_task(_run_graph, run_id, graph_name, initial_state)

    # Only a hash of the input is logged, never the payload itself.
    logger.info(
        "start_run graph=%s run=%s agent=%s input_hash=%s",
        graph_name, run_id, body.agent_id, _input_hash(body.input),
    )

    return StartRunResponse(run_id=run_id, status=RunStatus.QUEUED)
|
||||
|
||||
|
||||
@app.get("/v1/runs/{run_id}", response_model=GetRunResponse)
async def get_run(run_id: str, request: Request):
    """
    Return status, timestamps, result and event log for one run.

    Raises:
        HTTPException: 401 from the internal-key check, or 404 when the state
            backend has no record for ``run_id``.
    """
    _check_internal_key(request)

    record = await get_state_backend().get_run(run_id)
    if not record:
        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")

    # Fields copied one-to-one from the stored record into the response.
    exposed = (
        "run_id",
        "graph",
        "status",
        "started_at",
        "finished_at",
        "result",
        "events",
    )
    return GetRunResponse(**{field: getattr(record, field) for field in exposed})
|
||||
|
||||
|
||||
@app.post("/v1/runs/{run_id}/cancel", response_model=CancelRunResponse)
async def cancel_run(run_id: str, request: Request):
    """
    Ask the state backend to cancel a run.

    When the backend reports that nothing was cancelled, the response carries
    the run's status as it was read just before the cancel attempt.

    Raises:
        HTTPException: 401 from the internal-key check, or 404 when the state
            backend has no record for ``run_id``.
    """
    _check_internal_key(request)

    backend = get_state_backend()
    run = await backend.get_run(run_id)
    if not run:
        raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")

    if await backend.cancel_run(run_id):
        logger.info("cancel_run run=%s requested", run_id)
        return CancelRunResponse(
            run_id=run_id,
            status=RunStatus.CANCELLED,
            message="Cancellation requested. In-flight tool calls may still complete.",
        )

    # Backend declined the cancel: report the status observed above.
    return CancelRunResponse(
        run_id=run_id,
        status=run.status,
        message=f"Run is already {run.status.value}, cannot cancel",
    )
|
||||
Reference in New Issue
Block a user