Files
microdao-daarion/services/aurora-service/app/job_store.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

255 lines
8.4 KiB
Python

from __future__ import annotations
import json
import logging
import shutil
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional
from .schemas import AuroraJob, AuroraResult, AuroraMode, JobStatus, MediaType, ProcessingStep
logger = logging.getLogger(__name__)
def _model_dump(model: Any) -> Dict[str, Any]:
if hasattr(model, "model_dump"):
return model.model_dump()
return model.dict()
class JobStore:
    """Thread-safe, file-backed store for Aurora jobs.

    Jobs live in an in-memory dict guarded by a reentrant lock and are
    mirrored to ``<data_dir>/jobs/<job_id>.json`` after every mutation, so
    state survives a service restart (see :meth:`recover_interrupted_jobs`).

    Fixes versus the previous revision:
    - ``append_processing_step`` and ``request_cancel`` performed a
      read-then-patch without holding the lock, so two concurrent callers
      could drop each other's updates; both now run fully under the lock
      (safe because ``_lock`` is an ``RLock`` and ``patch_job`` re-acquires it).
    - ``count_by_status`` no longer raises ``KeyError`` if a persisted job
      carries an unexpected status value.
    """

    def __init__(self, data_dir: Path) -> None:
        """Create the store rooted at *data_dir* and load persisted jobs."""
        self.data_dir = data_dir
        self.jobs_dir = data_dir / "jobs"
        self.uploads_dir = data_dir / "uploads"
        self.outputs_dir = data_dir / "outputs"
        self.jobs_dir.mkdir(parents=True, exist_ok=True)
        self.uploads_dir.mkdir(parents=True, exist_ok=True)
        self.outputs_dir.mkdir(parents=True, exist_ok=True)
        # RLock (not Lock): public methods that hold the lock call other
        # public methods that also acquire it (e.g. request_cancel -> patch_job).
        self._lock = threading.RLock()
        self._jobs: Dict[str, AuroraJob] = {}
        self._load_existing_jobs()

    def _job_path(self, job_id: str) -> Path:
        """Return the JSON file that persists *job_id*."""
        return self.jobs_dir / f"{job_id}.json"

    def _save_job(self, job: AuroraJob) -> None:
        """Write *job* to its JSON file (callers hold the lock when mutating)."""
        self._job_path(job.job_id).write_text(
            json.dumps(_model_dump(job), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def _load_existing_jobs(self) -> None:
        """Populate the in-memory map from persisted job files.

        Invalid or unreadable files are skipped with a warning instead of
        aborting startup; sorted() gives a deterministic load order.
        """
        for path in sorted(self.jobs_dir.glob("*.json")):
            try:
                payload = json.loads(path.read_text(encoding="utf-8"))
                job = AuroraJob(**payload)
                self._jobs[job.job_id] = job
            except Exception as exc:
                logger.warning("Skipping unreadable job file %s: %s", path, exc)

    def create_job(
        self,
        *,
        job_id: str,
        file_name: str,
        input_path: Path,
        input_hash: str,
        mode: AuroraMode,
        media_type: MediaType,
        created_at: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> AuroraJob:
        """Create, register and persist a new job; return it.

        *input_path* is stored as a string; *metadata* defaults to an empty
        dict so callers never share a mutable default.
        """
        job = AuroraJob(
            job_id=job_id,
            file_name=file_name,
            mode=mode,
            media_type=media_type,
            input_path=str(input_path),
            input_hash=input_hash,
            created_at=created_at,
            metadata=metadata or {},
        )
        with self._lock:
            self._jobs[job_id] = job
            self._save_job(job)
        return job

    def get_job(self, job_id: str) -> Optional[AuroraJob]:
        """Return the job for *job_id*, or ``None`` when unknown."""
        with self._lock:
            return self._jobs.get(job_id)

    def list_jobs(self) -> List[AuroraJob]:
        """Return a snapshot list of all known jobs."""
        with self._lock:
            return list(self._jobs.values())

    def patch_job(self, job_id: str, **changes: Any) -> AuroraJob:
        """Apply field *changes* to a job, re-validate, persist and return it.

        Raises:
            KeyError: if *job_id* is unknown.
        """
        with self._lock:
            current = self._jobs.get(job_id)
            if not current:
                raise KeyError(job_id)
            payload = _model_dump(current)
            payload.update(changes)
            # The id is the storage key; never allow it to be patched away.
            payload["job_id"] = job_id
            updated = AuroraJob(**payload)
            self._jobs[job_id] = updated
            self._save_job(updated)
            return updated

    def append_processing_step(self, job_id: str, step: ProcessingStep) -> AuroraJob:
        """Append *step* to the job's processing log and persist.

        The read-modify-write runs under the lock so concurrent appends
        cannot overwrite each other.  Raises ``KeyError`` for unknown jobs.
        """
        with self._lock:
            job = self._jobs.get(job_id)
            if not job:
                raise KeyError(job_id)
            steps = list(job.processing_log)
            steps.append(step)
            return self.patch_job(job_id, processing_log=steps)

    def set_progress(self, job_id: str, *, progress: int, current_stage: str) -> AuroraJob:
        """Update progress (clamped to 0..100) and the current stage label."""
        bounded = max(0, min(100, int(progress)))
        return self.patch_job(job_id, progress=bounded, current_stage=current_stage)

    def mark_processing(self, job_id: str, *, started_at: str) -> AuroraJob:
        """Transition a job to ``processing`` and clear any prior error."""
        return self.patch_job(
            job_id,
            status="processing",
            progress=1,
            current_stage="dispatching",
            started_at=started_at,
            error_message=None,
        )

    def mark_completed(self, job_id: str, *, result: AuroraResult, completed_at: str) -> AuroraJob:
        """Record a successful *result* and mark the job ``completed``."""
        return self.patch_job(
            job_id,
            status="completed",
            progress=100,
            current_stage="completed",
            result=result,
            completed_at=completed_at,
            error_message=None,
        )

    def mark_failed(self, job_id: str, *, message: str, completed_at: str) -> AuroraJob:
        """Mark the job ``failed`` with *message* as the error."""
        return self.patch_job(
            job_id,
            status="failed",
            current_stage="failed",
            error_message=message,
            completed_at=completed_at,
        )

    def request_cancel(self, job_id: str) -> AuroraJob:
        """Request cancellation of a job.

        Terminal jobs are returned unchanged; queued jobs are cancelled
        immediately; processing jobs are flagged so the worker can stop.
        The whole check-then-act runs under the lock to avoid racing a
        concurrent status change.  Raises ``KeyError`` for unknown jobs.
        """
        with self._lock:
            job = self._jobs.get(job_id)
            if not job:
                raise KeyError(job_id)
            # Terminal states: nothing to cancel.
            if job.status in ("completed", "failed", "cancelled"):
                return job
            # Not started yet: cancel outright.
            if job.status == "queued":
                return self.patch_job(
                    job_id,
                    status="cancelled",
                    current_stage="cancelled",
                    cancel_requested=True,
                    progress=0,
                )
            # In flight: flag it and let the worker finish the transition.
            return self.patch_job(
                job_id,
                cancel_requested=True,
                current_stage="cancelling",
            )

    def delete_job(self, job_id: str, *, remove_artifacts: bool = True) -> bool:
        """Remove a job and (optionally) its upload/output directories.

        Returns ``True`` if the job existed, ``False`` otherwise.
        """
        with self._lock:
            current = self._jobs.pop(job_id, None)
            if not current:
                return False
            self._job_path(job_id).unlink(missing_ok=True)
            if remove_artifacts:
                shutil.rmtree(self.uploads_dir / job_id, ignore_errors=True)
                shutil.rmtree(self.outputs_dir / job_id, ignore_errors=True)
            return True

    def mark_cancelled(self, job_id: str, *, completed_at: str, message: str = "Cancelled by user") -> AuroraJob:
        """Finalize a job as ``cancelled`` with an explanatory message."""
        return self.patch_job(
            job_id,
            status="cancelled",
            current_stage="cancelled",
            cancel_requested=True,
            error_message=message,
            completed_at=completed_at,
        )

    def count_by_status(self) -> Dict[JobStatus, int]:
        """Return a status -> job-count mapping; known statuses always present."""
        counts: Dict[JobStatus, int] = {
            "queued": 0,
            "processing": 0,
            "completed": 0,
            "failed": 0,
            "cancelled": 0,
        }
        with self._lock:
            for job in self._jobs.values():
                # get() guards against a status value outside the known set
                # (e.g. from a hand-edited or future-versioned job file).
                counts[job.status] = counts.get(job.status, 0) + 1
        return counts

    def recover_interrupted_jobs(
        self,
        *,
        completed_at: str,
        message: str,
        strategy: str = "failed",
    ) -> int:
        """Recover queued/processing jobs after service restart.

        strategy:
        - "failed": mark as failed
        - "requeue": move back to queue for auto-retry on startup

        Returns the number of jobs touched.  Recovery bookkeeping
        (``recovery_count``, ``last_recovery_at``, ``last_recovery_reason``)
        is recorded in each job's metadata either way.
        """
        mode = (strategy or "failed").strip().lower()
        recovered = 0
        with self._lock:
            for job_id, current in list(self._jobs.items()):
                # Only non-terminal jobs were interrupted by the restart.
                if current.status not in ("queued", "processing"):
                    continue
                payload = _model_dump(current)
                meta = payload.get("metadata") or {}
                if not isinstance(meta, dict):
                    meta = {}
                meta["recovery_count"] = int(meta.get("recovery_count", 0)) + 1
                meta["last_recovery_at"] = completed_at
                meta["last_recovery_reason"] = message
                payload["metadata"] = meta
                if mode == "requeue":
                    # Reset to a fresh queued state so the startup loop retries it.
                    payload.update(
                        {
                            "status": "queued",
                            "current_stage": "queued (recovered after restart)",
                            "error_message": None,
                            "started_at": None,
                            "completed_at": None,
                            "cancel_requested": False,
                            "progress": 0,
                        }
                    )
                else:
                    # Default: fail it, keeping at least 1% progress so the UI
                    # distinguishes "started then died" from "never started".
                    payload.update(
                        {
                            "status": "failed",
                            "current_stage": "failed",
                            "error_message": message,
                            "completed_at": completed_at,
                            "progress": max(1, int(payload.get("progress", 0))),
                        }
                    )
                payload["job_id"] = job_id
                updated = AuroraJob(**payload)
                self._jobs[job_id] = updated
                self._save_job(updated)
                recovered += 1
        return recovered