feat(platform): add new services, tools, tests and crews modules

New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd config, React static assets
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
Author: Apple
Date: 2026-03-03 07:14:14 -08:00
Commit: 129e4ea1fc (parent: e9dedffa48)
241 changed files with 69349 additions and 0 deletions

@@ -0,0 +1,595 @@
"""
Cost & Resource Analyzer (FinOps MVP)
Reads audit events from AuditStore and computes:
- Aggregated cost_units by tool/agent/workspace/status
- Top spenders (tools, agents, users)
- Anomalies (cost spikes, error rate spikes)
- Cost model weights
"cost_units" = cost_per_call(tool) + duration_ms * cost_per_ms(tool)
These are relative units, not real dollars.
No payload access — all inputs are aggregation parameters only.
"""
from __future__ import annotations
import datetime
import logging
import os
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# ─── Config loader ────────────────────────────────────────────────────────────
_weights_cache: Optional[Dict] = None
_WEIGHTS_PATH = os.path.join(
os.getenv("REPO_ROOT", str(Path(__file__).parent.parent.parent)),
"config", "cost_weights.yml",
)
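# Sketch of the cost_weights.yml shape the accessors below expect (keys
# inferred from this module's reads; the shipped file may carry more fields):
#
#   defaults: {cost_per_call: 1.0, cost_per_ms: 0.001}
#   tools:
#     browser_tool: {cost_per_call: 2.0, cost_per_ms: 0.002, category: web}
#   anomaly: {spike_ratio_threshold: 3.0, min_calls_threshold: 10}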
def _load_weights() -> Dict:
global _weights_cache
if _weights_cache is not None:
return _weights_cache
try:
import yaml
with open(_WEIGHTS_PATH, "r") as f:
_weights_cache = yaml.safe_load(f) or {}
except Exception as e:
logger.warning("cost_weights.yml not loaded: %s", e)
_weights_cache = {}
return _weights_cache
def reload_cost_weights() -> None:
"""Force reload weights (for tests)."""
global _weights_cache
_weights_cache = None
def get_weights_for_tool(tool: str) -> Tuple[float, float]:
"""Return (cost_per_call, cost_per_ms) for a tool."""
cfg = _load_weights()
defaults = cfg.get("defaults", {})
tool_cfg = (cfg.get("tools") or {}).get(tool, {})
cpc = float(tool_cfg.get("cost_per_call", defaults.get("cost_per_call", 1.0)))
cpm = float(tool_cfg.get("cost_per_ms", defaults.get("cost_per_ms", 0.001)))
return cpc, cpm
def compute_event_cost(event: Dict) -> float:
"""Compute cost_units for a single audit event."""
tool = event.get("tool", "")
duration_ms = float(event.get("duration_ms", 0))
cpc, cpm = get_weights_for_tool(tool)
return round(cpc + duration_ms * cpm, 4)
# ─── Time helpers ─────────────────────────────────────────────────────────────
def _now_utc() -> datetime.datetime:
return datetime.datetime.now(datetime.timezone.utc)
def _iso(dt: datetime.datetime) -> str:
return dt.isoformat()
def _parse_iso(s: str) -> datetime.datetime:
s = s.replace("Z", "+00:00")
try:
return datetime.datetime.fromisoformat(s)
except Exception:
return _now_utc()
def _bucket_hour(ts: str) -> str:
"""Truncate ISO ts to hour: '2026-02-23T10:00:00+00:00'."""
return ts[:13] + ":00"
# ─── Aggregation helpers ──────────────────────────────────────────────────────
def _aggregate(
events: List[Dict],
group_keys: List[str],
) -> Dict[str, Dict]:
"""
Aggregate events by composite key (e.g. ["tool"] or ["agent_id", "tool"]).
Returns {key_str: {count, cost_units, duration_sum, failed_count, ...}}.
"""
result: Dict[str, Dict] = defaultdict(lambda: {
"count": 0,
"cost_units": 0.0,
"duration_ms_sum": 0.0,
"failed_count": 0,
"denied_count": 0,
"in_size_sum": 0,
"out_size_sum": 0,
})
for ev in events:
parts = [str(ev.get(k, "unknown")) for k in group_keys]
key = ":".join(parts)
cost = compute_event_cost(ev)
status = ev.get("status", "pass")
r = result[key]
r["count"] += 1
r["cost_units"] = round(r["cost_units"] + cost, 4)
r["duration_ms_sum"] = round(r["duration_ms_sum"] + float(ev.get("duration_ms", 0)), 2)
r["in_size_sum"] += int(ev.get("in_size", 0))
r["out_size_sum"] += int(ev.get("out_size", 0))
if status in ("failed", "error"):
r["failed_count"] += 1
elif status == "denied":
r["denied_count"] += 1
# Enrich with averages
for key, r in result.items():
n = r["count"] or 1
r["avg_duration_ms"] = round(r["duration_ms_sum"] / n, 1)
r["avg_cost_units"] = round(r["cost_units"] / n, 4)
r["error_rate"] = round(r["failed_count"] / (r["count"] or 1), 4)
return dict(result)
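# Example: three events for a hypothetical tool "web_search" (durations
# 100/200/300 ms, one failed) under default weights (1.0/call, 0.001/ms)
# aggregate to:
#   {"web_search": {"count": 3, "cost_units": 3.6, "duration_ms_sum": 600.0,
#                   "failed_count": 1, "avg_duration_ms": 200.0,
#                   "avg_cost_units": 1.2, "error_rate": 0.3333, ...}}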
def _top_n(aggregated: Dict[str, Dict], key_field: str, n: int, sort_by: str = "cost_units") -> List[Dict]:
"""Sort aggregated dict by sort_by and return top N."""
items = [
{"key": k, key_field: k, **v}
for k, v in aggregated.items()
]
items.sort(key=lambda x: x.get(sort_by, 0), reverse=True)
return items[:n]
# ─── Actions ──────────────────────────────────────────────────────────────────
def action_report(
store,
time_range: Optional[Dict[str, str]] = None,
group_by: Optional[List[str]] = None,
top_n: int = 10,
include_failed: bool = True,
include_hourly: bool = False,
) -> Dict[str, Any]:
"""
Generate aggregated cost report for a time range.
Returns:
totals, breakdowns by group_by keys, top spenders, optional hourly trend.
"""
now = _now_utc()
tr = time_range or {}
from_ts = tr.get("from") or _iso(now - datetime.timedelta(days=7))
to_ts = tr.get("to") or _iso(now)
events = store.read(from_ts=from_ts, to_ts=to_ts, limit=200_000)
if not include_failed:
events = [e for e in events if e.get("status", "pass") not in ("failed", "error")]
# Totals
total_cost = sum(compute_event_cost(e) for e in events)
total_calls = len(events)
total_failed = sum(1 for e in events if e.get("status") in ("failed", "error"))
total_denied = sum(1 for e in events if e.get("status") == "denied")
# Breakdowns
by_key = group_by or ["tool"]
breakdowns: Dict[str, List[Dict]] = {}
for gk in by_key:
agg = _aggregate(events, [gk])
breakdowns[gk] = _top_n(agg, gk, top_n)
# Hourly trend (optional, for last 7d max)
hourly: List[Dict] = []
if include_hourly and events:
hourly_agg: Dict[str, Dict] = defaultdict(lambda: {"count": 0, "cost_units": 0.0})
for ev in events:
bucket = _bucket_hour(ev.get("ts", ""))
hourly_agg[bucket]["count"] += 1
hourly_agg[bucket]["cost_units"] = round(
hourly_agg[bucket]["cost_units"] + compute_event_cost(ev), 4
)
hourly = [{"hour": k, **v} for k, v in sorted(hourly_agg.items())]
return {
"time_range": {"from": from_ts, "to": to_ts},
"totals": {
"calls": total_calls,
"cost_units": round(total_cost, 2),
"failed": total_failed,
"denied": total_denied,
"error_rate": round(total_failed / (total_calls or 1), 4),
},
"breakdowns": breakdowns,
**({"hourly": hourly} if include_hourly else {}),
}
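# Example: a report broken down by tool and agent with the hourly trend
# (time_range omitted, so the 7-day default window applies):
#   action_report(store, group_by=["tool", "agent_id"], include_hourly=True)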
def action_top(
store,
window_hours: int = 24,
top_n: int = 10,
) -> Dict[str, Any]:
"""
Quick top-N report for tools, agents, and users over window_hours.
"""
now = _now_utc()
from_ts = _iso(now - datetime.timedelta(hours=window_hours))
to_ts = _iso(now)
events = store.read(from_ts=from_ts, to_ts=to_ts, limit=100_000)
top_tools = _top_n(_aggregate(events, ["tool"]), "tool", top_n)
top_agents = _top_n(_aggregate(events, ["agent_id"]), "agent_id", top_n)
top_users = _top_n(_aggregate(events, ["user_id"]), "user_id", top_n)
top_workspaces = _top_n(_aggregate(events, ["workspace_id"]), "workspace_id", top_n)
return {
"window_hours": window_hours,
"time_range": {"from": from_ts, "to": to_ts},
"total_calls": len(events),
"top_tools": top_tools,
"top_agents": top_agents,
"top_users": top_users,
"top_workspaces": top_workspaces,
}
def action_anomalies(
store,
window_minutes: int = 60,
baseline_hours: int = 24,
ratio_threshold: Optional[float] = None,
min_calls: Optional[int] = None,
tools_filter: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""
Detect cost/call spikes and elevated error rates.
Algorithm:
1. Compute per-tool metrics for window [now-window_minutes, now]
2. Compute per-tool metrics for baseline [now-baseline_hours, now-window_minutes]
3. Spike = window_rate / baseline_rate >= ratio_threshold AND calls >= min_calls
4. Error spike = failed_rate > 10% AND calls >= min_calls
"""
cfg = _load_weights()
anomaly_cfg = cfg.get("anomaly", {})
if ratio_threshold is None:
ratio_threshold = float(anomaly_cfg.get("spike_ratio_threshold", 3.0))
if min_calls is None:
min_calls = int(anomaly_cfg.get("min_calls_threshold", 10))
now = _now_utc()
window_from = _iso(now - datetime.timedelta(minutes=window_minutes))
baseline_from = _iso(now - datetime.timedelta(hours=baseline_hours))
baseline_to = window_from # non-overlapping
# Fetch both windows
window_events = store.read(from_ts=window_from, to_ts=_iso(now), limit=50_000)
baseline_events = store.read(from_ts=baseline_from, to_ts=baseline_to, limit=200_000)
if tools_filter:
window_events = [e for e in window_events if e.get("tool") in tools_filter]
baseline_events = [e for e in baseline_events if e.get("tool") in tools_filter]
# Aggregate by tool
window_by_tool = _aggregate(window_events, ["tool"])
baseline_by_tool = _aggregate(baseline_events, ["tool"])
# Normalise baseline to per-minute rate
baseline_minutes = (baseline_hours * 60) - window_minutes
baseline_minutes = max(baseline_minutes, 1)
window_minutes_actual = float(window_minutes)
anomalies = []
all_tools = set(window_by_tool.keys()) | set(baseline_by_tool.keys())
for tool_key in sorted(all_tools):
w = window_by_tool.get(tool_key, {})
b = baseline_by_tool.get(tool_key, {})
w_calls = w.get("count", 0)
b_calls = b.get("count", 0)
if w_calls < min_calls:
continue # Not enough traffic for meaningful anomaly
# Per-minute rates
w_rate = w_calls / window_minutes_actual
b_rate = b_calls / baseline_minutes if b_calls > 0 else 0.0
# Cost spike
w_cost_pm = w.get("cost_units", 0) / window_minutes_actual
b_cost_pm = b.get("cost_units", 0) / baseline_minutes if b_calls > 0 else 0.0
call_ratio = (w_rate / b_rate) if b_rate > 0 else float("inf")
cost_ratio = (w_cost_pm / b_cost_pm) if b_cost_pm > 0 else float("inf")
if call_ratio >= ratio_threshold or cost_ratio >= ratio_threshold:
ratio_display = round(max(call_ratio, cost_ratio), 2)
if ratio_display == float("inf"):
ratio_display = "∞ (no baseline)"
w_cost = w.get("cost_units", 0)
b_cost = b.get("cost_units", 0)
anomalies.append({
"type": "cost_spike",
"key": f"tool:{tool_key}",
"tool": tool_key,
"window": f"last_{window_minutes}m",
"baseline": f"prev_{baseline_hours}h",
"window_calls": w_calls,
"baseline_calls": b_calls,
"window_cost_units": round(w_cost, 2),
"baseline_cost_units": round(b_cost, 2),
"ratio": ratio_display,
"recommendation": _spike_recommendation(tool_key, ratio_display, w_calls),
})
# Error rate spike
w_err_rate = w.get("error_rate", 0)
if w_err_rate > 0.10 and w_calls >= min_calls:
anomalies.append({
"type": "error_spike",
"key": f"tool:{tool_key}",
"tool": tool_key,
"window": f"last_{window_minutes}m",
"failed_calls": w.get("failed_count", 0),
"total_calls": w_calls,
"error_rate": round(w_err_rate, 4),
"recommendation": f"Investigate failures for '{tool_key}': {w.get('failed_count',0)} failed / {w_calls} calls ({round(w_err_rate*100,1)}% error rate).",
})
# De-duplicate tool+type combos (error_spike already separate)
seen = set()
unique_anomalies = []
for a in anomalies:
key = (a["type"], a.get("tool", ""))
if key not in seen:
unique_anomalies.append(a)
seen.add(key)
return {
"anomalies": unique_anomalies,
"anomaly_count": len(unique_anomalies),
"window_minutes": window_minutes,
"baseline_hours": baseline_hours,
"ratio_threshold": ratio_threshold,
"min_calls": min_calls,
"stats": {
"window_calls": len(window_events),
"baseline_calls": len(baseline_events),
},
}
def action_weights(repo_root: Optional[str] = None) -> Dict[str, Any]:
    """Return current cost weights configuration (repo_root is accepted for API parity but unused)."""
global _weights_cache
_weights_cache = None # Force reload
cfg = _load_weights()
return {
"defaults": cfg.get("defaults", {}),
"tools": cfg.get("tools", {}),
"anomaly": cfg.get("anomaly", {}),
"config_path": _WEIGHTS_PATH,
}
# ─── Recommendation templates ─────────────────────────────────────────────────
def _spike_recommendation(tool: str, ratio: Any, calls: int) -> str:
cfg = _load_weights()
tool_cfg = (cfg.get("tools") or {}).get(tool, {})
category = tool_cfg.get("category", "")
if category == "media":
return (
f"'{tool}' cost spike (ratio={ratio}, {calls} calls). "
"Consider: rate-limit per workspace, queue with priority, review calling agents."
)
if category == "release":
return (
f"'{tool}' called more frequently than baseline (ratio={ratio}). "
"Review if release_check is looping or being triggered too often."
)
if category == "web":
return (
f"'{tool}' spike (ratio={ratio}). Consider: result caching, dedup identical queries."
)
return (
f"'{tool}' cost spike (ratio={ratio}, {calls} calls in window). "
"Review caller agents and apply rate limits if needed."
)
# ─── backend=auto store resolver ─────────────────────────────────────────────
def _resolve_store(backend: str = "auto"):
"""
Return an AuditStore based on backend param.
backend='auto' (default): uses the globally configured store (which may be
AutoAuditStore, Postgres, or JSONL).
backend='jsonl': forces JsonlAuditStore (7-day window max recommended).
backend='memory': MemoryAuditStore (testing).
"""
from audit_store import get_audit_store, JsonlAuditStore, MemoryAuditStore
if backend in ("auto", None, ""):
return get_audit_store()
if backend == "jsonl":
audit_dir = os.getenv(
"AUDIT_JSONL_DIR",
str(Path(os.getenv("REPO_ROOT", ".")) / "ops" / "audit"),
)
return JsonlAuditStore(audit_dir)
if backend == "memory":
return MemoryAuditStore()
return get_audit_store()
# ─── Digest action ────────────────────────────────────────────────────────────
def action_digest(
store,
window_hours: int = 24,
baseline_hours: int = 168, # 7 days
top_n: int = 10,
max_markdown_chars: int = 3800,
) -> Dict:
"""
Daily/weekly cost digest: top tools/agents + anomalies + recommendations.
Returns both structured JSON and a Telegram/markdown-friendly `markdown` field.
"""
now = _now_utc()
window_from = _iso(now - datetime.timedelta(hours=window_hours))
window_to = _iso(now)
baseline_from = _iso(now - datetime.timedelta(hours=baseline_hours))
# ── Top ──────────────────────────────────────────────────────────────────
top_data = action_top(store, window_hours=window_hours, top_n=top_n)
top_tools = top_data.get("top_tools") or []
top_agents = top_data.get("top_agents") or []
total_calls = top_data.get("total_calls", 0)
# ── Anomalies ─────────────────────────────────────────────────────────────
anomaly_data = action_anomalies(
store,
window_minutes=int(window_hours * 60 / 4),
baseline_hours=baseline_hours,
min_calls=5,
)
anomalies = anomaly_data.get("anomalies") or []
# ── Total cost ────────────────────────────────────────────────────────────
events = store.read(from_ts=window_from, to_ts=window_to, limit=200_000)
total_cost = sum(compute_event_cost(e) for e in events)
failed = sum(1 for e in events if e.get("status") in ("failed", "error"))
error_rate = round(failed / max(len(events), 1), 4)
# ── Recommendations ───────────────────────────────────────────────────────
recs = []
for a in anomalies[:5]:
r = a.get("recommendation", "")
if r:
recs.append(r)
if error_rate > 0.05:
recs.append(f"High error rate {round(error_rate*100,1)}% — investigate failing tools.")
if top_tools and top_tools[0].get("cost_units", 0) > 500:
tool_name = top_tools[0].get("tool", "?")
recs.append(f"Top spender '{tool_name}' used {top_tools[0]['cost_units']:.0f} cost units — review frequency.")
recs = list(dict.fromkeys(recs))[:8]
# ── Markdown ─────────────────────────────────────────────────────────────
period_label = f"Last {window_hours}h" if window_hours <= 48 else f"Last {window_hours//24}d"
lines = [
f"📊 **Cost Digest** ({period_label})",
f"Total calls: {total_calls} | Cost units: {total_cost:.0f} | Errors: {round(error_rate*100,1)}%",
"",
"**Top Tools:**",
]
for t in top_tools[:5]:
lines.append(f" • `{t.get('tool','?')}` — {t.get('cost_units',0):.1f}u, {t.get('count',0)} calls")
lines.append("")
lines.append("**Top Agents:**")
for a in top_agents[:3]:
lines.append(f" • `{a.get('agent_id','?')}` — {a.get('cost_units',0):.1f}u, {a.get('count',0)} calls")
if anomalies:
lines.append("")
lines.append(f"⚠️ **{len(anomalies)} Anomaly(ies):**")
for anm in anomalies[:3]:
lines.append(f" • [{anm.get('type','?')}] `{anm.get('tool','?')}` ratio={anm.get('ratio','?')}")
if recs:
lines.append("")
lines.append("💡 **Recommendations:**")
for r in recs[:5]:
lines.append(f" {r[:200]}")
markdown = "\n".join(lines)
if len(markdown) > max_markdown_chars:
markdown = markdown[:max_markdown_chars] + "\n…[truncated]"
return {
"period": period_label,
"window_hours": window_hours,
"time_range": {"from": window_from, "to": window_to},
"totals": {
"calls": total_calls,
"cost_units": round(total_cost, 2),
"failed": failed,
"error_rate": error_rate,
},
"top_tools": top_tools[:top_n],
"top_agents": top_agents[:top_n],
"anomalies": anomalies[:10],
"anomaly_count": len(anomalies),
"recommendations": recs,
"markdown": markdown,
}
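# Sketch of the rendered `markdown` field (values illustrative):
#   📊 **Cost Digest** (Last 24h)
#   Total calls: 1203 | Cost units: 845 | Errors: 2.1%
#
#   **Top Tools:**
#    • `browser_tool` — 412.0u, 300 calls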
# ─── Main entrypoint ─────────────────────────────────────────────────────────
def analyze_cost_dict(action: str, params: Optional[Dict] = None, store=None) -> Dict:
"""
Wrapper called by tool_manager handler.
Returns plain dict for ToolResult.
"""
params = params or {}
if store is None:
backend = params.get("backend", "auto")
store = _resolve_store(backend)
if action == "digest":
return action_digest(
store,
window_hours=int(params.get("window_hours", 24)),
baseline_hours=int(params.get("baseline_hours", 168)),
top_n=int(params.get("top_n", 10)),
max_markdown_chars=int(params.get("max_markdown_chars", 3800)),
)
if action == "report":
return action_report(
store,
time_range=params.get("time_range"),
group_by=params.get("group_by", ["tool"]),
top_n=int(params.get("top_n", 10)),
include_failed=bool(params.get("include_failed", True)),
include_hourly=bool(params.get("include_hourly", False)),
)
if action == "top":
return action_top(
store,
window_hours=int(params.get("window_hours", 24)),
top_n=int(params.get("top_n", 10)),
)
if action == "anomalies":
return action_anomalies(
store,
window_minutes=int(params.get("window_minutes", 60)),
baseline_hours=int(params.get("baseline_hours", 24)),
ratio_threshold=params.get("ratio_threshold"),
min_calls=params.get("min_calls"),
tools_filter=params.get("tools_filter"),
)
if action == "weights":
return action_weights()
return {"error": f"Unknown action '{action}'. Valid: digest, report, top, anomalies, weights"}