Files
microdao-daarion/services/router/provider_budget.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

420 lines
14 KiB
Python

"""Provider Budget Tracker — real-money token usage accounting.
Tracks:
- Tokens used (input/output) per provider per model
- Estimated USD cost based on published pricing
- Approximate balance (if configured via env var)
- Rolling 24h / 7d / 30d windows
Pricing table: updated Feb 2026 (USD per 1M tokens)
"""
from __future__ import annotations
import json
import logging
import os
import threading
import time
from collections import defaultdict
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# ── Pricing catalog (USD / 1M tokens) ─────────────────────────────────────────
# provider → model name/prefix → {"input": USD per 1M input tokens,
#                                 "output": USD per 1M output tokens}
# Each provider table carries a "_default" entry used by get_price() when no
# model name or prefix matches.  Local Ollama models are free.
# NOTE: the annotation is three levels deep — the values are per-model dicts,
# not flat floats (the original annotation understated the nesting).
PRICING: Dict[str, Dict[str, Dict[str, float]]] = {
    "anthropic": {
        "claude-sonnet-4-5": {"input": 3.0, "output": 15.0},
        "claude-opus-4-5": {"input": 15.0, "output": 75.0},
        "claude-haiku-3-5": {"input": 0.8, "output": 4.0},
        "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
        "_default": {"input": 3.0, "output": 15.0},
    },
    "grok": {
        "grok-4-1-fast-reasoning": {"input": 5.0, "output": 15.0},
        "grok-3": {"input": 5.0, "output": 25.0},
        "grok-2-1212": {"input": 2.0, "output": 10.0},
        "_default": {"input": 5.0, "output": 15.0},
    },
    "deepseek": {
        "deepseek-chat": {"input": 0.27, "output": 1.10},
        "deepseek-reasoner": {"input": 0.55, "output": 2.19},
        "_default": {"input": 0.27, "output": 1.10},
    },
    "mistral": {
        "mistral-large-latest": {"input": 2.0, "output": 6.0},
        "mistral-small-latest": {"input": 0.2, "output": 0.6},
        "_default": {"input": 2.0, "output": 6.0},
    },
    "openai": {
        "gpt-4o": {"input": 2.5, "output": 10.0},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "gpt-4-turbo": {"input": 10.0, "output": 30.0},
        "_default": {"input": 2.5, "output": 10.0},
    },
    "glm": {
        "glm-4-plus": {"input": 0.05, "output": 0.05},
        "glm-4-flash": {"input": 0.0, "output": 0.0},  # free tier
        "glm-4.7-flash": {"input": 0.0, "output": 0.0},
        "glm-z1-plus": {"input": 0.07, "output": 0.07},
        "_default": {"input": 0.05, "output": 0.05},
    },
    "ollama": {
        "_default": {"input": 0.0, "output": 0.0},
    },
}
def get_price(provider: str, model: str) -> Dict[str, float]:
    """Return the {input, output} USD/1M-token rates for a provider/model.

    Resolution order: exact model name, then the first prefix match in the
    provider's table, then the provider's "_default" entry.  An unknown
    provider falls back to the Anthropic table.
    """
    table = PRICING.get(provider.lower(), PRICING.get("anthropic"))
    if model in table:
        return table[model]
    prefix_hit = next(
        (rate for name, rate in table.items()
         if name != "_default" and model.startswith(name)),
        None,
    )
    if prefix_hit is not None:
        return prefix_hit
    return table.get("_default", {"input": 3.0, "output": 15.0})
def calc_cost_usd(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the USD cost of one call from its token counts.

    Rates come from get_price(); both rates are quoted per 1M tokens.
    """
    rates = get_price(provider, model)
    in_cost = input_tokens * rates["input"]
    out_cost = output_tokens * rates["output"]
    return (in_cost + out_cost) / 1_000_000
# ── Usage record ──────────────────────────────────────────────────────────────
@dataclass
class UsageRecord:
    """One LLM call's usage, persisted as a single JSON line in usage.jsonl."""

    ts: float  # UNIX timestamp (seconds) when the call was tracked
    provider: str  # provider key, e.g. "anthropic", "grok"
    model: str  # model identifier used for the call
    agent: str  # which agent issued the call
    input_tokens: int
    output_tokens: int
    cost_usd: float  # estimated cost, computed by calc_cost_usd()
    latency_ms: int = 0  # 0 means latency was not measured
    task_type: str = ""
    fallback_used: bool = False  # True when a fallback provider handled the call
# ── Storage ────────────────────────────────────────────────────────────────────
# Data lives under $BUDGET_DATA_DIR (default ~/.sofiia/budget):
#   usage.jsonl — append-only log, one JSON-encoded UsageRecord per line
#   limits.json — manually configured per-provider limits / top-up balances
_BUDGET_DIR = Path(os.getenv("BUDGET_DATA_DIR", os.path.expanduser("~/.sofiia/budget")))
_USAGE_FILE = _BUDGET_DIR / "usage.jsonl"
_LIMITS_FILE = _BUDGET_DIR / "limits.json"
# Serializes all file access within this process (not across processes).
_lock = threading.Lock()
def _ensure_dir() -> None:
    """Create the budget data directory (including parents) if missing."""
    os.makedirs(_BUDGET_DIR, exist_ok=True)
def _append_usage(rec: UsageRecord) -> None:
    """Serialize *rec* to JSON and append it as one line to the usage log."""
    _ensure_dir()
    payload = json.dumps(asdict(rec))
    with _lock, open(_USAGE_FILE, "a", encoding="utf-8") as fh:
        fh.write(payload + "\n")
def _load_usage(since_ts: float = 0.0) -> List[UsageRecord]:
    """Read the usage log, returning records with ts >= *since_ts*.

    Corrupt or blank lines are skipped; an unreadable file is logged and
    yields whatever was parsed before the failure (possibly []).
    """
    if not _USAGE_FILE.exists():
        return []
    found: List[UsageRecord] = []
    with _lock:
        try:
            with open(_USAGE_FILE, "r", encoding="utf-8") as fh:
                for raw in fh:
                    raw = raw.strip()
                    if not raw:
                        continue
                    # Best-effort: any malformed line is silently dropped.
                    try:
                        payload = json.loads(raw)
                        if payload.get("ts", 0) >= since_ts:
                            found.append(UsageRecord(**payload))
                    except Exception:
                        continue
        except Exception as exc:
            logger.warning("budget: failed to load usage: %s", exc)
    return found
# ── Manual balance config ──────────────────────────────────────────────────────
def _load_limits() -> Dict[str, Any]:
    """Return the per-provider limits mapping; {} if missing or unparseable."""
    try:
        with open(_LIMITS_FILE, "r") as fh:
            return json.load(fh)
    except Exception:
        # Missing file, bad JSON, permission error — all mean "no limits set".
        return {}
def _save_limits(data: Dict[str, Any]) -> None:
    """Persist the per-provider limits mapping as pretty-printed JSON."""
    _ensure_dir()
    with _lock, open(_LIMITS_FILE, "w") as fh:
        json.dump(data, fh, indent=2)
# ── Public API ─────────────────────────────────────────────────────────────────
def track_usage(
    provider: str,
    model: str,
    agent: str,
    input_tokens: int,
    output_tokens: int,
    latency_ms: int = 0,
    task_type: str = "",
    fallback_used: bool = False,
) -> float:
    """Record token usage and return cost in USD."""
    usd = calc_cost_usd(provider, model, input_tokens, output_tokens)
    record = UsageRecord(
        ts=time.time(),
        provider=provider,
        model=model,
        agent=agent,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cost_usd=usd,
        latency_ms=latency_ms,
        task_type=task_type,
        fallback_used=fallback_used,
    )
    _append_usage(record)
    logger.debug(
        "💰 tracked: provider=%s model=%s tokens=%d+%d cost=$%.5f",
        provider, model, input_tokens, output_tokens, usd,
    )
    return usd
@dataclass
class ProviderStats:
    """Aggregated per-provider usage over a time window (see get_stats)."""

    provider: str  # provider key, e.g. "anthropic"
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0  # sum of per-record cost_usd in the window
    call_count: int = 0
    avg_latency_ms: float = 0.0  # 0.0 when no record reported a latency
    top_models: List[Dict[str, Any]] = field(default_factory=list)  # top 3 models by cost
    # Configured limits (from limits.json)
    monthly_limit_usd: Optional[float] = None
    topup_balance_usd: Optional[float] = None
    estimated_remaining_usd: Optional[float] = None  # topup balance minus window cost
def get_stats(window_hours: int = 720) -> Dict[str, ProviderStats]:
    """
    Aggregate usage stats per provider for the given time window.
    Default window = 720h = 30 days.
    """
    cutoff = time.time() - window_hours * 3600
    stats = _aggregate_records(_load_usage(cutoff))
    limits = _load_limits()
    # Annotate aggregates with any manually configured limits/balances.
    for name, stat in stats.items():
        cfg = limits.get(name, {})
        if "monthly_limit_usd" in cfg:
            stat.monthly_limit_usd = cfg["monthly_limit_usd"]
        if "topup_balance_usd" in cfg:
            stat.topup_balance_usd = cfg["topup_balance_usd"]
            stat.estimated_remaining_usd = round(cfg["topup_balance_usd"] - stat.total_cost_usd, 4)
    return stats
def get_dashboard_data() -> Dict[str, Any]:
    """
    Returns structured data for the budget dashboard UI.
    Includes 24h, 7d, 30d windows.
    Single file read + in-memory filtering for all three windows.
    """
    now = time.time()
    cutoff_30d = now - 720 * 3600
    cutoff_7d = now - 168 * 3600
    cutoff_24h = now - 24 * 3600

    # One disk read; narrower windows are filtered from the 30d superset.
    recs_30d = _load_usage(since_ts=cutoff_30d)
    recs_7d = [r for r in recs_30d if r.ts >= cutoff_7d]
    recs_24h = [r for r in recs_7d if r.ts >= cutoff_24h]

    agg_30d = _aggregate_records(recs_30d)
    agg_7d = _aggregate_records(recs_7d)
    agg_24h = _aggregate_records(recs_24h)

    limits = _load_limits()
    # Limits and balances only annotate the 30-day aggregates.
    for name, stat in agg_30d.items():
        cfg = limits.get(name, {})
        if "monthly_limit_usd" in cfg:
            stat.monthly_limit_usd = cfg["monthly_limit_usd"]
        if "topup_balance_usd" in cfg:
            stat.topup_balance_usd = cfg["topup_balance_usd"]
            stat.estimated_remaining_usd = round(cfg["topup_balance_usd"] - stat.total_cost_usd, 4)

    # Every priced provider (except local ollama) plus anything actually used.
    known = {name for name in PRICING if name != "ollama"} | set(agg_30d)

    providers_data = []
    for name in sorted(known):
        s30 = agg_30d.get(name, ProviderStats(provider=name))
        s7 = agg_7d.get(name, ProviderStats(provider=name))
        s24 = agg_24h.get(name, ProviderStats(provider=name))
        cfg = limits.get(name, {})
        providers_data.append({
            "provider": name,
            "display_name": _provider_display_name(name),
            "icon": _provider_icon(name),
            "available": bool(os.getenv(_provider_env_key(name), "").strip()),
            "cost_24h": round(s24.total_cost_usd, 5),
            "cost_7d": round(s7.total_cost_usd, 5),
            "cost_30d": round(s30.total_cost_usd, 5),
            "calls_24h": s24.call_count,
            "calls_30d": s30.call_count,
            "tokens_24h": s24.total_input_tokens + s24.total_output_tokens,
            "tokens_30d": s30.total_input_tokens + s30.total_output_tokens,
            "avg_latency_ms": round(s30.avg_latency_ms),
            "monthly_limit_usd": s30.monthly_limit_usd,
            "topup_balance_usd": cfg.get("topup_balance_usd"),
            "estimated_remaining_usd": s30.estimated_remaining_usd,
            "top_models": s30.top_models,
        })

    return {
        "providers": providers_data,
        "summary": {
            "total_cost_24h": round(sum(s.total_cost_usd for s in agg_24h.values()), 5),
            "total_cost_7d": round(sum(s.total_cost_usd for s in agg_7d.values()), 5),
            "total_cost_30d": round(sum(s.total_cost_usd for s in agg_30d.values()), 5),
            "total_calls_30d": sum(s.call_count for s in agg_30d.values()),
        },
        "generated_at": now,
    }
def _aggregate_records(records: List[UsageRecord]) -> Dict[str, ProviderStats]:
    """Aggregate a list of records into per-provider stats.

    avg_latency_ms is the mean over records that actually reported a
    latency (latency_ms != 0).  The previous running-average formula
    divided by the total call count while skipping zero-latency records,
    which made the result depend on record order and biased it downward.
    """
    by_provider: Dict[str, ProviderStats] = {}
    # Latency is accumulated separately so unmeasured calls don't dilute it.
    latency_sum: Dict[str, float] = defaultdict(float)
    latency_n: Dict[str, int] = defaultdict(int)
    model_usage: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(
        lambda: defaultdict(lambda: {"calls": 0, "cost": 0.0, "tokens": 0})
    )
    for rec in records:
        p = rec.provider
        if p not in by_provider:
            by_provider[p] = ProviderStats(provider=p)
        s = by_provider[p]
        s.total_input_tokens += rec.input_tokens
        s.total_output_tokens += rec.output_tokens
        s.total_cost_usd += rec.cost_usd
        s.call_count += 1
        if rec.latency_ms:
            latency_sum[p] += rec.latency_ms
            latency_n[p] += 1
        model_usage[p][rec.model]["calls"] += 1
        model_usage[p][rec.model]["cost"] += rec.cost_usd
        model_usage[p][rec.model]["tokens"] += rec.input_tokens + rec.output_tokens
    for p, s in by_provider.items():
        if latency_n[p]:
            s.avg_latency_ms = latency_sum[p] / latency_n[p]
        # Keep only the three costliest models per provider.
        top = sorted(model_usage[p].items(), key=lambda x: x[1]["cost"], reverse=True)[:3]
        s.top_models = [{"model": k, **v} for k, v in top]
    return by_provider
def rotate_usage_log(max_age_days: int = 90) -> int:
    """Remove records older than max_age_days. Returns count of removed lines.

    Unparseable lines are counted as removed.  The log is rewritten via a
    temp file and os.replace() so a crash mid-rotation cannot leave a
    truncated usage.jsonl (the previous in-place rewrite could).
    """
    if not _USAGE_FILE.exists():
        return 0
    cutoff = time.time() - max_age_days * 86400
    kept: List[str] = []
    removed = 0
    with _lock:
        try:
            with open(_USAGE_FILE, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        d = json.loads(line)
                    except Exception:
                        removed += 1  # corrupt line: drop it
                        continue
                    if d.get("ts", 0) >= cutoff:
                        kept.append(line)
                    else:
                        removed += 1
            # Atomic rewrite: write the survivors to a sibling temp file,
            # then swap it into place in one step.
            tmp = _USAGE_FILE.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for line in kept:
                    f.write(line + "\n")
            os.replace(tmp, _USAGE_FILE)
        except Exception as e:
            logger.warning("budget: rotate failed: %s", e)
    if removed:
        logger.info("budget: rotated %d old records (>%dd)", removed, max_age_days)
    return removed
def set_provider_limit(provider: str, monthly_limit_usd: Optional[float] = None, topup_balance_usd: Optional[float] = None) -> None:
    """Configure budget limits for a provider."""
    limits = _load_limits()
    entry = limits.setdefault(provider, {})
    if monthly_limit_usd is not None:
        entry["monthly_limit_usd"] = monthly_limit_usd
    if topup_balance_usd is not None:
        entry["topup_balance_usd"] = topup_balance_usd
    _save_limits(limits)
    logger.info("budget: set limits for %s: %s", provider, entry)
def _provider_display_name(p: str) -> str:
return {
"anthropic": "Anthropic Claude",
"grok": "xAI Grok",
"deepseek": "DeepSeek",
"mistral": "Mistral AI",
"openai": "OpenAI",
"glm": "GLM / Z.AI",
"ollama": "Local (Ollama)",
}.get(p, p.title())
def _provider_icon(p: str) -> str:
return {
"anthropic": "🟣",
"grok": "",
"deepseek": "🔵",
"mistral": "🌊",
"openai": "🟢",
"glm": "🐉",
"ollama": "🖥️",
}.get(p, "🤖")
def _provider_env_key(p: str) -> str:
return {
"anthropic": "ANTHROPIC_API_KEY",
"grok": "GROK_API_KEY",
"deepseek": "DEEPSEEK_API_KEY",
"mistral": "MISTRAL_API_KEY",
"openai": "OPENAI_API_KEY",
"glm": "GLM5_API_KEY",
}.get(p, f"{p.upper()}_API_KEY")