New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
420 lines
14 KiB
Python
"""Provider Budget Tracker — real-money token usage accounting.
|
|
|
|
Tracks:
|
|
- Tokens used (input/output) per provider per model
|
|
- Estimated USD cost based on published pricing
|
|
- Approximate balance (if configured via env var)
|
|
- Rolling 24h / 7d / 30d windows
|
|
|
|
Pricing table: updated Feb 2026 (USD per 1M tokens)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import threading
|
|
import time
|
|
from collections import defaultdict
|
|
from dataclasses import asdict, dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ── Pricing catalog (USD / 1M tokens) ─────────────────────────────────────────
|
|
|
|
# provider → model name (exact or prefix) → {"input": ..., "output": ...}
# prices in USD per 1M tokens. Each provider table carries a "_default"
# entry used when no model name matches.
# NOTE: the original annotation (Dict[str, Dict[str, float]]) described one
# nesting level too few; corrected to match the actual shape.
PRICING: Dict[str, Dict[str, Dict[str, float]]] = {
    "anthropic": {
        "claude-sonnet-4-5": {"input": 3.0, "output": 15.0},
        "claude-opus-4-5": {"input": 15.0, "output": 75.0},
        "claude-haiku-3-5": {"input": 0.8, "output": 4.0},
        "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
        "_default": {"input": 3.0, "output": 15.0},
    },
    "grok": {
        "grok-4-1-fast-reasoning": {"input": 5.0, "output": 15.0},
        "grok-3": {"input": 5.0, "output": 25.0},
        "grok-2-1212": {"input": 2.0, "output": 10.0},
        "_default": {"input": 5.0, "output": 15.0},
    },
    "deepseek": {
        "deepseek-chat": {"input": 0.27, "output": 1.10},
        "deepseek-reasoner": {"input": 0.55, "output": 2.19},
        "_default": {"input": 0.27, "output": 1.10},
    },
    "mistral": {
        "mistral-large-latest": {"input": 2.0, "output": 6.0},
        "mistral-small-latest": {"input": 0.2, "output": 0.6},
        "_default": {"input": 2.0, "output": 6.0},
    },
    "openai": {
        "gpt-4o": {"input": 2.5, "output": 10.0},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "gpt-4-turbo": {"input": 10.0, "output": 30.0},
        "_default": {"input": 2.5, "output": 10.0},
    },
    "glm": {
        "glm-4-plus": {"input": 0.05, "output": 0.05},
        "glm-4-flash": {"input": 0.0, "output": 0.0},  # free tier
        "glm-4.7-flash": {"input": 0.0, "output": 0.0},
        "glm-z1-plus": {"input": 0.07, "output": 0.07},
        "_default": {"input": 0.05, "output": 0.05},
    },
    "ollama": {
        # local inference: no metered cost
        "_default": {"input": 0.0, "output": 0.0},
    },
}


def get_price(provider: str, model: str) -> Dict[str, float]:
    """Return the {"input", "output"} per-1M-token prices for a model.

    Lookup order: exact model name, then the first key (in table order) that
    is a prefix of *model*, then the provider's "_default". Unknown providers
    fall back to the Anthropic table.
    """
    p = PRICING.get(provider.lower(), PRICING.get("anthropic"))
    # exact match
    if model in p:
        return p[model]
    # prefix match (e.g. "grok-3-mini" matches "grok-3")
    for k, v in p.items():
        if k != "_default" and model.startswith(k):
            return v
    return p.get("_default", {"input": 3.0, "output": 15.0})


def calc_cost_usd(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimated USD cost of one call: token counts times per-1M pricing."""
    price = get_price(provider, model)
    return (input_tokens * price["input"] + output_tokens * price["output"]) / 1_000_000
|
|
|
|
|
|
# ── Usage record ──────────────────────────────────────────────────────────────
|
|
|
|
@dataclass
class UsageRecord:
    """One logged LLM call: token counts plus the estimated USD cost."""

    ts: float  # unix timestamp (time.time()) when the call was recorded
    provider: str  # provider key, e.g. "anthropic", "grok"
    model: str  # model identifier as passed by the caller
    agent: str  # name of the agent that issued the call
    input_tokens: int
    output_tokens: int
    cost_usd: float  # estimated via calc_cost_usd() at record time
    latency_ms: int = 0  # 0 means "not measured"
    task_type: str = ""
    fallback_used: bool = False  # True if a fallback provider served this call
|
|
|
|
|
|
# ── Storage ────────────────────────────────────────────────────────────────────
|
|
|
|
# Data directory for budget state; overridable via the BUDGET_DATA_DIR env var.
_BUDGET_DIR = Path(os.getenv("BUDGET_DATA_DIR", os.path.expanduser("~/.sofiia/budget")))
# Append-only JSON-lines log: one UsageRecord per line.
_USAGE_FILE = _BUDGET_DIR / "usage.jsonl"
# Manually configured per-provider limits/balances (see set_provider_limit).
_LIMITS_FILE = _BUDGET_DIR / "limits.json"

# Serializes all file reads/writes above across threads in this process.
_lock = threading.Lock()
|
|
|
|
|
|
def _ensure_dir() -> None:
    """Create the budget data directory (and parents) if it does not exist."""
    _BUDGET_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def _append_usage(rec: UsageRecord) -> None:
    """Append one usage record to the JSONL log under the module lock."""
    _ensure_dir()
    payload = json.dumps(asdict(rec))
    with _lock, open(_USAGE_FILE, "a", encoding="utf-8") as fh:
        fh.write(payload + "\n")
|
|
|
|
|
|
def _load_usage(since_ts: float = 0.0) -> List[UsageRecord]:
    """Read usage records from the JSONL log, keeping those with ts >= since_ts.

    Blank and unparseable lines are skipped silently; a failure to open or
    read the file is logged and yields whatever was parsed so far.
    """
    if not _USAGE_FILE.exists():
        return []
    out: List[UsageRecord] = []
    with _lock:
        try:
            with open(_USAGE_FILE, "r", encoding="utf-8") as fh:
                for raw in fh:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        payload = json.loads(raw)
                        if payload.get("ts", 0) >= since_ts:
                            out.append(UsageRecord(**payload))
                    except Exception:
                        # corrupt line or schema drift — skip, best-effort read
                        continue
        except Exception as e:
            logger.warning("budget: failed to load usage: %s", e)
    return out
|
|
|
|
|
|
# ── Manual balance config ──────────────────────────────────────────────────────
|
|
|
|
def _load_limits() -> Dict[str, Any]:
    """Load per-provider budget limits from disk; {} if missing or corrupt."""
    if not _LIMITS_FILE.exists():
        return {}
    try:
        with open(_LIMITS_FILE, "r") as fh:
            data = json.load(fh)
    except Exception:
        return {}
    return data
|
|
|
|
|
|
def _save_limits(data: Dict[str, Any]) -> None:
    """Persist the limits mapping to limits.json (pretty-printed)."""
    _ensure_dir()
    with _lock, open(_LIMITS_FILE, "w") as fh:
        json.dump(data, fh, indent=2)
|
|
|
|
|
|
# ── Public API ─────────────────────────────────────────────────────────────────
|
|
|
|
def track_usage(
    provider: str,
    model: str,
    agent: str,
    input_tokens: int,
    output_tokens: int,
    latency_ms: int = 0,
    task_type: str = "",
    fallback_used: bool = False,
) -> float:
    """Record token usage and return cost in USD.

    Computes the estimated cost from the pricing catalog, appends a
    UsageRecord to the JSONL log, and returns the cost.
    """
    usd = calc_cost_usd(provider, model, input_tokens, output_tokens)
    record = UsageRecord(
        ts=time.time(),
        provider=provider,
        model=model,
        agent=agent,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cost_usd=usd,
        latency_ms=latency_ms,
        task_type=task_type,
        fallback_used=fallback_used,
    )
    _append_usage(record)
    logger.debug(
        "💰 tracked: provider=%s model=%s tokens=%d+%d cost=$%.5f",
        provider, model, input_tokens, output_tokens, usd,
    )
    return usd
|
|
|
|
|
|
@dataclass
class ProviderStats:
    """Aggregated usage for one provider over a time window."""

    provider: str
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0
    call_count: int = 0
    avg_latency_ms: float = 0.0  # mean over records in the window that had latency
    top_models: List[Dict[str, Any]] = field(default_factory=list)  # top 3 by cost
    # Configured limits (from limits.json)
    monthly_limit_usd: Optional[float] = None
    topup_balance_usd: Optional[float] = None
    estimated_remaining_usd: Optional[float] = None  # top-up minus window cost
|
|
|
|
|
|
def get_stats(window_hours: int = 720) -> Dict[str, ProviderStats]:
    """
    Aggregate usage stats per provider for the given time window.
    Default window = 720h = 30 days.
    """
    cutoff = time.time() - window_hours * 3600
    stats = _aggregate_records(_load_usage(cutoff))

    # Overlay manually configured limits/balances onto the aggregates.
    limits = _load_limits()
    for provider, st in stats.items():
        cfg = limits.get(provider, {})
        if "monthly_limit_usd" in cfg:
            st.monthly_limit_usd = cfg["monthly_limit_usd"]
        if "topup_balance_usd" in cfg:
            st.topup_balance_usd = cfg["topup_balance_usd"]
            st.estimated_remaining_usd = round(cfg["topup_balance_usd"] - st.total_cost_usd, 4)

    return stats
|
|
|
|
|
|
def get_dashboard_data() -> Dict[str, Any]:
    """
    Returns structured data for the budget dashboard UI.
    Includes 24h, 7d, 30d windows.
    Single file read + in-memory filtering for all three windows.
    """
    now = time.time()
    ts_30d = now - 720 * 3600
    ts_7d = now - 168 * 3600
    ts_24h = now - 24 * 3600

    # One disk read for the widest window; narrower windows filter in memory.
    all_records = _load_usage(since_ts=ts_30d)
    records_7d = [r for r in all_records if r.ts >= ts_7d]
    records_24h = [r for r in records_7d if r.ts >= ts_24h]

    stats_30d = _aggregate_records(all_records)
    stats_7d = _aggregate_records(records_7d)
    stats_24h = _aggregate_records(records_24h)

    limits = _load_limits()

    # Apply limits to 30d stats
    for p, s in stats_30d.items():
        lim = limits.get(p, {})
        if "monthly_limit_usd" in lim:
            s.monthly_limit_usd = lim["monthly_limit_usd"]
        if "topup_balance_usd" in lim:
            s.topup_balance_usd = lim["topup_balance_usd"]
            # Remaining balance approximated as top-up minus 30d spend.
            s.estimated_remaining_usd = round(lim["topup_balance_usd"] - s.total_cost_usd, 4)

    # Show every priced provider (local ollama excluded) plus any provider
    # that actually appears in the usage log.
    all_providers = sorted({
        *(k for k in PRICING if k != "ollama"),
        *stats_30d.keys(),
    })

    providers_data = []
    for p in all_providers:
        # Providers with no usage fall back to zeroed stats so UI rows stay uniform.
        s30 = stats_30d.get(p, ProviderStats(provider=p))
        s7 = stats_7d.get(p, ProviderStats(provider=p))
        s24 = stats_24h.get(p, ProviderStats(provider=p))
        plim = limits.get(p, {})

        providers_data.append({
            "provider": p,
            "display_name": _provider_display_name(p),
            "icon": _provider_icon(p),
            # "available" = the provider's API-key env var is set and non-blank.
            "available": bool(os.getenv(_provider_env_key(p), "").strip()),
            "cost_24h": round(s24.total_cost_usd, 5),
            "cost_7d": round(s7.total_cost_usd, 5),
            "cost_30d": round(s30.total_cost_usd, 5),
            "calls_24h": s24.call_count,
            "calls_30d": s30.call_count,
            "tokens_24h": s24.total_input_tokens + s24.total_output_tokens,
            "tokens_30d": s30.total_input_tokens + s30.total_output_tokens,
            "avg_latency_ms": round(s30.avg_latency_ms),
            "monthly_limit_usd": s30.monthly_limit_usd,
            "topup_balance_usd": plim.get("topup_balance_usd"),
            "estimated_remaining_usd": s30.estimated_remaining_usd,
            "top_models": s30.top_models,
        })

    total_24h = sum(s.total_cost_usd for s in stats_24h.values())
    total_7d = sum(s.total_cost_usd for s in stats_7d.values())
    total_30d = sum(s.total_cost_usd for s in stats_30d.values())

    return {
        "providers": providers_data,
        "summary": {
            "total_cost_24h": round(total_24h, 5),
            "total_cost_7d": round(total_7d, 5),
            "total_cost_30d": round(total_30d, 5),
            "total_calls_30d": sum(s.call_count for s in stats_30d.values()),
        },
        "generated_at": now,
    }
|
|
|
|
|
|
def _aggregate_records(records: List[UsageRecord]) -> Dict[str, ProviderStats]:
    """Aggregate a list of records into per-provider stats.

    Sums tokens/cost/calls per provider, averages latency over only the
    records that carry a latency sample, and keeps each provider's top
    three models ranked by cost.

    FIX: the previous running average divided by the total call count, so
    records with latency_ms == 0 (skipped from the numerator) still inflated
    the denominator and dragged the average down. Latency is now accumulated
    separately and divided by the number of actual samples.
    """
    by_provider: Dict[str, ProviderStats] = {}
    # Per-provider latency accumulators (only records that reported latency).
    latency_sum: Dict[str, float] = defaultdict(float)
    latency_samples: Dict[str, int] = defaultdict(int)
    model_usage: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(
        lambda: defaultdict(lambda: {"calls": 0, "cost": 0.0, "tokens": 0})
    )
    for rec in records:
        p = rec.provider
        if p not in by_provider:
            by_provider[p] = ProviderStats(provider=p)
        s = by_provider[p]
        s.total_input_tokens += rec.input_tokens
        s.total_output_tokens += rec.output_tokens
        s.total_cost_usd += rec.cost_usd
        s.call_count += 1
        if rec.latency_ms:
            latency_sum[p] += rec.latency_ms
            latency_samples[p] += 1
        m = model_usage[p][rec.model]
        m["calls"] += 1
        m["cost"] += rec.cost_usd
        m["tokens"] += rec.input_tokens + rec.output_tokens

    for p, s in by_provider.items():
        if latency_samples[p]:
            s.avg_latency_ms = latency_sum[p] / latency_samples[p]
        top = sorted(model_usage[p].items(), key=lambda x: x[1]["cost"], reverse=True)[:3]
        s.top_models = [{"model": k, **v} for k, v in top]

    return by_provider
|
|
|
|
|
|
def rotate_usage_log(max_age_days: int = 90) -> int:
    """Remove records older than max_age_days. Returns count of removed lines.

    Unparseable lines are counted as removed. The pruned log is written to a
    temp file and atomically swapped in via os.replace, so a crash during
    rotation can no longer truncate the live log (the previous version opened
    the log with "w" in place, destroying all data before rewriting it).
    """
    if not _USAGE_FILE.exists():
        return 0
    cutoff = time.time() - max_age_days * 86400
    kept: List[str] = []
    removed = 0
    with _lock:
        try:
            with open(_USAGE_FILE, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        d = json.loads(line)
                        if d.get("ts", 0) >= cutoff:
                            kept.append(line)
                        else:
                            removed += 1
                    except Exception:
                        removed += 1
            # Atomic rewrite: write the survivors next to the log, then swap.
            tmp = _USAGE_FILE.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for line in kept:
                    f.write(line + "\n")
            os.replace(tmp, _USAGE_FILE)
        except Exception as e:
            logger.warning("budget: rotate failed: %s", e)
    if removed:
        logger.info("budget: rotated %d old records (>%dd)", removed, max_age_days)
    return removed
|
|
|
|
|
|
def set_provider_limit(provider: str, monthly_limit_usd: Optional[float] = None, topup_balance_usd: Optional[float] = None) -> None:
    """Configure budget limits for a provider.

    Only the arguments that are not None are written; existing values for
    the other keys are preserved. Persisted to limits.json.
    """
    limits = _load_limits()
    entry = limits.setdefault(provider, {})
    if monthly_limit_usd is not None:
        entry["monthly_limit_usd"] = monthly_limit_usd
    if topup_balance_usd is not None:
        entry["topup_balance_usd"] = topup_balance_usd
    _save_limits(limits)
    logger.info("budget: set limits for %s: %s", provider, limits[provider])
|
|
|
|
|
|
def _provider_display_name(p: str) -> str:
|
|
return {
|
|
"anthropic": "Anthropic Claude",
|
|
"grok": "xAI Grok",
|
|
"deepseek": "DeepSeek",
|
|
"mistral": "Mistral AI",
|
|
"openai": "OpenAI",
|
|
"glm": "GLM / Z.AI",
|
|
"ollama": "Local (Ollama)",
|
|
}.get(p, p.title())
|
|
|
|
|
|
def _provider_icon(p: str) -> str:
|
|
return {
|
|
"anthropic": "🟣",
|
|
"grok": "⚡",
|
|
"deepseek": "🔵",
|
|
"mistral": "🌊",
|
|
"openai": "🟢",
|
|
"glm": "🐉",
|
|
"ollama": "🖥️",
|
|
}.get(p, "🤖")
|
|
|
|
|
|
def _provider_env_key(p: str) -> str:
|
|
return {
|
|
"anthropic": "ANTHROPIC_API_KEY",
|
|
"grok": "GROK_API_KEY",
|
|
"deepseek": "DEEPSEEK_API_KEY",
|
|
"mistral": "MISTRAL_API_KEY",
|
|
"openai": "OPENAI_API_KEY",
|
|
"glm": "GLM5_API_KEY",
|
|
}.get(p, f"{p.upper()}_API_KEY")
|