"""
Provider Budget Tracker — real-money token usage accounting.

Tracks:
  - Tokens used (input/output) per provider per model
  - Estimated USD cost based on published pricing
  - Approximate balance (if configured in limits.json)
  - Rolling 24h / 7d / 30d windows

Pricing table: updated Feb 2026 (USD per 1M tokens)
"""
from __future__ import annotations

import json
import logging
import os
import threading
import time
from collections import defaultdict
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

# ── Pricing catalog (USD / 1M tokens) ─────────────────────────────────────────

PRICING: Dict[str, Dict[str, Dict[str, float]]] = {
    # provider → model_pattern → {input, output}
    "anthropic": {
        "claude-sonnet-4-5": {"input": 3.0, "output": 15.0},
        "claude-opus-4-5": {"input": 15.0, "output": 75.0},
        "claude-haiku-3-5": {"input": 0.8, "output": 4.0},
        "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
        "_default": {"input": 3.0, "output": 15.0},
    },
    "grok": {
        "grok-4-1-fast-reasoning": {"input": 5.0, "output": 15.0},
        "grok-3": {"input": 5.0, "output": 25.0},
        "grok-2-1212": {"input": 2.0, "output": 10.0},
        "_default": {"input": 5.0, "output": 15.0},
    },
    "deepseek": {
        "deepseek-chat": {"input": 0.27, "output": 1.10},
        "deepseek-reasoner": {"input": 0.55, "output": 2.19},
        "_default": {"input": 0.27, "output": 1.10},
    },
    "mistral": {
        "mistral-large-latest": {"input": 2.0, "output": 6.0},
        "mistral-small-latest": {"input": 0.2, "output": 0.6},
        "_default": {"input": 2.0, "output": 6.0},
    },
    "openai": {
        "gpt-4o": {"input": 2.5, "output": 10.0},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "gpt-4-turbo": {"input": 10.0, "output": 30.0},
        "_default": {"input": 2.5, "output": 10.0},
    },
    "glm": {
        "glm-4-plus": {"input": 0.05, "output": 0.05},
        "glm-4-flash": {"input": 0.0, "output": 0.0},  # free tier
        "glm-4.7-flash": {"input": 0.0, "output": 0.0},
        "glm-z1-plus": {"input": 0.07, "output": 0.07},
        "_default": {"input": 0.05, "output": 0.05},
    },
    "ollama": {
        "_default": {"input": 0.0, "output": 0.0},
    },
}


def get_price(provider: str, model: str) -> Dict[str, float]:
    """Return the ``{"input", "output"}`` price entry (USD / 1M tokens).

    Lookup order within the provider's table:
      1. exact model name;
      2. the *longest* catalog key that is a prefix of ``model`` (so a dated
         name such as ``gpt-4o-mini-2024-07-18`` resolves to ``gpt-4o-mini``
         and not to the shorter ``gpt-4o``);
      3. the provider's ``_default`` entry.

    The provider name is matched case-insensitively; an unknown provider
    falls back to the ``anthropic`` table.
    """
    table = PRICING.get(provider.lower(), PRICING["anthropic"])
    if model in table:
        return table[model]
    prefixes = [k for k in table if k != "_default" and model.startswith(k)]
    if prefixes:
        # Longest prefix wins so overlapping keys resolve to the most specific.
        return table[max(prefixes, key=len)]
    return table.get("_default", {"input": 3.0, "output": 15.0})


def calc_cost_usd(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
    """Return the estimated USD cost of a call from its token counts."""
    price = get_price(provider, model)
    return (input_tokens * price["input"] + output_tokens * price["output"]) / 1_000_000


# ── Usage record ──────────────────────────────────────────────────────────────

@dataclass
class UsageRecord:
    """One tracked LLM call; serialized as a single JSON line in the usage log."""

    ts: float  # time.time() at the moment the call was tracked
    provider: str
    model: str
    agent: str
    input_tokens: int
    output_tokens: int
    cost_usd: float
    latency_ms: int = 0  # 0 is treated as "not measured" by the aggregator
    task_type: str = ""
    fallback_used: bool = False


# ── Storage ────────────────────────────────────────────────────────────────────

_BUDGET_DIR = Path(os.getenv("BUDGET_DATA_DIR", os.path.expanduser("~/.sofiia/budget")))
_USAGE_FILE = _BUDGET_DIR / "usage.jsonl"
_LIMITS_FILE = _BUDGET_DIR / "limits.json"

# Serializes writers (and the usage-log reader) within this process.
_lock = threading.Lock()


def _ensure_dir() -> None:
    """Create the budget data directory if it does not exist yet."""
    _BUDGET_DIR.mkdir(parents=True, exist_ok=True)


def _append_usage(rec: UsageRecord) -> None:
    """Append ``rec`` to the usage log as one JSON line."""
    _ensure_dir()
    with _lock:
        with open(_USAGE_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(asdict(rec)) + "\n")


def _load_usage(since_ts: float = 0.0) -> List[UsageRecord]:
    """Load usage records whose ``ts`` is at or after ``since_ts``.

    Best-effort: malformed lines are skipped, and an I/O failure is logged
    and yields whatever was parsed up to that point.
    """
    if not _USAGE_FILE.exists():
        return []
    records: List[UsageRecord] = []
    with _lock:
        try:
            with open(_USAGE_FILE, "r", encoding="utf-8") as f:
                for raw in f:
                    line = raw.strip()
                    if not line:
                        continue
                    try:
                        d = json.loads(line)
                        if d.get("ts", 0) >= since_ts:
                            records.append(UsageRecord(**d))
                    except (ValueError, TypeError, AttributeError) as e:
                        # Bad JSON, non-object line, or unexpected fields.
                        logger.debug("budget: skipping malformed usage line: %s", e)
        except Exception as e:
            logger.warning("budget: failed to load usage: %s", e)
    return records


# ── Manual balance config ──────────────────────────────────────────────────────

def _load_limits() -> Dict[str, Any]:
    """Read limits.json; return ``{}`` if it is missing, unreadable or not an object."""
    if not _LIMITS_FILE.exists():
        return {}
    try:
        with open(_LIMITS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        return {}
    return data if isinstance(data, dict) else {}


def _save_limits(data: Dict[str, Any]) -> None:
    """Persist ``data`` to limits.json.

    Written to a sibling temp file and swapped in with ``os.replace`` so that
    ``_load_limits`` (which reads without the lock) never sees a partial file.
    """
    _ensure_dir()
    tmp_path = _LIMITS_FILE.with_name(_LIMITS_FILE.name + ".tmp")
    with _lock:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, _LIMITS_FILE)


# ── Public API ─────────────────────────────────────────────────────────────────

def track_usage(
    provider: str,
    model: str,
    agent: str,
    input_tokens: int,
    output_tokens: int,
    latency_ms: int = 0,
    task_type: str = "",
    fallback_used: bool = False,
) -> float:
    """Record token usage and return cost in USD."""
    cost = calc_cost_usd(provider, model, input_tokens, output_tokens)
    rec = UsageRecord(
        ts=time.time(),
        provider=provider,
        model=model,
        agent=agent,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cost_usd=cost,
        latency_ms=latency_ms,
        task_type=task_type,
        fallback_used=fallback_used,
    )
    _append_usage(rec)
    logger.debug(
        "💰 tracked: provider=%s model=%s tokens=%d+%d cost=$%.5f",
        provider, model, input_tokens, output_tokens, cost,
    )
    return cost


@dataclass
class ProviderStats:
    """Aggregated usage for one provider over some time window."""

    provider: str
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0
    call_count: int = 0
    avg_latency_ms: float = 0.0
    top_models: List[Dict[str, Any]] = field(default_factory=list)
    # Configured limits (from limits.json)
    monthly_limit_usd: Optional[float] = None
    topup_balance_usd: Optional[float] = None
    estimated_remaining_usd: Optional[float] = None


def _apply_limits(stats: ProviderStats, lim: Any) -> None:
    """Copy one provider's configured limits from limits.json onto ``stats``.

    ``estimated_remaining_usd`` is the top-up balance minus the cost accumulated
    in ``stats``; it is only computed when the balance is numeric.
    """
    if not isinstance(lim, dict):
        return
    if "monthly_limit_usd" in lim:
        stats.monthly_limit_usd = lim["monthly_limit_usd"]
    if "topup_balance_usd" in lim:
        topup = lim["topup_balance_usd"]
        stats.topup_balance_usd = topup
        if isinstance(topup, (int, float)):
            stats.estimated_remaining_usd = round(topup - stats.total_cost_usd, 4)


def get_stats(window_hours: int = 720) -> Dict[str, ProviderStats]:
    """
    Aggregate usage stats per provider for the given time window.
    Default window = 720h = 30 days.

    Only providers with at least one record in the window appear in the result.
    """
    since_ts = time.time() - window_hours * 3600
    by_provider = _aggregate_records(_load_usage(since_ts))

    limits = _load_limits()
    for p, s in by_provider.items():
        _apply_limits(s, limits.get(p))

    return by_provider


def get_dashboard_data() -> Dict[str, Any]:
    """
    Returns structured data for the budget dashboard UI.
    Includes 24h, 7d, 30d windows.
    Single file read + in-memory filtering for all three windows.
    """
    now = time.time()
    ts_30d = now - 720 * 3600
    ts_7d = now - 168 * 3600
    ts_24h = now - 24 * 3600

    all_records = _load_usage(since_ts=ts_30d)
    records_7d = [r for r in all_records if r.ts >= ts_7d]
    records_24h = [r for r in records_7d if r.ts >= ts_24h]

    stats_30d = _aggregate_records(all_records)
    stats_7d = _aggregate_records(records_7d)
    stats_24h = _aggregate_records(records_24h)

    limits = _load_limits()

    # Every priced provider except local Ollama, plus anything actually used.
    all_providers = sorted({
        *(k for k in PRICING if k != "ollama"),
        *stats_30d.keys(),
    })

    providers_data = []
    for p in all_providers:
        s30 = stats_30d.get(p, ProviderStats(provider=p))
        s7 = stats_7d.get(p, ProviderStats(provider=p))
        s24 = stats_24h.get(p, ProviderStats(provider=p))
        plim = limits.get(p)
        if not isinstance(plim, dict):
            plim = {}
        # Applied to every listed provider, so configured limits are shown
        # even when the provider has no usage in the 30d window.
        _apply_limits(s30, plim)

        providers_data.append({
            "provider": p,
            "display_name": _provider_display_name(p),
            "icon": _provider_icon(p),
            "available": bool(os.getenv(_provider_env_key(p), "").strip()),
            "cost_24h": round(s24.total_cost_usd, 5),
            "cost_7d": round(s7.total_cost_usd, 5),
            "cost_30d": round(s30.total_cost_usd, 5),
            "calls_24h": s24.call_count,
            "calls_30d": s30.call_count,
            "tokens_24h": s24.total_input_tokens + s24.total_output_tokens,
            "tokens_30d": s30.total_input_tokens + s30.total_output_tokens,
            "avg_latency_ms": round(s30.avg_latency_ms),
            "monthly_limit_usd": s30.monthly_limit_usd,
            "topup_balance_usd": plim.get("topup_balance_usd"),
            "estimated_remaining_usd": s30.estimated_remaining_usd,
            "top_models": s30.top_models,
        })

    total_24h = sum(s.total_cost_usd for s in stats_24h.values())
    total_7d = sum(s.total_cost_usd for s in stats_7d.values())
    total_30d = sum(s.total_cost_usd for s in stats_30d.values())

    return {
        "providers": providers_data,
        "summary": {
            "total_cost_24h": round(total_24h, 5),
            "total_cost_7d": round(total_7d, 5),
            "total_cost_30d": round(total_30d, 5),
            "total_calls_30d": sum(s.call_count for s in stats_30d.values()),
        },
        "generated_at": now,
    }


def _aggregate_records(records: List[UsageRecord]) -> Dict[str, ProviderStats]:
    """Aggregate a list of records into per-provider stats.

    ``avg_latency_ms`` is the mean over records that carry a non-zero latency;
    records with ``latency_ms == 0`` (the default) do not affect it, regardless
    of where they appear in ``records``. ``top_models`` holds up to three
    models, ordered by cost (highest first).
    """
    by_provider: Dict[str, ProviderStats] = {}
    model_usage: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(
        lambda: defaultdict(lambda: {"calls": 0, "cost": 0.0, "tokens": 0})
    )
    latency_sum: Dict[str, float] = defaultdict(float)
    latency_samples: Dict[str, int] = defaultdict(int)

    for rec in records:
        p = rec.provider
        if p not in by_provider:
            by_provider[p] = ProviderStats(provider=p)
        s = by_provider[p]
        s.total_input_tokens += rec.input_tokens
        s.total_output_tokens += rec.output_tokens
        s.total_cost_usd += rec.cost_usd
        s.call_count += 1
        if rec.latency_ms:
            latency_sum[p] += rec.latency_ms
            latency_samples[p] += 1

        per_model = model_usage[p][rec.model]
        per_model["calls"] += 1
        per_model["cost"] += rec.cost_usd
        per_model["tokens"] += rec.input_tokens + rec.output_tokens

    for p, s in by_provider.items():
        if latency_samples[p]:
            s.avg_latency_ms = latency_sum[p] / latency_samples[p]
        top = sorted(model_usage[p].items(), key=lambda x: x[1]["cost"], reverse=True)[:3]
        s.top_models = [{"model": k, **v} for k, v in top]

    return by_provider


def rotate_usage_log(max_age_days: int = 90) -> int:
    """Remove records older than max_age_days. Returns count of removed lines.

    Unparseable lines are dropped and counted as removed. The surviving lines
    are written to a temp file that replaces the log via ``os.replace``, so a
    failure part-way through leaves the original log untouched. On failure a
    warning is logged and 0 is returned.
    """
    if not _USAGE_FILE.exists():
        return 0
    cutoff = time.time() - max_age_days * 86400
    kept: List[str] = []
    removed = 0
    tmp_path = _USAGE_FILE.with_name(_USAGE_FILE.name + ".tmp")
    with _lock:
        try:
            with open(_USAGE_FILE, "r", encoding="utf-8") as f:
                for raw in f:
                    line = raw.strip()
                    if not line:
                        continue
                    try:
                        keep = json.loads(line).get("ts", 0) >= cutoff
                    except (ValueError, TypeError, AttributeError):
                        keep = False
                    if keep:
                        kept.append(line)
                    else:
                        removed += 1
            if removed:
                with open(tmp_path, "w", encoding="utf-8") as f:
                    f.writelines(line + "\n" for line in kept)
                os.replace(tmp_path, _USAGE_FILE)
        except Exception as e:
            logger.warning("budget: rotate failed: %s", e)
            try:
                tmp_path.unlink()
            except OSError:
                pass  # temp file was never created or is already gone
            return 0

    if removed:
        logger.info("budget: rotated %d old records (>%dd)", removed, max_age_days)
    return removed


def set_provider_limit(provider: str, monthly_limit_usd: Optional[float] = None,
                       topup_balance_usd: Optional[float] = None) -> None:
    """Configure budget limits for a provider.

    Only the arguments that are not ``None`` are written; existing values for
    the other field are kept.
    """
    limits = _load_limits()
    if provider not in limits:
        limits[provider] = {}
    if monthly_limit_usd is not None:
        limits[provider]["monthly_limit_usd"] = monthly_limit_usd
    if topup_balance_usd is not None:
        limits[provider]["topup_balance_usd"] = topup_balance_usd
    _save_limits(limits)
    logger.info("budget: set limits for %s: %s", provider, limits[provider])


def _provider_display_name(p: str) -> str:
    """Human-readable provider name; unknown providers are title-cased."""
    return {
        "anthropic": "Anthropic Claude",
        "grok": "xAI Grok",
        "deepseek": "DeepSeek",
        "mistral": "Mistral AI",
        "openai": "OpenAI",
        "glm": "GLM / Z.AI",
        "ollama": "Local (Ollama)",
    }.get(p, p.title())


def _provider_icon(p: str) -> str:
    """Emoji icon for a provider; unknown providers get a generic robot."""
    return {
        "anthropic": "🟣",
        "grok": "⚡",
        "deepseek": "🔵",
        "mistral": "🌊",
        "openai": "🟢",
        "glm": "🐉",
        "ollama": "🖥️",
    }.get(p, "🤖")


def _provider_env_key(p: str) -> str:
    """Name of the env var checked for a provider's API key (``<P>_API_KEY`` by default)."""
    return {
        "anthropic": "ANTHROPIC_API_KEY",
        "grok": "GROK_API_KEY",
        "deepseek": "DEEPSEEK_API_KEY",
        "mistral": "MISTRAL_API_KEY",
        "openai": "OPENAI_API_KEY",
        "glm": "GLM5_API_KEY",
    }.get(p, f"{p.upper()}_API_KEY")