Files
microdao-daarion/tests/test_cost_analyzer.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

509 lines
18 KiB
Python

"""
Tests for Cost & Resource Analyzer (FinOps MVP)
Covers:
1. test_audit_persist_nonfatal — broken store does not crash tool_governance
2. test_cost_report_aggregation — 20 synthetic events → correct totals
3. test_anomalies_spike_detection — baseline low, window high → anomaly detected
4. test_anomalies_no_spike — stable traffic → no anomalies
5. test_release_check_cost_watch — cost_watch gate always passes, adds recs
6. test_rbac_cost_tool_deny — denied without entitlements
7. test_weights_loaded — weights read from cost_weights.yml
8. test_top_report — top returns correct leaders
9. test_cost_watch_skipped_on_error — broken cost_analyzer → gate passes (skipped)
10. test_cost_event_cost_units — compute_event_cost correct calculation
"""
from __future__ import annotations
import asyncio
import datetime
import json
import os
import sys
import tempfile
from pathlib import Path
from typing import Any, Dict, List
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ─── Path setup ──────────────────────────────────────────────────────────────
ROUTER_DIR = Path(__file__).parent.parent / "services" / "router"
REPO_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(ROUTER_DIR))
sys.path.insert(0, str(REPO_ROOT))
os.environ.setdefault("REPO_ROOT", str(REPO_ROOT))
os.environ["AUDIT_BACKEND"] = "memory" # default for all tests
# ─── Import modules ───────────────────────────────────────────────────────────
from audit_store import MemoryAuditStore, JsonlAuditStore, NullAuditStore, set_audit_store
from cost_analyzer import (
action_report,
action_top,
action_anomalies,
action_weights,
compute_event_cost,
reload_cost_weights,
analyze_cost_dict,
)
# ─── Helpers ──────────────────────────────────────────────────────────────────
def _now_iso(delta_minutes: int = 0) -> str:
dt = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(minutes=delta_minutes)
return dt.isoformat()
def _make_event(
tool: str = "observability_tool",
agent_id: str = "sofiia",
user_id: str = "user_x",
workspace_id: str = "ws1",
status: str = "pass",
duration_ms: int = 200,
ts: str = None,
) -> Dict:
return {
"ts": ts or _now_iso(),
"req_id": "req-test-123",
"workspace_id": workspace_id,
"user_id": user_id,
"agent_id": agent_id,
"tool": tool,
"action": "query",
"status": status,
"duration_ms": duration_ms,
"in_size": 100,
"out_size": 500,
"input_hash": "sha256:abc",
}
# ─── 1. audit_persist_nonfatal ────────────────────────────────────────────────
class BrokenStore:
    """Audit-store stand-in where every operation fails, simulating storage loss."""

    @staticmethod
    def _fail():
        # Single failure point shared by all operations.
        raise RuntimeError("disk full")

    def write(self, event) -> None:
        self._fail()

    def read(self, **kwargs) -> List:
        self._fail()
def test_audit_persist_nonfatal(tmp_path):
    """
    If audit store raises, _emit_audit must NOT propagate the exception.
    Tool execution continues normally.
    """
    from tool_governance import ToolGovernance

    set_audit_store(BrokenStore())
    try:
        gov = ToolGovernance(enable_rbac=False, enable_limits=False, enable_allowlist=False)
        decision = gov.pre_call("some_tool", "action", agent_id="agent_cto", input_text="hello")
        assert decision.allowed
        # post_call must swallow the storage error instead of raising it;
        # reaching the end of the try body without an exception is the pass condition.
        gov.post_call(decision.call_ctx, {"data": "ok"})
    finally:
        # Restore a working in-memory store for the remaining tests.
        set_audit_store(MemoryAuditStore())
# ─── 2. cost_report_aggregation ───────────────────────────────────────────────
def test_cost_report_aggregation():
    """20 synthetic events → totals and top_tools correct."""
    store = MemoryAuditStore()
    # Workload mix as (tool, call_count, duration_ms) triples.
    workload = [
        ("observability_tool", 10, 200),
        ("pr_reviewer_tool", 5, 1000),
        ("memory_search", 5, 50),
    ]
    for tool, count, ms in workload:
        for _ in range(count):
            store.write(_make_event(tool, duration_ms=ms))
    report = action_report(store, group_by=["tool"], top_n=10)
    assert report["totals"]["calls"] == 20
    assert report["totals"]["cost_units"] > 0
    leaders = report["breakdowns"]["tool"]
    names = [row["tool"] for row in leaders]
    # pr_reviewer_tool should be most expensive (10 + 2 cost_per_ms*1000 each)
    assert "pr_reviewer_tool" in names
    # pr_reviewer should be #1 spender
    assert leaders[0]["tool"] == "pr_reviewer_tool"
def test_cost_event_cost_units():
    """compute_event_cost returns expected value."""
    reload_cost_weights()
    event = _make_event("pr_reviewer_tool", duration_ms=500)
    # pr_reviewer: 10.0 + 500 * 0.002 = 11.0
    assert abs(compute_event_cost(event) - 11.0) < 0.01
def test_cost_event_cost_units_default():
    """Unknown tool uses default weights."""
    reload_cost_weights()
    event = _make_event("unknown_fancy_tool", duration_ms=1000)
    # defaults: 1.0 + 1000 * 0.001 = 2.0
    assert abs(compute_event_cost(event) - 2.0) < 0.01
# ─── 3. anomalies_spike_detection ─────────────────────────────────────────────
def test_anomalies_spike_detection():
    """
    Baseline: 2 calls in last 24h.
    Window (last 60m): 80 calls — should trigger spike anomaly.
    """
    store = MemoryAuditStore()
    # Two quiet baseline calls ~23h in the past, then a burst of 80 right now.
    baseline_ts = _now_iso(delta_minutes=-(23 * 60))
    for _ in range(2):
        store.write(_make_event("comfy_generate_image", ts=baseline_ts))
    for _ in range(80):
        store.write(_make_event("comfy_generate_image"))
    result = action_anomalies(
        store,
        window_minutes=60,
        baseline_hours=24,
        ratio_threshold=2.0,
        min_calls=5,
    )
    assert result["anomaly_count"] >= 1
    assert "cost_spike" in [a["type"] for a in result["anomalies"]]
    spike = next(a for a in result["anomalies"] if a["type"] == "cost_spike")
    assert spike["tool"] == "comfy_generate_image"
    assert spike["window_calls"] == 80
def test_anomalies_no_spike():
    """Stable traffic → no anomalies."""
    store = MemoryAuditStore()
    now = datetime.datetime.now(datetime.timezone.utc)
    # Flat rate: 5 calls per hour over the last 25 hours.
    for hour in range(25):
        stamp = (now - datetime.timedelta(hours=hour)).isoformat()
        for _ in range(5):
            store.write(_make_event("observability_tool", ts=stamp))
    result = action_anomalies(
        store,
        window_minutes=60,
        baseline_hours=24,
        ratio_threshold=3.0,
        min_calls=3,
    )
    # Should be 0 or very few — stable traffic
    assert result["anomaly_count"] == 0
# ─── 4. top report ────────────────────────────────────────────────────────────
def test_top_report():
    """top action returns correct leaders."""
    store = MemoryAuditStore()
    # Five expensive video jobs versus two cheap memory lookups.
    for _ in range(5):
        store.write(_make_event("comfy_generate_video", duration_ms=3000))
    for _ in range(2):
        store.write(_make_event("memory_search", duration_ms=50, agent_id="agent_b"))
    result = action_top(store, window_hours=1, top_n=5)
    assert result["total_calls"] == 7
    assert result["top_tools"][0]["tool"] == "comfy_generate_video"
    agent_names = [a["agent_id"] for a in result["top_agents"]]
    assert "sofiia" in agent_names  # "sofiia" is the agent_id mapped to role agent_cto
# ─── 5. release_check cost_watch gate ────────────────────────────────────────
def test_release_check_cost_watch_always_passes():
    """
    cost_watch gate always returns pass=True.
    Anomalies are added to recommendations, not to overall_pass=False.
    """
    async def _run():
        from release_check_runner import _run_cost_watch

        class FakeToolResult:
            # Minimal successful tool-manager result wrapper consumed by _run_cost_watch.
            def __init__(self, data):
                self.success = True
                self.result = data
                self.error = None

        async def fake_execute(tool_name, args, agent_id=None):
            # Only cost_analyzer_tool is stubbed; other tools implicitly return None
            # (no other tool is expected to be invoked by _run_cost_watch).
            if tool_name == "cost_analyzer_tool":
                return FakeToolResult({
                    "anomalies": [
                        {
                            "type": "cost_spike",
                            "tool": "comfy_generate_image",
                            "ratio": 5.0,
                            "window_calls": 100,
                            "baseline_calls": 2,
                            "recommendation": "Cost spike: comfy_generate_image — apply rate limit.",
                        }
                    ],
                    "anomaly_count": 1,
                })

        mock_tm = MagicMock()
        mock_tm.execute_tool = AsyncMock(side_effect=fake_execute)
        return await _run_cost_watch(mock_tm, "sofiia", ratio_threshold=2.0, min_calls=5)

    ok, gate = asyncio.run(_run())
    assert ok is True, "cost_watch must always return pass=True"
    assert gate["name"] == "cost_watch"
    assert gate["status"] == "pass"
    assert gate["anomalies_count"] >= 1
    # The anomaly's recommendation text must surface in the gate's recommendations.
    assert any("comfy" in r or "cost" in r.lower() for r in gate.get("recommendations", []))
def test_cost_watch_gate_in_full_release_check():
    """
    Running release_check with minimal gates — cost_watch should appear in gates
    and overall_pass should NOT be False due to cost_watch.
    """
    async def _run():
        from release_check_runner import run_release_check

        class FakeTMResult:
            # Tool-manager result wrapper; success/error are configurable per gate.
            def __init__(self, data, success=True, error=None):
                self.success = success
                self.result = data
                self.error = error

        async def fake_exec(tool_name, args, agent_id=None):
            # One canned "pass" response per gate tool invoked by run_release_check.
            if tool_name == "pr_reviewer_tool":
                return FakeTMResult({"approved": True, "verdict": "LGTM", "issues": []})
            if tool_name == "config_linter_tool":
                return FakeTMResult({"pass": True, "errors": [], "warnings": []})
            if tool_name == "dependency_scanner_tool":
                return FakeTMResult({"pass": True, "summary": "No vulns", "vulnerabilities": []})
            if tool_name == "contract_tool":
                return FakeTMResult({"pass": True, "breaking_changes": [], "warnings": []})
            if tool_name == "threatmodel_tool":
                return FakeTMResult({"risk_level": "low", "threats": []})
            if tool_name == "cost_analyzer_tool":
                # The only gate reporting an anomaly — must NOT fail the release.
                return FakeTMResult({
                    "anomalies": [
                        {"type": "cost_spike", "tool": "observability_tool",
                         "ratio": 4.5, "window_calls": 100, "baseline_calls": 5,
                         "recommendation": "Reduce observability polling frequency."}
                    ],
                    "anomaly_count": 1,
                })
            # Any unanticipated tool gets an empty-but-successful result.
            return FakeTMResult({})

        tm = MagicMock()
        tm.execute_tool = AsyncMock(side_effect=fake_exec)
        inputs = {
            "diff_text": "small change",
            "run_smoke": False,
            "run_drift": False,
            "run_deps": True,
            "run_cost_watch": True,
            "cost_spike_ratio_threshold": 2.0,
            "cost_min_calls_threshold": 5,
            "cost_watch_window_hours": 24,
            "fail_fast": False,
        }
        return await run_release_check(tm, inputs, agent_id="sofiia")

    report = asyncio.run(_run())
    gate_names = [g["name"] for g in report["gates"]]
    assert "cost_watch" in gate_names
    cost_gate = next(g for g in report["gates"] if g["name"] == "cost_watch")
    assert cost_gate["status"] == "pass"
    # Anomalies are advisory only: the overall release verdict stays True.
    assert report["pass"] is True
# ─── 6. RBAC deny ─────────────────────────────────────────────────────────────
def test_rbac_cost_tool_deny():
    """Agent without tools.cost.read entitlements is denied.
    'alateya' maps to role agent_media which has no tools.cost.read.
    """
    from tool_governance import ToolGovernance

    gov = ToolGovernance(enable_rbac=True, enable_limits=False, enable_allowlist=False)
    decision = gov.pre_call(
        tool="cost_analyzer_tool",
        action="report",
        agent_id="alateya",  # maps to agent_media (no tools.cost.read)
    )
    assert not decision.allowed
    reason = decision.reason.lower()
    assert "denied" in reason or "entitlement" in reason
def test_rbac_cost_tool_allow():
    """'sofiia' maps to role agent_cto which has tools.cost.read → allowed."""
    from tool_governance import ToolGovernance

    gov = ToolGovernance(enable_rbac=True, enable_limits=False, enable_allowlist=False)
    decision = gov.pre_call(
        tool="cost_analyzer_tool",
        action="report",
        agent_id="sofiia",  # maps to agent_cto
    )
    assert decision.allowed
# ─── 7. weights_loaded ────────────────────────────────────────────────────────
def test_weights_loaded():
    """Weights read from cost_weights.yml and include expected tools."""
    reload_cost_weights()
    weights = action_weights()
    # Top-level sections of the weights config.
    for section in ("defaults", "tools", "anomaly"):
        assert section in weights
    # Key tools must be present
    tool_weights = weights["tools"]
    for name in ("pr_reviewer_tool", "comfy_generate_image", "comfy_generate_video"):
        assert name in tool_weights
    # Verify pr_reviewer cost
    assert float(tool_weights["pr_reviewer_tool"]["cost_per_call"]) == 10.0
    # Defaults exist
    for key in ("cost_per_call", "cost_per_ms"):
        assert key in weights["defaults"]
# ─── 8. JSONL store round-trip ────────────────────────────────────────────────
def test_jsonl_store_roundtrip():
    """Write + read cycle with JsonlAuditStore."""
    with tempfile.TemporaryDirectory() as tmpdir:
        store = JsonlAuditStore(directory=tmpdir)
        for _ in range(10):
            store.write(_make_event("observability_tool"))
        store.close()
        rows = store.read()
        assert len(rows) == 10
        assert all(row["tool"] == "observability_tool" for row in rows)
def test_jsonl_store_filter_by_tool():
    """JSONL read respects tool filter."""
    with tempfile.TemporaryDirectory() as tmpdir:
        store = JsonlAuditStore(directory=tmpdir)
        # 5 observability events and 3 memory_search events.
        for tool, count in (("observability_tool", 5), ("memory_search", 3)):
            for _ in range(count):
                store.write(_make_event(tool))
        store.close()
        assert len(store.read(tool="memory_search")) == 3
# ─── 9. cost_watch skipped on error ──────────────────────────────────────────
def test_cost_watch_skipped_on_tool_error():
    """If cost_analyzer_tool fails, gate is skipped (pass=True, not error)."""
    async def _run():
        from release_check_runner import _run_cost_watch

        class FailResult:
            # Tool-manager result for a failed execution.
            success = False
            result = None
            error = "tool unavailable"

        manager = MagicMock()
        manager.execute_tool = AsyncMock(return_value=FailResult())
        return await _run_cost_watch(manager, "sofiia")

    passed, gate = asyncio.run(_run())
    assert passed is True
    assert gate["status"] == "pass"
    assert gate.get("skipped") is True
# ─── 10. analyze_cost_dict dispatch ──────────────────────────────────────────
def test_analyze_cost_dict_top():
    """analyze_cost_dict dispatches 'top' action correctly."""
    store = MemoryAuditStore()
    for _ in range(3):
        store.write(_make_event("pr_reviewer_tool", duration_ms=800))
    top = analyze_cost_dict("top", {"window_hours": 1, "top_n": 5}, store=store)
    assert "top_tools" in top
    leader = top["top_tools"][0]
    assert leader["tool"] == "pr_reviewer_tool"
def test_analyze_cost_dict_unknown_action():
    """Unknown action returns error dict without raising."""
    outcome = analyze_cost_dict("explode", {}, store=MemoryAuditStore())
    assert "error" in outcome
# ─── 11. Error rate spike ─────────────────────────────────────────────────────
def test_anomalies_error_rate_spike():
    """High failure rate triggers error_spike anomaly."""
    store = MemoryAuditStore()
    # 20 failures vs 5 passes → 80% error rate in the window.
    for status, count in (("failed", 20), ("pass", 5)):
        for _ in range(count):
            store.write(_make_event("observability_tool", status=status))
    result = action_anomalies(
        store,
        window_minutes=60,
        baseline_hours=24,
        ratio_threshold=999.0,  # disable cost spike
        min_calls=5,
    )
    spikes = [a for a in result["anomalies"] if a["type"] == "error_spike"]
    assert spikes
    first = spikes[0]
    assert first["tool"] == "observability_tool"
    assert float(first["error_rate"]) > 0.10