Files
microdao-daarion/services/router/drift_analyzer.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

899 lines
35 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Drift Analyzer — знаходить розбіжності між "джерелами правди" та "фактом".
4 категорії перевірок (незалежні, кожна повертає findings):
1. services — Service Catalog (inventory_services.csv / 01_SERVICE_CATALOG.md) vs docker-compose*.yml
2. openapi — OpenAPI specs (docs/contracts/*.yaml) vs routes у коді (FastAPI decorators)
3. nats — inventory_nats_topics.csv vs publish/subscribe usage у коді
4. tools — tools_rollout.yml + rbac_tools_matrix.yml vs фактичні handlers у tool_manager.py
Формат findings:
{ category, severity, id, title, evidence: {path, lines, details}, recommended_fix }
Pass rule: pass=false якщо errors > 0. Warnings/infos не валять gate.
"""
import csv
import fnmatch
import hashlib
import json
import logging
import os
import re
import time
import yaml
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, FrozenSet, List, Optional, Set, Tuple
logger = logging.getLogger(__name__)
# ─── Constants ────────────────────────────────────────────────────────────────
# Directory names pruned from every repository walk (third-party deps,
# virtualenvs, build output, caches, backups).
EXCLUDED_DIRS: FrozenSet[str] = frozenset({
    "node_modules", ".git", "dist", "build", "vendor",
    ".venv", "venv", "venv_models", "sofia_venv",
    "__pycache__", ".pytest_cache", "rollback_backups",
    # NOTE(review): _is_excluded() compares individual path segments, so a
    # two-segment entry like this can never match — confirm whether
    # "consolidation" alone was intended.
    "docs/consolidation",
})
# Scan budgets — keep a full analysis bounded on large repos.
MAX_FILES_PER_CATEGORY = 300
MAX_BYTES_PER_FILE = 262144 # 256KB
TIMEOUT_SEC = 25.0 # Hard deadline per full analysis
# Known tool handlers (must be kept in sync with execute_tool dispatch in tool_manager.py)
# Source: Priority 117 handlers in tool_manager.py
KNOWN_TOOL_HANDLERS: FrozenSet[str] = frozenset({
    "memory_search", "graph_query",
    "web_search", "web_extract",
    "image_generate", "comfy_generate_image", "comfy_generate_video",
    "remember_fact",
    "presentation_create", "presentation_status", "presentation_download",
    "crawl4ai_scrape", "tts_speak", "file_tool",
    "market_data",
    "crm_search_client", "crm_upsert_client", "crm_upsert_site",
    "crm_upsert_window_unit", "crm_create_quote", "crm_update_quote",
    "crm_create_job", "calc_window_quote",
    "docs_render_quote_pdf", "docs_render_invoice_pdf",
    "schedule_propose_slots", "schedule_confirm_slot",
    "repo_tool", "pr_reviewer_tool", "contract_tool",
    "oncall_tool", "observability_tool", "config_linter_tool",
    "threatmodel_tool", "job_orchestrator_tool", "kb_tool",
    "drift_analyzer_tool", # self-registration
})
# ─── Data Structures ──────────────────────────────────────────────────────────
@dataclass
class Finding:
    """A single drift finding, serializable for the gate report."""
    category: str
    severity: str # "error" | "warning" | "info"
    id: str
    title: str
    evidence: Dict[str, str] = field(default_factory=dict)
    recommended_fix: str = ""

    def to_dict(self) -> Dict:
        """Serialize to a plain dict (keys in report order)."""
        field_order = ("category", "severity", "id", "title",
                       "evidence", "recommended_fix")
        return {name: getattr(self, name) for name in field_order}
@dataclass
class DriftReport:
    """Aggregate result of a full drift analysis run."""
    pass_: bool  # True when there are zero error-severity findings
    summary: str  # one-line human-readable verdict
    stats: Dict[str, Any]  # counters, per-category stats, timing
    findings: List[Dict]  # serialized Finding dicts, sorted by severity
# ─── Utility helpers ──────────────────────────────────────────────────────────
def _is_excluded(path: str) -> bool:
    """Check if any part of the path is in the excluded dirs set."""
    return not EXCLUDED_DIRS.isdisjoint(Path(path).parts)
def _walk_files(root: str, extensions: Tuple[str, ...],
                deadline: float) -> List[str]:
    """
    Collect files under *root* whose names end with any of *extensions*.

    Honors EXCLUDED_DIRS (pruned during the walk), stops once
    MAX_FILES_PER_CATEGORY paths are collected, and bails out when the
    *deadline* (a time.monotonic() value) has passed.
    """
    collected: List[str] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # In-place pruning steers os.walk away from excluded/hidden dirs.
        dirnames[:] = [
            d for d in dirnames
            if d not in EXCLUDED_DIRS and not d.startswith(".")
        ]
        if time.monotonic() > deadline:
            logger.warning("_walk_files: timeout reached")
            break
        for fname in filenames:
            if not fname.endswith(extensions):
                continue
            candidate = os.path.join(dirpath, fname)
            if _is_excluded(candidate):
                continue
            collected.append(candidate)
            if len(collected) >= MAX_FILES_PER_CATEGORY:
                return collected
    return collected
def _read_file(path: str) -> str:
"""Read file with size limit. Returns empty string on error."""
try:
size = os.path.getsize(path)
if size > MAX_BYTES_PER_FILE:
with open(path, "r", errors="replace") as f:
return f.read(MAX_BYTES_PER_FILE)
with open(path, "r", errors="replace") as f:
return f.read()
except Exception:
return ""
_SECRET_PAT = re.compile(
r'(?i)(api[_-]?key|token|secret|password|bearer|jwt|private[_-]?key)'
r'[\s=:]+[\'"`]?([a-zA-Z0-9_\-\.]{8,})[\'"`]?'
)
def _redact_evidence(text: str) -> str:
"""Mask potential secrets in evidence strings."""
return _SECRET_PAT.sub(lambda m: f"{m.group(1)}=***REDACTED***", text)
def _rel(path: str, root: str) -> str:
"""Return path relative to root, or absolute if outside."""
try:
return os.path.relpath(path, root)
except ValueError:
return path
# ─── Category 1: Services ─────────────────────────────────────────────────────
def _load_service_catalog(repo_root: str) -> Dict[str, str]:
"""
Load services from inventory_services.csv.
Returns {service_name: status}.
"""
csv_path = os.path.join(
repo_root, "docs", "architecture_inventory", "inventory_services.csv"
)
services = {}
if not os.path.exists(csv_path):
# Fallback: scan 01_SERVICE_CATALOG.md for table rows
md_path = os.path.join(
repo_root, "docs", "architecture_inventory", "01_SERVICE_CATALOG.md"
)
if os.path.exists(md_path):
content = _read_file(md_path)
for line in content.splitlines():
m = re.match(r'\|\s*([\w\-]+)\s*\|\s*(DEPLOYED|DEFINED|PLANNED[^\|]*)', line)
if m:
services[m.group(1).strip()] = m.group(2).strip()
return services
try:
with open(csv_path, "r", newline="", errors="replace") as f:
reader = csv.DictReader(f)
for row in reader:
name = (row.get("service") or "").strip()
status = (row.get("type") or "").strip() # csv has 'type' not 'status'
if name:
services[name] = status
except Exception as e:
logger.warning(f"Could not load inventory_services.csv: {e}")
return services
def _load_compose_services(repo_root: str, deadline: float) -> Dict[str, str]:
"""
Parse docker-compose*.yml files and return {service_name: compose_file}.
"""
compose_files = []
for entry in os.listdir(repo_root):
if fnmatch.fnmatch(entry, "docker-compose*.yml"):
compose_files.append(os.path.join(repo_root, entry))
# Also infra subdir
infra_compose = os.path.join(repo_root, "infra", "compose", "docker-compose.yml")
if os.path.exists(infra_compose):
compose_files.append(infra_compose)
services = {}
for cf in compose_files:
if time.monotonic() > deadline:
break
try:
content = _read_file(cf)
data = yaml.safe_load(content) or {}
svc_section = data.get("services") or {}
for svc_name in svc_section:
services[svc_name] = _rel(cf, repo_root)
except Exception as e:
logger.debug(f"Could not parse {cf}: {e}")
return services
def _analyze_services(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict]:
    """Compare the service catalog against docker-compose definitions.

    Emits:
      - DRIFT-SVC-001 (error): DEPLOYED in catalog, absent from every compose file
      - DRIFT-SVC-002 (warning): defined in compose, absent from catalog
    Name matching tolerates dash/underscore differences on both sides.

    Fix: the old DRIFT-SVC-002 check computed
    svc.replace("-", "_").replace("_", "-"), which always collapses to the
    all-dash form — the underscore variant was never actually checked.
    Both checks now use the same variants-set logic.
    """
    findings = []
    catalog = _load_service_catalog(repo_root)
    compose_svcs = _load_compose_services(repo_root, deadline)
    compose_names = set(compose_svcs.keys())
    catalog_names = set(catalog.keys())

    def _name_variants(name: str) -> Set[str]:
        # Catalog and compose sometimes disagree on dash vs underscore.
        return {name, name.replace("-", "_"), name.replace("_", "-")}

    # DEPLOYED in catalog but missing from ALL compose files
    for svc, status in catalog.items():
        if "DEPLOYED" in status.upper() and not _name_variants(svc) & compose_names:
            findings.append(Finding(
                category="services",
                severity="error",
                id="DRIFT-SVC-001",
                title=f"Service '{svc}' marked DEPLOYED in catalog but absent from all docker-compose files",
                evidence={"path": "docs/architecture_inventory/inventory_services.csv",
                          "details": f"status={status}, not found in compose"},
                recommended_fix=f"Add '{svc}' to appropriate docker-compose*.yml or update catalog status to DEFINED.",
            ))
    # In compose but not mentioned in catalog at all (under any name variant)
    for svc, compose_file in compose_svcs.items():
        if not _name_variants(svc) & catalog_names:
            findings.append(Finding(
                category="services",
                severity="warning",
                id="DRIFT-SVC-002",
                title=f"Service '{svc}' found in compose but not in service catalog",
                evidence={"path": compose_file, "details": f"defined in {compose_file}"},
                recommended_fix=f"Add '{svc}' to inventory_services.csv / 01_SERVICE_CATALOG.md.",
            ))
    stats = {
        "catalog_entries": len(catalog),
        "compose_services": len(compose_svcs),
        "findings": len(findings),
    }
    return findings, stats
# ─── Category 2: OpenAPI ──────────────────────────────────────────────────────
def _load_openapi_paths(repo_root: str, deadline: float) -> Dict[str, Set[str]]:
    """
    Scan docs/contracts/*.openapi.yaml and any openapi*.yaml/yml/json.
    Returns {"/path": {"get", "post", ...}}.

    Spec files come from two places:
      1. every YAML/JSON file directly under docs/contracts/
      2. any file named openapi*.(yaml|yml|json) anywhere in the repo
    Paths appearing in several specs get the union of their methods.
    """
    spec_files = []
    contracts_dir = os.path.join(repo_root, "docs", "contracts")
    if os.path.isdir(contracts_dir):
        for f in os.listdir(contracts_dir):
            if f.endswith((".yaml", ".yml", ".json")):
                spec_files.append(os.path.join(contracts_dir, f))
    # Also find any openapi*.yaml in repo root and services
    for dirpath, dirnames, filenames in os.walk(repo_root):
        # Prune excluded/hidden dirs in-place so os.walk skips them.
        dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".")]
        if time.monotonic() > deadline:
            break
        for f in filenames:
            if re.match(r'openapi.*\.(ya?ml|json)$', f, re.IGNORECASE):
                full = os.path.join(dirpath, f)
                # Avoid double-counting specs already picked up from contracts/.
                if full not in spec_files:
                    spec_files.append(full)
    paths: Dict[str, Set[str]] = {}
    for sf in spec_files:
        if time.monotonic() > deadline:
            break
        try:
            content = _read_file(sf)
            # .json specs go through json; everything else through YAML.
            data = yaml.safe_load(content) if sf.endswith((".yaml", ".yml")) else json.loads(content)
            if not isinstance(data, dict) or "paths" not in data:
                continue
            for path, methods in (data.get("paths") or {}).items():
                if not isinstance(methods, dict):
                    continue
                # Keep only real HTTP verbs (drops 'parameters', 'summary', ...).
                methods_set = {
                    m.lower() for m in methods
                    if m.lower() in {"get", "post", "put", "patch", "delete", "head", "options"}
                }
                if path not in paths:
                    paths[path] = set()
                paths[path].update(methods_set)
        except Exception as e:
            logger.debug(f"Could not parse OpenAPI spec {sf}: {e}")
    return paths
# Decorator-style routes: @app.get("/path") / @router.post("/path").
# Captures (method, path).
_FASTAPI_ROUTE_PAT = re.compile(
    r'@(?:app|router)\.(get|post|put|patch|delete|head|options)\(\s*[\'"]([^\'"]+)[\'"]',
    re.MULTILINE,
)
# Programmatic registration: app.add_api_route("/path", ..., methods=["GET"]).
# Captures (path, raw methods list contents).
_ADD_API_ROUTE_PAT = re.compile(
    r'\.add_api_route\(\s*[\'"]([^\'"]+)[\'"].*?methods\s*=\s*\[([^\]]+)\]',
    re.MULTILINE | re.DOTALL,
)
def _load_code_routes(repo_root: str, deadline: float) -> Dict[str, Set[str]]:
    """
    Scan Python files for FastAPI route decorators.
    Returns {"/path": {"get", "post", ...}}.
    """
    routes: Dict[str, Set[str]] = {}

    def _record(raw_path: str, methods: Set[str]) -> None:
        # Trailing-slash-insensitive; an empty path collapses to "/".
        key = raw_path.rstrip("/") or "/"
        routes.setdefault(key, set()).update(methods)

    for pf in _walk_files(repo_root, (".py",), deadline):
        if time.monotonic() > deadline:
            break
        # Defensive re-filter on top of _walk_files pruning.
        if ".venv" in pf or "venv" in pf or "node_modules" in pf:
            continue
        content = _read_file(pf)
        if not content:
            continue
        for method, path in _FASTAPI_ROUTE_PAT.findall(content):
            _record(path, {method.lower()})
        for path, methods_raw in _ADD_API_ROUTE_PAT.findall(content):
            _record(path, {m.strip().strip('"\'').lower() for m in methods_raw.split(",")})
    return routes
def _normalize_path(path: str) -> str:
"""Normalize OAS path for comparison: remove trailing slash, lowercase."""
return path.rstrip("/").lower() or "/"
# Paths that are infrastructure-level and expected to be missing from OAS specs.
# Add /internal/* and /debug/* patterns if your project uses them.
_OAS_IGNORE_PATH_PREFIXES: Tuple[str, ...] = (
"/healthz", "/readyz", "/livez", "/metrics",
"/internal/", "/debug/", "/__", "/favicon",
)
def _is_oas_ignored(path: str) -> bool:
"""Return True if path is on the OAS ignore allowlist."""
p = path.lower()
return any(p == prefix.rstrip("/") or p.startswith(prefix)
for prefix in _OAS_IGNORE_PATH_PREFIXES)
def _load_openapi_deprecated(repo_root: str) -> Set[str]:
"""
Return normalized paths marked as 'deprecated: true' in any OAS spec.
Deprecated endpoints downgrade from error to warning (DRIFT-OAS-001).
"""
deprecated: Set[str] = set()
spec_files: List[str] = []
for dirpath, dirnames, filenames in os.walk(repo_root):
dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".")]
for f in filenames:
if re.match(r'openapi.*\.(ya?ml|json)$', f, re.IGNORECASE):
spec_files.append(os.path.join(dirpath, f))
for sf in spec_files:
try:
content = _read_file(sf)
data = yaml.safe_load(content) if sf.endswith((".yaml", ".yml")) else json.loads(content)
if not isinstance(data, dict) or "paths" not in data:
continue
for path, methods in (data.get("paths") or {}).items():
if not isinstance(methods, dict):
continue
for method, operation in methods.items():
if isinstance(operation, dict) and operation.get("deprecated", False):
deprecated.add(_normalize_path(path))
except Exception:
pass
return deprecated
def _analyze_openapi(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict]:
    """Compare OpenAPI specs against FastAPI routes found in code.

    Emits:
      - DRIFT-OAS-001 (error; warning when spec-deprecated): spec path
        with no matching code route
      - DRIFT-OAS-003 (warning): path exists but some spec methods are
        missing in code
      - DRIFT-OAS-002 (error): /v1/* code route documented in no spec
    Returns (findings, stats). When no specs exist at all, comparison is
    skipped entirely (no findings).
    """
    findings = []
    spec_paths = _load_openapi_paths(repo_root, deadline)
    code_routes = _load_code_routes(repo_root, deadline)
    if not spec_paths:
        # Nothing to compare against — absence of specs is not treated as drift.
        return findings, {"spec_paths": 0, "code_routes": len(code_routes), "findings": 0}
    deprecated_paths = _load_openapi_deprecated(repo_root)
    # Normalize both sides (trailing slash + case) before comparing.
    spec_norm: Dict[str, Set[str]] = {
        _normalize_path(p): methods for p, methods in spec_paths.items()
    }
    code_norm: Dict[str, Set[str]] = {
        _normalize_path(p): methods for p, methods in code_routes.items()
    }
    # DRIFT-OAS-001: In spec but not in code
    for path, methods in sorted(spec_norm.items()):
        # Skip infra/health endpoints — they are expected to be absent from OAS
        if _is_oas_ignored(path):
            continue
        if path not in code_norm:
            # Deprecated spec paths → warning only, not blocking
            severity = "warning" if path in deprecated_paths else "error"
            dep_note = " (deprecated in spec)" if path in deprecated_paths else ""
            findings.append(Finding(
                category="openapi",
                severity=severity,
                id="DRIFT-OAS-001",
                title=f"OpenAPI path '{path}'{dep_note} not found in codebase routes",
                evidence={"path": "docs/contracts/",
                          "details": f"methods={sorted(methods)}, missing from FastAPI decorators"},
                recommended_fix=(
                    f"Mark '{path}' as removed in OpenAPI or implement the route."
                    if path in deprecated_paths
                    else f"Implement '{path}' route in code or remove from OpenAPI spec."
                ),
            ))
        else:
            # DRIFT-OAS-003: Method mismatch
            code_methods = code_norm[path]
            missing_in_code = methods - code_methods
            if missing_in_code:
                findings.append(Finding(
                    category="openapi",
                    severity="warning",
                    id="DRIFT-OAS-003",
                    title=f"Method mismatch for path '{path}': spec has {sorted(missing_in_code)}, code missing",
                    evidence={"path": "docs/contracts/",
                              "details": f"spec={sorted(methods)}, code={sorted(code_methods)}"},
                    recommended_fix=f"Add missing HTTP methods to code route for '{path}'.",
                ))
    # DRIFT-OAS-002: In code (/v1/ paths) but not in spec
    for path, methods in sorted(code_norm.items()):
        # Health/internal endpoints are expected to be absent from OAS
        if _is_oas_ignored(path):
            continue
        if not path.startswith("/v1/"):
            # Only public /v1/ routes are required to be documented.
            continue
        if path not in spec_norm:
            findings.append(Finding(
                category="openapi",
                severity="error",
                id="DRIFT-OAS-002",
                title=f"Code route '{path}' not documented in any OpenAPI spec",
                evidence={"path": "services/", "details": f"methods={sorted(methods)}"},
                recommended_fix=f"Add '{path}' to OpenAPI spec in docs/contracts/.",
            ))
    stats = {
        "spec_paths": len(spec_paths),
        "code_routes": len(code_routes),
        "findings": len(findings),
    }
    return findings, stats
# ─── Category 3: NATS ─────────────────────────────────────────────────────────
_NATS_WILDCARD_PAT = re.compile(r'\{[^}]+\}|\*|>') # {agent_id}, *, >
def _normalize_nats_subject(subj: str) -> str:
"""Replace wildcards with * for matching. Lowercase."""
return _NATS_WILDCARD_PAT.sub("*", subj.strip()).lower()
def _load_nats_inventory(repo_root: str) -> Optional[List[str]]:
"""
Load documented NATS subjects from inventory_nats_topics.csv.
Returns list of normalized subjects, or None if file absent.
"""
csv_path = os.path.join(
repo_root, "docs", "architecture_inventory", "inventory_nats_topics.csv"
)
if not os.path.exists(csv_path):
return None
subjects = []
try:
with open(csv_path, "r", newline="", errors="replace") as f:
reader = csv.DictReader(f)
for row in reader:
subj = (row.get("subject") or "").strip()
if subj:
subjects.append(_normalize_nats_subject(subj))
except Exception as e:
logger.warning(f"Could not load nats inventory: {e}")
return None
return subjects
# Heuristic regexes for spotting NATS subject strings in Python sources.
_NATS_USAGE_PATTERNS = [
    # Explicit publish/subscribe calls on common client variable names.
    re.compile(r'(?:nc|nats|js|jetstream)\.publish\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE),
    re.compile(r'(?:nc|nats|js|jetstream)\.subscribe\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE),
    re.compile(r'nc\.subscribe\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE),
    # subject=... keyword arguments and SUBJECT constants (min 4 chars).
    re.compile(r'subject\s*=\s*[\'"]([a-zA-Z0-9._{}*>-]{4,})[\'"]', re.IGNORECASE),
    re.compile(r'SUBJECT\s*=\s*[\'"]([a-zA-Z0-9._{}*>-]{4,})[\'"]'),
    # Broad fallback: any dotted lowercase token string (may over-match;
    # results are filtered through _NATS_SUBJECT_VALIDATE below).
    re.compile(r'[\'"]([a-z][a-z0-9_]+\.[a-z][a-z0-9_]+(?:\.[a-zA-Z0-9_{}_.*>-]+){0,4})[\'"]'),
]
# A plausible subject: starts with a letter, 3+ chars from the subject alphabet.
_NATS_SUBJECT_VALIDATE = re.compile(r'^[a-zA-Z][a-zA-Z0-9._{}*>-]{2,}$')
def _load_nats_code_subjects(repo_root: str, deadline: float) -> Set[str]:
    """Extract NATS subjects from code via regex patterns.

    Scans .py files (bounded by _walk_files budgets and *deadline*),
    pre-filters on cheap substring hints before running the regexes, and
    returns the set of normalized subjects found.

    Fix: the call-hint tuple was rebuilt inside the per-file loop; it is
    loop-invariant and is now hoisted out.
    """
    # Quick pre-filter hints: a file must contain at least one NATS-like
    # call pattern before the (much more expensive) regexes run.
    call_hints = ("nc.", "nats.", "js.", "jetstream.", "subject=", "SUBJECT=", ".publish(", ".subscribe(")
    py_files = _walk_files(repo_root, (".py",), deadline)
    found: Set[str] = set()
    for pf in py_files:
        if time.monotonic() > deadline:
            break
        if "venv" in pf or "node_modules" in pf:
            continue
        content = _read_file(pf)
        if not content:
            continue
        if not any(hint in content for hint in call_hints):
            continue
        for pat in _NATS_USAGE_PATTERNS:
            for m in pat.finditer(content):
                subj = m.group(1).strip()
                # Basic subject validation (must contain a dot)
                if "." in subj and _NATS_SUBJECT_VALIDATE.match(subj):
                    found.add(_normalize_nats_subject(subj))
    return found
def _nats_subject_matches(code_subj: str, inventory_subjects: List[str]) -> bool:
"""
Check if a code subject matches any inventory subject (wildcard-aware).
Supports * (one segment) and > (one or more segments).
"""
code_parts = code_subj.split(".")
for inv in inventory_subjects:
inv_parts = inv.split(".")
if _nats_match(code_parts, inv_parts) or _nats_match(inv_parts, code_parts):
return True
return False
def _nats_match(a_parts: List[str], b_parts: List[str]) -> bool:
"""Match NATS subject a against pattern b (with * and > wildcards)."""
if not b_parts:
return not a_parts
if b_parts[-1] == ">":
return len(a_parts) >= len(b_parts) - 1
if len(a_parts) != len(b_parts):
return False
for a, b in zip(a_parts, b_parts):
if b == "*" or a == "*":
continue
if a != b:
return False
return True
def _analyze_nats(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict, bool]:
    """Compare the NATS topic inventory against subjects referenced in code.

    Returns (findings, stats, skipped); skipped=True means the inventory
    CSV is absent and the category was not evaluated at all.
    Emits:
      - DRIFT-NATS-001 (warning): subject used in code but undocumented
      - DRIFT-NATS-002 (info): documented subject with no usage in code
    """
    inventory = _load_nats_inventory(repo_root)
    if inventory is None:
        return [], {"skipped": True}, True
    code_subjects = _load_nats_code_subjects(repo_root, deadline)
    findings = []
    # DRIFT-NATS-001: Used in code but not in inventory
    for subj in sorted(code_subjects):
        if not _nats_subject_matches(subj, inventory):
            findings.append(Finding(
                category="nats",
                severity="warning",
                id="DRIFT-NATS-001",
                title=f"NATS subject '{subj}' used in code but not in inventory",
                evidence={"path": "docs/architecture_inventory/inventory_nats_topics.csv",
                          "details": f"subject '{subj}' not found (wildcard-aware match)"},
                recommended_fix=f"Add '{subj}' to inventory_nats_topics.csv.",
            ))
    # DRIFT-NATS-002: In inventory but not used in code (info — may be legacy)
    for inv_subj in inventory:
        # NOTE(review): inventory subjects are normalized ('>' → '*'), so
        # the '.>' check below looks vestigial — confirm.
        if inv_subj.endswith(".*") or inv_subj.endswith(".>"):
            continue # wildcard subscriptions — skip
        if not _nats_subject_matches(inv_subj, list(code_subjects)):
            findings.append(Finding(
                category="nats",
                severity="info",
                id="DRIFT-NATS-002",
                title=f"Documented NATS subject '{inv_subj}' not found in code (possibly legacy)",
                evidence={"path": "docs/architecture_inventory/inventory_nats_topics.csv",
                          "details": "no matching publish/subscribe call found"},
                recommended_fix="Verify if subject is still active; mark as deprecated in inventory if not.",
            ))
    stats = {
        "inventory_subjects": len(inventory),
        "code_subjects": len(code_subjects),
        "findings": len(findings),
    }
    return findings, stats, False
# ─── Category 4: Tools ────────────────────────────────────────────────────────
def _load_rollout_tools(repo_root: str) -> Set[str]:
"""Extract all tool names mentioned in tools_rollout.yml groups."""
rollout_path = os.path.join(repo_root, "config", "tools_rollout.yml")
tools: Set[str] = set()
try:
with open(rollout_path, "r") as f:
data = yaml.safe_load(f) or {}
except Exception:
return tools
# Collect all values from group lists (non-@group entries are tool names)
def _collect(obj):
if isinstance(obj, list):
for item in obj:
if isinstance(item, str) and not item.startswith("@"):
tools.add(item)
elif isinstance(item, str) and item.startswith("@"):
group_name = item[1:]
if group_name in data:
_collect(data[group_name])
elif isinstance(obj, dict):
for v in obj.values():
_collect(v)
for key, value in data.items():
if key not in ("role_map", "agent_roles"): # these are role configs, not tool lists
_collect(value)
# Also scan role_map tool lists
role_map = data.get("role_map", {})
for role_cfg in role_map.values():
_collect(role_cfg.get("tools", []))
return tools
def _load_rbac_tools(repo_root: str) -> Dict[str, Set[str]]:
"""Load tool→{actions} from rbac_tools_matrix.yml."""
matrix_path = os.path.join(repo_root, "config", "rbac_tools_matrix.yml")
result: Dict[str, Set[str]] = {}
try:
with open(matrix_path, "r") as f:
data = yaml.safe_load(f) or {}
for tool, cfg in (data.get("tools") or {}).items():
actions = set((cfg.get("actions") or {}).keys())
result[tool] = actions
except Exception:
pass
return result
def _get_effective_tools_for_roles(repo_root: str) -> Dict[str, Set[str]]:
    """Get effective tools for agent_default and agent_cto roles.

    Imports the project's agent_tools_config at runtime (after pushing the
    router dir and repo root onto sys.path — a process-wide side effect)
    and queries two representative agents. Returns {} on any failure;
    callers treat an empty result as "effective tools unknown".
    """
    result = {}
    try:
        import sys
        router_path = os.path.join(repo_root, "services", "router")
        if router_path not in sys.path:
            sys.path.insert(0, router_path)
        if repo_root not in sys.path:
            sys.path.insert(0, repo_root)
        from agent_tools_config import get_agent_tools, reload_rollout_config
        # Ensure the in-memory rollout config reflects the on-disk state.
        reload_rollout_config()
        # Use representative agents per role
        result["agent_default"] = set(get_agent_tools("brand_new_agent_xyz_test"))
        result["agent_cto"] = set(get_agent_tools("sofiia"))
    except Exception as e:
        logger.warning(f"Could not load effective tools: {e}")
    return result
def _analyze_tools(repo_root: str) -> Tuple[List[Finding], Dict]:
    """Cross-check tools_rollout.yml, rbac_tools_matrix.yml and handlers.

    Emits:
      - DRIFT-TOOLS-001 (error): rollout references a tool with no handler
      - DRIFT-TOOLS-002 (error/warning): handler exists but no RBAC entry
      - DRIFT-TOOLS-003 (warning): RBAC entry not reachable via any role
    """
    findings = []
    rollout_tools = _load_rollout_tools(repo_root)
    rbac_tools = _load_rbac_tools(repo_root)
    role_tools = _get_effective_tools_for_roles(repo_root)
    # Union of tools any representative role can actually use.
    all_role_tools: Set[str] = set()
    for tools in role_tools.values():
        all_role_tools.update(tools)
    # DRIFT-TOOLS-001: Tool in rollout but no handler in tool_manager.py
    for tool in sorted(rollout_tools):
        if tool not in KNOWN_TOOL_HANDLERS:
            findings.append(Finding(
                category="tools",
                severity="error",
                id="DRIFT-TOOLS-001",
                title=f"Tool '{tool}' in tools_rollout.yml but no handler in tool_manager.py",
                evidence={"path": "config/tools_rollout.yml",
                          "details": f"'{tool}' referenced in rollout groups but missing from KNOWN_TOOL_HANDLERS"},
                recommended_fix=f"Add handler for '{tool}' in tool_manager.py execute_tool dispatch, or remove from rollout.",
            ))
    # DRIFT-TOOLS-002: Handler exists but not in RBAC matrix
    # Severity = error if tool is in rollout/standard_stack (actively used, no RBAC gate)
    # Severity = warning if tool appears experimental / not yet rolled out
    for tool in sorted(KNOWN_TOOL_HANDLERS):
        if tool not in rbac_tools:
            # Escalate to error if tool is actively distributed to agents
            is_rollouted = tool in rollout_tools or tool in all_role_tools
            severity = "error" if is_rollouted else "warning"
            findings.append(Finding(
                category="tools",
                severity=severity,
                id="DRIFT-TOOLS-002",
                title=f"Tool '{tool}' has a handler but is absent from rbac_tools_matrix.yml",
                evidence={"path": "config/rbac_tools_matrix.yml",
                          "details": (
                              f"'{tool}' not found in matrix.tools section. "
                              + ("In rollout → no RBAC gate applied." if is_rollouted
                                 else "Not in rollout (experimental/legacy).")
                          )},
                recommended_fix=f"Add '{tool}' with actions and entitlements to rbac_tools_matrix.yml.",
            ))
    # DRIFT-TOOLS-003: Tool in RBAC matrix but never appears in effective_tools
    # (only evaluated when effective tools could actually be resolved).
    if all_role_tools:
        for tool in sorted(rbac_tools.keys()):
            if tool not in all_role_tools:
                findings.append(Finding(
                    category="tools",
                    severity="warning",
                    id="DRIFT-TOOLS-003",
                    title=f"Tool '{tool}' is in RBAC matrix but never appears in effective_tools (dead config?)",
                    evidence={"path": "config/rbac_tools_matrix.yml",
                              "details": f"'{tool}' in matrix but not in any role's effective tool list"},
                    recommended_fix=f"Add '{tool}' to a role in tools_rollout.yml or remove from matrix.",
                ))
    stats = {
        "rollout_tools": len(rollout_tools),
        "rbac_tools": len(rbac_tools),
        "handlers": len(KNOWN_TOOL_HANDLERS),
        "role_tools": {role: len(tools) for role, tools in role_tools.items()},
        "findings": len(findings),
    }
    return findings, stats
# ─── Main Analyzer ────────────────────────────────────────────────────────────
def analyze_drift(
    repo_root: str,
    categories: Optional[List[str]] = None,
    timeout_sec: float = TIMEOUT_SEC,
) -> DriftReport:
    """
    Run drift analysis across requested categories.
    Args:
        repo_root: absolute path to repository root
        categories: subset of ["services", "openapi", "nats", "tools"] (all if None)
        timeout_sec: hard deadline for full analysis
    Returns:
        DriftReport with pass/fail verdict
    """
    all_categories = {"services", "openapi", "nats", "tools"}
    if categories:
        # Unknown category names are silently dropped rather than failing.
        run_cats = {c for c in categories if c in all_categories}
    else:
        run_cats = all_categories
    # All sub-analyzers share one absolute deadline (monotonic clock).
    deadline = time.monotonic() + timeout_sec
    all_findings: List[Finding] = []
    skipped: List[str] = []
    items_checked: Dict[str, int] = {}
    cat_stats: Dict[str, Any] = {}
    if "services" in run_cats:
        findings, stats = _analyze_services(repo_root, deadline)
        all_findings.extend(findings)
        cat_stats["services"] = stats
        items_checked["services"] = stats.get("catalog_entries", 0) + stats.get("compose_services", 0)
    if "openapi" in run_cats:
        findings, stats = _analyze_openapi(repo_root, deadline)
        all_findings.extend(findings)
        cat_stats["openapi"] = stats
        items_checked["openapi"] = stats.get("spec_paths", 0) + stats.get("code_routes", 0)
    if "nats" in run_cats:
        # nats is the only category that may self-skip (no inventory file).
        findings, stats, was_skipped = _analyze_nats(repo_root, deadline)
        if was_skipped:
            skipped.append("nats")
        else:
            all_findings.extend(findings)
            cat_stats["nats"] = stats
            items_checked["nats"] = stats.get("inventory_subjects", 0) + stats.get("code_subjects", 0)
    if "tools" in run_cats:
        # Config-file based; needs no deadline.
        findings, stats = _analyze_tools(repo_root)
        all_findings.extend(findings)
        cat_stats["tools"] = stats
        items_checked["tools"] = stats.get("rollout_tools", 0) + stats.get("rbac_tools", 0)
    # Sort findings: severity desc (error > warning > info), then category, then id
    severity_order = {"error": 0, "warning": 1, "info": 2}
    all_findings.sort(key=lambda f: (severity_order.get(f.severity, 9), f.category, f.id))
    # Redact evidence
    for f in all_findings:
        if f.evidence.get("details"):
            f.evidence["details"] = _redact_evidence(f.evidence["details"])
    errors = sum(1 for f in all_findings if f.severity == "error")
    warnings = sum(1 for f in all_findings if f.severity == "warning")
    infos = sum(1 for f in all_findings if f.severity == "info")
    # Gate rule: only error-severity findings fail the run.
    pass_ = errors == 0
    if pass_:
        summary = f"✅ Drift analysis PASSED. {len(all_findings)} findings ({warnings} warnings, {infos} infos)."
    else:
        summary = (
            f"❌ Drift analysis FAILED. {errors} error(s), {warnings} warning(s). "
            f"Categories checked: {sorted(run_cats - {'nats'} if 'nats' in skipped else run_cats)}."
        )
    if skipped:
        summary += f" Skipped (no inventory): {skipped}."
    # Start time reconstructed as (deadline - timeout_sec) → wall time spent.
    elapsed_ms = round((time.monotonic() - (deadline - timeout_sec)) * 1000, 1)
    return DriftReport(
        pass_=pass_,
        summary=summary,
        stats={
            "errors": errors,
            "warnings": warnings,
            "infos": infos,
            "skipped": skipped,
            "items_checked": items_checked,
            "elapsed_ms": elapsed_ms,
            "by_category": cat_stats,
        },
        findings=[f.to_dict() for f in all_findings],
    )
def analyze_drift_dict(repo_root: str, **kwargs) -> Dict:
    """Convenience wrapper that returns a plain dict (for ToolResult)."""
    report = analyze_drift(repo_root, **kwargs)
    payload: Dict = {"pass": report.pass_}
    for attr in ("summary", "stats", "findings"):
        payload[attr] = getattr(report, attr)
    return payload