New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
899 lines
35 KiB
Python
899 lines
35 KiB
Python
"""
|
||
Drift Analyzer — знаходить розбіжності між "джерелами правди" та "фактом".
|
||
|
||
4 категорії перевірок (незалежні, кожна повертає findings):
|
||
1. services — Service Catalog (inventory_services.csv / 01_SERVICE_CATALOG.md) vs docker-compose*.yml
|
||
2. openapi — OpenAPI specs (docs/contracts/*.yaml) vs routes у коді (FastAPI decorators)
|
||
3. nats — inventory_nats_topics.csv vs publish/subscribe usage у коді
|
||
4. tools — tools_rollout.yml + rbac_tools_matrix.yml vs фактичні handlers у tool_manager.py
|
||
|
||
Формат findings:
|
||
{ category, severity, id, title, evidence: {path, lines, details}, recommended_fix }
|
||
|
||
Pass rule: pass=false якщо errors > 0. Warnings/infos не валять gate.
|
||
"""
|
||
|
||
import csv
|
||
import fnmatch
|
||
import hashlib
|
||
import json
|
||
import logging
|
||
import os
|
||
import re
|
||
import time
|
||
import yaml
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from typing import Any, Dict, FrozenSet, List, Optional, Set, Tuple
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# ─── Constants ────────────────────────────────────────────────────────────────
|
||
|
||
# Directory names pruned from every repo walk (VCS, virtualenvs, build
# output, caches, backups).
EXCLUDED_DIRS: FrozenSet[str] = frozenset({
    "node_modules", ".git", "dist", "build", "vendor",
    ".venv", "venv", "venv_models", "sofia_venv",
    "__pycache__", ".pytest_cache", "rollback_backups",
    # NOTE(review): _is_excluded compares individual path *parts*, so this
    # slash-containing entry can never match a single component — confirm
    # whether "consolidation" alone was intended.
    "docs/consolidation",
})

# Safety limits so one analysis run cannot scan the whole disk or hang.
MAX_FILES_PER_CATEGORY = 300
MAX_BYTES_PER_FILE = 262144  # 256KB
TIMEOUT_SEC = 25.0  # Hard deadline per full analysis
|
||
|
||
# Known tool handlers (must be kept in sync with execute_tool dispatch in tool_manager.py)
# Source: Priority 1–17 handlers in tool_manager.py
# Used by _analyze_tools: rollout entries absent from this set raise
# DRIFT-TOOLS-001; entries here without RBAC coverage raise DRIFT-TOOLS-002.
KNOWN_TOOL_HANDLERS: FrozenSet[str] = frozenset({
    "memory_search", "graph_query",
    "web_search", "web_extract",
    "image_generate", "comfy_generate_image", "comfy_generate_video",
    "remember_fact",
    "presentation_create", "presentation_status", "presentation_download",
    "crawl4ai_scrape", "tts_speak", "file_tool",
    "market_data",
    "crm_search_client", "crm_upsert_client", "crm_upsert_site",
    "crm_upsert_window_unit", "crm_create_quote", "crm_update_quote",
    "crm_create_job", "calc_window_quote",
    "docs_render_quote_pdf", "docs_render_invoice_pdf",
    "schedule_propose_slots", "schedule_confirm_slot",
    "repo_tool", "pr_reviewer_tool", "contract_tool",
    "oncall_tool", "observability_tool", "config_linter_tool",
    "threatmodel_tool", "job_orchestrator_tool", "kb_tool",
    "drift_analyzer_tool",  # self-registration
})
|
||
|
||
# ─── Data Structures ──────────────────────────────────────────────────────────
|
||
|
||
@dataclass
class Finding:
    """One drift finding produced by a category analyzer."""

    category: str
    severity: str  # "error" | "warning" | "info"
    id: str
    title: str
    evidence: Dict[str, str] = field(default_factory=dict)
    recommended_fix: str = ""

    # Field order here defines the key order of the serialized finding.
    _FIELDS = ("category", "severity", "id", "title", "evidence", "recommended_fix")

    def to_dict(self) -> Dict:
        """Serialize to the plain-dict findings format used in DriftReport."""
        return {name: getattr(self, name) for name in self._FIELDS}
|
||
|
||
|
||
@dataclass
class DriftReport:
    """Aggregate result of one drift analysis run."""

    pass_: bool  # True when zero error-severity findings were produced
    summary: str  # one-line human-readable verdict
    stats: Dict[str, Any]  # counters + per-category stats
    findings: List[Dict]  # serialized Finding dicts, sorted by severity
|
||
|
||
|
||
# ─── Utility helpers ──────────────────────────────────────────────────────────
|
||
|
||
def _is_excluded(path: str) -> bool:
    """Return True when any single component of *path* is an excluded dir name."""
    for component in Path(path).parts:
        if component in EXCLUDED_DIRS:
            return True
    return False
|
||
|
||
|
||
def _walk_files(root: str, extensions: Tuple[str, ...],
                deadline: float) -> List[str]:
    """
    Collect files under *root* whose names end with one of *extensions*.

    Excluded/hidden directories are pruned from the walk, at most
    MAX_FILES_PER_CATEGORY paths are returned, and the walk stops once
    *deadline* (a time.monotonic value) has passed.
    """
    collected: List[str] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place — os.walk then never descends into these dirs.
        dirnames[:] = [
            name for name in dirnames
            if name not in EXCLUDED_DIRS and not name.startswith(".")
        ]
        if time.monotonic() > deadline:
            logger.warning("_walk_files: timeout reached")
            break
        for filename in filenames:
            if not filename.endswith(extensions):
                continue
            candidate = os.path.join(dirpath, filename)
            if _is_excluded(candidate):
                continue
            collected.append(candidate)
            if len(collected) >= MAX_FILES_PER_CATEGORY:
                return collected
    return collected
|
||
|
||
|
||
def _read_file(path: str) -> str:
    """
    Read a text file, returning at most MAX_BYTES_PER_FILE characters.

    Undecodable bytes are replaced (never raises UnicodeDecodeError);
    any OS-level failure yields "" so callers treat unreadable files
    as empty.
    """
    try:
        # One bounded read covers both cases: read(n) returns the whole
        # file when it is shorter than n, so the old getsize() branch and
        # the duplicated open() are unnecessary. Encoding is pinned to
        # UTF-8 instead of the locale default for reproducible scans.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return f.read(MAX_BYTES_PER_FILE)
    except OSError:
        return ""
|
||
|
||
|
||
_SECRET_PAT = re.compile(
|
||
r'(?i)(api[_-]?key|token|secret|password|bearer|jwt|private[_-]?key)'
|
||
r'[\s=:]+[\'"`]?([a-zA-Z0-9_\-\.]{8,})[\'"`]?'
|
||
)
|
||
|
||
|
||
def _redact_evidence(text: str) -> str:
|
||
"""Mask potential secrets in evidence strings."""
|
||
return _SECRET_PAT.sub(lambda m: f"{m.group(1)}=***REDACTED***", text)
|
||
|
||
|
||
def _rel(path: str, root: str) -> str:
|
||
"""Return path relative to root, or absolute if outside."""
|
||
try:
|
||
return os.path.relpath(path, root)
|
||
except ValueError:
|
||
return path
|
||
|
||
|
||
# ─── Category 1: Services ─────────────────────────────────────────────────────
|
||
|
||
def _load_service_catalog(repo_root: str) -> Dict[str, str]:
|
||
"""
|
||
Load services from inventory_services.csv.
|
||
Returns {service_name: status}.
|
||
"""
|
||
csv_path = os.path.join(
|
||
repo_root, "docs", "architecture_inventory", "inventory_services.csv"
|
||
)
|
||
services = {}
|
||
if not os.path.exists(csv_path):
|
||
# Fallback: scan 01_SERVICE_CATALOG.md for table rows
|
||
md_path = os.path.join(
|
||
repo_root, "docs", "architecture_inventory", "01_SERVICE_CATALOG.md"
|
||
)
|
||
if os.path.exists(md_path):
|
||
content = _read_file(md_path)
|
||
for line in content.splitlines():
|
||
m = re.match(r'\|\s*([\w\-]+)\s*\|\s*(DEPLOYED|DEFINED|PLANNED[^\|]*)', line)
|
||
if m:
|
||
services[m.group(1).strip()] = m.group(2).strip()
|
||
return services
|
||
|
||
try:
|
||
with open(csv_path, "r", newline="", errors="replace") as f:
|
||
reader = csv.DictReader(f)
|
||
for row in reader:
|
||
name = (row.get("service") or "").strip()
|
||
status = (row.get("type") or "").strip() # csv has 'type' not 'status'
|
||
if name:
|
||
services[name] = status
|
||
except Exception as e:
|
||
logger.warning(f"Could not load inventory_services.csv: {e}")
|
||
return services
|
||
|
||
|
||
def _load_compose_services(repo_root: str, deadline: float) -> Dict[str, str]:
    """
    Parse docker-compose*.yml files and return {service_name: compose_file}.

    Scans the repo root plus infra/compose/docker-compose.yml; stops early
    when *deadline* passes. Unparseable files are skipped with a debug log.
    """
    compose_files = [
        os.path.join(repo_root, entry)
        for entry in os.listdir(repo_root)
        if fnmatch.fnmatch(entry, "docker-compose*.yml")
    ]

    # Also infra subdir
    infra_compose = os.path.join(repo_root, "infra", "compose", "docker-compose.yml")
    if os.path.exists(infra_compose):
        compose_files.append(infra_compose)

    services: Dict[str, str] = {}
    for compose_path in compose_files:
        if time.monotonic() > deadline:
            break
        try:
            parsed = yaml.safe_load(_read_file(compose_path)) or {}
            for svc_name in (parsed.get("services") or {}):
                services[svc_name] = _rel(compose_path, repo_root)
        except Exception as e:
            logger.debug(f"Could not parse {compose_path}: {e}")
    return services
|
||
|
||
|
||
def _analyze_services(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict]:
    """
    Compare the service catalog against docker-compose definitions.

    Emits:
      * DRIFT-SVC-001 (error): catalog marks a service DEPLOYED but no
        compose file defines it (under any dash/underscore variant).
      * DRIFT-SVC-002 (warning): compose defines a service the catalog
        does not mention (under any dash/underscore variant).

    Returns (findings, stats).
    """
    findings: List[Finding] = []
    catalog = _load_service_catalog(repo_root)
    compose_svcs = _load_compose_services(repo_root, deadline)

    compose_names = set(compose_svcs)
    catalog_names = set(catalog)

    def _name_variants(name: str) -> Set[str]:
        # Catalog and compose files are inconsistent about dash vs underscore.
        return {name, name.replace("-", "_"), name.replace("_", "-")}

    # DEPLOYED in catalog but missing from ALL compose files
    for svc, status in catalog.items():
        if "DEPLOYED" in status.upper() and svc not in compose_names:
            if not _name_variants(svc) & compose_names:
                findings.append(Finding(
                    category="services",
                    severity="error",
                    id="DRIFT-SVC-001",
                    title=f"Service '{svc}' marked DEPLOYED in catalog but absent from all docker-compose files",
                    evidence={"path": "docs/architecture_inventory/inventory_services.csv",
                              "details": f"status={status}, not found in compose"},
                    recommended_fix=f"Add '{svc}' to appropriate docker-compose*.yml or update catalog status to DEFINED.",
                ))

    # In compose but not mentioned in catalog at all.
    # BUG FIX: the old code computed
    #   normalized = svc.replace("-", "_").replace("_", "-")
    # which chains the replacements and always yields the all-dashes form,
    # so the underscore variant was never checked. Use the full variant
    # set instead (same logic as DRIFT-SVC-001 above).
    for svc, compose_file in compose_svcs.items():
        if not _name_variants(svc) & catalog_names:
            findings.append(Finding(
                category="services",
                severity="warning",
                id="DRIFT-SVC-002",
                title=f"Service '{svc}' found in compose but not in service catalog",
                evidence={"path": compose_file, "details": f"defined in {compose_file}"},
                recommended_fix=f"Add '{svc}' to inventory_services.csv / 01_SERVICE_CATALOG.md.",
            ))

    stats = {
        "catalog_entries": len(catalog),
        "compose_services": len(compose_svcs),
        "findings": len(findings),
    }
    return findings, stats
|
||
|
||
|
||
# ─── Category 2: OpenAPI ──────────────────────────────────────────────────────
|
||
|
||
def _load_openapi_paths(repo_root: str, deadline: float) -> Dict[str, Set[str]]:
    """
    Collect API paths from OpenAPI specs.

    Sources: every .yaml/.yml/.json in docs/contracts/, plus any
    openapi*.yaml|yml|json found anywhere else in the repo.
    Returns {"/path": {"get", "post", ...}} merged across all specs.
    """
    spec_files: List[str] = []
    contracts_dir = os.path.join(repo_root, "docs", "contracts")
    if os.path.isdir(contracts_dir):
        spec_files.extend(
            os.path.join(contracts_dir, name)
            for name in os.listdir(contracts_dir)
            if name.endswith((".yaml", ".yml", ".json"))
        )

    # Also find any openapi*.yaml in repo root and services
    for dirpath, dirnames, filenames in os.walk(repo_root):
        dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".")]
        if time.monotonic() > deadline:
            break
        for name in filenames:
            if not re.match(r'openapi.*\.(ya?ml|json)$', name, re.IGNORECASE):
                continue
            full = os.path.join(dirpath, name)
            if full not in spec_files:
                spec_files.append(full)

    http_methods = {"get", "post", "put", "patch", "delete", "head", "options"}
    paths: Dict[str, Set[str]] = {}
    for spec in spec_files:
        if time.monotonic() > deadline:
            break
        try:
            raw = _read_file(spec)
            if spec.endswith((".yaml", ".yml")):
                data = yaml.safe_load(raw)
            else:
                data = json.loads(raw)
            if not isinstance(data, dict) or "paths" not in data:
                continue
            for path, methods in (data.get("paths") or {}).items():
                if not isinstance(methods, dict):
                    continue
                found = {m.lower() for m in methods if m.lower() in http_methods}
                paths.setdefault(path, set()).update(found)
        except Exception as e:
            logger.debug(f"Could not parse OpenAPI spec {spec}: {e}")

    return paths
|
||
|
||
|
||
# Decorator-style FastAPI routes: @app.get("/path"), @router.post("/path"), …
# group(1) = HTTP method, group(2) = path literal.
_FASTAPI_ROUTE_PAT = re.compile(
    r'@(?:app|router)\.(get|post|put|patch|delete|head|options)\(\s*[\'"]([^\'"]+)[\'"]',
    re.MULTILINE,
)
# Programmatic registration: x.add_api_route("/path", handler, methods=[...]).
# group(1) = path literal, group(2) = raw contents of the methods list.
_ADD_API_ROUTE_PAT = re.compile(
    r'\.add_api_route\(\s*[\'"]([^\'"]+)[\'"].*?methods\s*=\s*\[([^\]]+)\]',
    re.MULTILINE | re.DOTALL,
)
|
||
|
||
|
||
def _load_code_routes(repo_root: str, deadline: float) -> Dict[str, Set[str]]:
    """
    Scan Python files for FastAPI route registrations.

    Covers both decorator routes and add_api_route() calls.
    Returns {"/path": {"get", "post", ...}} with trailing slashes stripped
    ("/" itself is preserved).
    """
    routes: Dict[str, Set[str]] = {}

    for py_file in _walk_files(repo_root, (".py",), deadline):
        if time.monotonic() > deadline:
            break
        # Belt-and-braces: _walk_files already prunes these dirs.
        if ".venv" in py_file or "venv" in py_file or "node_modules" in py_file:
            continue
        source = _read_file(py_file)
        if not source:
            continue

        for method, raw_path in _FASTAPI_ROUTE_PAT.findall(source):
            key = raw_path.rstrip("/") or "/"
            routes.setdefault(key, set()).add(method.lower())

        for raw_path, methods_raw in _ADD_API_ROUTE_PAT.findall(source):
            parsed = {m.strip().strip('"\'').lower() for m in methods_raw.split(",")}
            key = raw_path.rstrip("/") or "/"
            routes.setdefault(key, set()).update(parsed)

    return routes
|
||
|
||
|
||
def _normalize_path(path: str) -> str:
|
||
"""Normalize OAS path for comparison: remove trailing slash, lowercase."""
|
||
return path.rstrip("/").lower() or "/"
|
||
|
||
|
||
# Paths that are infrastructure-level and expected to be missing from OAS specs.
|
||
# Add /internal/* and /debug/* patterns if your project uses them.
|
||
_OAS_IGNORE_PATH_PREFIXES: Tuple[str, ...] = (
|
||
"/healthz", "/readyz", "/livez", "/metrics",
|
||
"/internal/", "/debug/", "/__", "/favicon",
|
||
)
|
||
|
||
|
||
def _is_oas_ignored(path: str) -> bool:
|
||
"""Return True if path is on the OAS ignore allowlist."""
|
||
p = path.lower()
|
||
return any(p == prefix.rstrip("/") or p.startswith(prefix)
|
||
for prefix in _OAS_IGNORE_PATH_PREFIXES)
|
||
|
||
|
||
def _load_openapi_deprecated(repo_root: str) -> Set[str]:
    """
    Return normalized paths that have any operation marked 'deprecated: true'
    in any openapi*.yaml|yml|json spec under *repo_root*.

    Deprecated endpoints downgrade DRIFT-OAS-001 from error to warning.
    """
    spec_files: List[str] = []
    for dirpath, dirnames, filenames in os.walk(repo_root):
        dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".")]
        spec_files.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if re.match(r'openapi.*\.(ya?ml|json)$', name, re.IGNORECASE)
        )

    deprecated: Set[str] = set()
    for spec in spec_files:
        try:
            raw = _read_file(spec)
            data = yaml.safe_load(raw) if spec.endswith((".yaml", ".yml")) else json.loads(raw)
            if not isinstance(data, dict) or "paths" not in data:
                continue
            for path, methods in (data.get("paths") or {}).items():
                if not isinstance(methods, dict):
                    continue
                # One deprecated operation is enough to flag the whole path.
                if any(isinstance(op, dict) and op.get("deprecated", False)
                       for op in methods.values()):
                    deprecated.add(_normalize_path(path))
        except Exception:
            # Best-effort: unreadable specs simply contribute nothing.
            pass
    return deprecated
|
||
|
||
|
||
def _analyze_openapi(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict]:
    """
    Compare OpenAPI specs against FastAPI routes found in code.

    Emits:
      * DRIFT-OAS-001 (error; warning when the path is deprecated in spec):
        spec path has no matching code route.
      * DRIFT-OAS-003 (warning): path exists in both, but spec lists HTTP
        methods the code does not implement.
      * DRIFT-OAS-002 (error): /v1/ code route not documented in any spec.

    When no specs exist at all, the category is a silent no-op.
    Returns (findings, stats).
    """
    findings = []
    spec_paths = _load_openapi_paths(repo_root, deadline)
    code_routes = _load_code_routes(repo_root, deadline)

    # Nothing to compare against — report zero findings rather than flagging
    # every code route as undocumented.
    if not spec_paths:
        return findings, {"spec_paths": 0, "code_routes": len(code_routes), "findings": 0}

    deprecated_paths = _load_openapi_deprecated(repo_root)

    # Normalize both sides (trailing slash stripped, lowercased) so that
    # cosmetic differences don't produce false drift.
    spec_norm: Dict[str, Set[str]] = {
        _normalize_path(p): methods for p, methods in spec_paths.items()
    }
    code_norm: Dict[str, Set[str]] = {
        _normalize_path(p): methods for p, methods in code_routes.items()
    }

    # DRIFT-OAS-001: In spec but not in code
    for path, methods in sorted(spec_norm.items()):
        # Skip infra/health endpoints — they are expected to be absent from OAS
        if _is_oas_ignored(path):
            continue
        if path not in code_norm:
            # Deprecated spec paths → warning only, not blocking
            severity = "warning" if path in deprecated_paths else "error"
            dep_note = " (deprecated in spec)" if path in deprecated_paths else ""
            findings.append(Finding(
                category="openapi",
                severity=severity,
                id="DRIFT-OAS-001",
                title=f"OpenAPI path '{path}'{dep_note} not found in codebase routes",
                evidence={"path": "docs/contracts/",
                          "details": f"methods={sorted(methods)}, missing from FastAPI decorators"},
                recommended_fix=(
                    f"Mark '{path}' as removed in OpenAPI or implement the route."
                    if path in deprecated_paths
                    else f"Implement '{path}' route in code or remove from OpenAPI spec."
                ),
            ))
        else:
            # DRIFT-OAS-003: Method mismatch — only spec-promises-more-than-code
            # is flagged; extra methods in code are not reported here.
            code_methods = code_norm[path]
            missing_in_code = methods - code_methods
            if missing_in_code:
                findings.append(Finding(
                    category="openapi",
                    severity="warning",
                    id="DRIFT-OAS-003",
                    title=f"Method mismatch for path '{path}': spec has {sorted(missing_in_code)}, code missing",
                    evidence={"path": "docs/contracts/",
                              "details": f"spec={sorted(methods)}, code={sorted(code_methods)}"},
                    recommended_fix=f"Add missing HTTP methods to code route for '{path}'.",
                ))

    # DRIFT-OAS-002: In code (/v1/ paths) but not in spec — only versioned
    # API routes are held to the "must be documented" standard.
    for path, methods in sorted(code_norm.items()):
        # Health/internal endpoints are expected to be absent from OAS
        if _is_oas_ignored(path):
            continue
        if not path.startswith("/v1/"):
            continue
        if path not in spec_norm:
            findings.append(Finding(
                category="openapi",
                severity="error",
                id="DRIFT-OAS-002",
                title=f"Code route '{path}' not documented in any OpenAPI spec",
                evidence={"path": "services/", "details": f"methods={sorted(methods)}"},
                recommended_fix=f"Add '{path}' to OpenAPI spec in docs/contracts/.",
            ))

    stats = {
        "spec_paths": len(spec_paths),
        "code_routes": len(code_routes),
        "findings": len(findings),
    }
    return findings, stats
|
||
|
||
|
||
# ─── Category 3: NATS ─────────────────────────────────────────────────────────
|
||
|
||
_NATS_WILDCARD_PAT = re.compile(r'\{[^}]+\}|\*|>') # {agent_id}, *, >
|
||
|
||
def _normalize_nats_subject(subj: str) -> str:
|
||
"""Replace wildcards with * for matching. Lowercase."""
|
||
return _NATS_WILDCARD_PAT.sub("*", subj.strip()).lower()
|
||
|
||
|
||
def _load_nats_inventory(repo_root: str) -> Optional[List[str]]:
|
||
"""
|
||
Load documented NATS subjects from inventory_nats_topics.csv.
|
||
Returns list of normalized subjects, or None if file absent.
|
||
"""
|
||
csv_path = os.path.join(
|
||
repo_root, "docs", "architecture_inventory", "inventory_nats_topics.csv"
|
||
)
|
||
if not os.path.exists(csv_path):
|
||
return None
|
||
|
||
subjects = []
|
||
try:
|
||
with open(csv_path, "r", newline="", errors="replace") as f:
|
||
reader = csv.DictReader(f)
|
||
for row in reader:
|
||
subj = (row.get("subject") or "").strip()
|
||
if subj:
|
||
subjects.append(_normalize_nats_subject(subj))
|
||
except Exception as e:
|
||
logger.warning(f"Could not load nats inventory: {e}")
|
||
return None
|
||
return subjects
|
||
|
||
|
||
# Regexes that pull candidate NATS subjects out of Python source,
# ordered from precise (explicit publish/subscribe calls) to heuristic
# (any dotted lowercase string literal). group(1) is always the subject.
_NATS_USAGE_PATTERNS = [
    re.compile(r'(?:nc|nats|js|jetstream)\.publish\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE),
    re.compile(r'(?:nc|nats|js|jetstream)\.subscribe\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE),
    re.compile(r'nc\.subscribe\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE),
    re.compile(r'subject\s*=\s*[\'"]([a-zA-Z0-9._{}*>-]{4,})[\'"]', re.IGNORECASE),
    re.compile(r'SUBJECT\s*=\s*[\'"]([a-zA-Z0-9._{}*>-]{4,})[\'"]'),
    re.compile(r'[\'"]([a-z][a-z0-9_]+\.[a-z][a-z0-9_]+(?:\.[a-zA-Z0-9_{}_.*>-]+){0,4})[\'"]'),
]

# Plausibility filter: subject must start with a letter and have 3+ chars
# of subject-legal tokens. Applied on top of the patterns above.
_NATS_SUBJECT_VALIDATE = re.compile(r'^[a-zA-Z][a-zA-Z0-9._{}*>-]{2,}$')
|
||
|
||
|
||
# Quick pre-filter tokens: a file containing none of these cannot contain
# a NATS call we care about, so the expensive regex passes are skipped.
# Hoisted to module level — the old code rebuilt this tuple once per file
# inside the scan loop.
_NATS_CALL_HINTS = (
    "nc.", "nats.", "js.", "jetstream.",
    "subject=", "SUBJECT=", ".publish(", ".subscribe(",
)


def _load_nats_code_subjects(repo_root: str, deadline: float) -> Set[str]:
    """
    Extract NATS subjects used in Python code via regex patterns.

    Scans up to MAX_FILES_PER_CATEGORY files, stops at *deadline*, and
    returns the set of normalized subjects found.
    """
    py_files = _walk_files(repo_root, (".py",), deadline)
    found: Set[str] = set()

    for pf in py_files:
        if time.monotonic() > deadline:
            break
        if "venv" in pf or "node_modules" in pf:
            continue
        content = _read_file(pf)
        if not content:
            continue
        # Cheap substring pre-filter before running every regex.
        if not any(hint in content for hint in _NATS_CALL_HINTS):
            continue

        for pat in _NATS_USAGE_PATTERNS:
            for m in pat.finditer(content):
                subj = m.group(1).strip()
                # Basic subject validation (must contain a dot)
                if "." in subj and _NATS_SUBJECT_VALIDATE.match(subj):
                    found.add(_normalize_nats_subject(subj))

    return found
|
||
|
||
|
||
def _nats_subject_matches(code_subj: str, inventory_subjects: List[str]) -> bool:
|
||
"""
|
||
Check if a code subject matches any inventory subject (wildcard-aware).
|
||
Supports * (one segment) and > (one or more segments).
|
||
"""
|
||
code_parts = code_subj.split(".")
|
||
for inv in inventory_subjects:
|
||
inv_parts = inv.split(".")
|
||
if _nats_match(code_parts, inv_parts) or _nats_match(inv_parts, code_parts):
|
||
return True
|
||
return False
|
||
|
||
|
||
def _nats_match(a_parts: List[str], b_parts: List[str]) -> bool:
|
||
"""Match NATS subject a against pattern b (with * and > wildcards)."""
|
||
if not b_parts:
|
||
return not a_parts
|
||
if b_parts[-1] == ">":
|
||
return len(a_parts) >= len(b_parts) - 1
|
||
if len(a_parts) != len(b_parts):
|
||
return False
|
||
for a, b in zip(a_parts, b_parts):
|
||
if b == "*" or a == "*":
|
||
continue
|
||
if a != b:
|
||
return False
|
||
return True
|
||
|
||
|
||
def _analyze_nats(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict, bool]:
    """
    Compare the NATS subject inventory against subjects used in code.

    Returns (findings, stats, skipped). skipped=True when no inventory CSV
    exists; the category then contributes nothing to the report.

    Emits:
      * DRIFT-NATS-001 (warning): subject used in code, not in inventory.
      * DRIFT-NATS-002 (info): documented subject with no code usage.
    """
    inventory = _load_nats_inventory(repo_root)
    if inventory is None:
        return [], {"skipped": True}, True

    code_subjects = _load_nats_code_subjects(repo_root, deadline)
    findings = []

    # DRIFT-NATS-001: Used in code but not in inventory
    for subj in sorted(code_subjects):
        if not _nats_subject_matches(subj, inventory):
            findings.append(Finding(
                category="nats",
                severity="warning",
                id="DRIFT-NATS-001",
                title=f"NATS subject '{subj}' used in code but not in inventory",
                evidence={"path": "docs/architecture_inventory/inventory_nats_topics.csv",
                          "details": f"subject '{subj}' not found (wildcard-aware match)"},
                recommended_fix=f"Add '{subj}' to inventory_nats_topics.csv.",
            ))

    # DRIFT-NATS-002: In inventory but not used in code (info — may be legacy)
    for inv_subj in inventory:
        # NOTE(review): inventory subjects pass through _normalize_nats_subject,
        # which rewrites '>' to '*', so the '.>' half of this check can never
        # fire — confirm whether raw subjects were expected here.
        if inv_subj.endswith(".*") or inv_subj.endswith(".>"):
            continue  # wildcard subscriptions — skip
        if not _nats_subject_matches(inv_subj, list(code_subjects)):
            findings.append(Finding(
                category="nats",
                severity="info",
                id="DRIFT-NATS-002",
                title=f"Documented NATS subject '{inv_subj}' not found in code (possibly legacy)",
                evidence={"path": "docs/architecture_inventory/inventory_nats_topics.csv",
                          "details": "no matching publish/subscribe call found"},
                recommended_fix="Verify if subject is still active; mark as deprecated in inventory if not.",
            ))

    stats = {
        "inventory_subjects": len(inventory),
        "code_subjects": len(code_subjects),
        "findings": len(findings),
    }
    return findings, stats, False
|
||
|
||
|
||
# ─── Category 4: Tools ────────────────────────────────────────────────────────
|
||
|
||
def _load_rollout_tools(repo_root: str) -> Set[str]:
    """
    Extract all tool names mentioned in tools_rollout.yml.

    "@group" references are expanded recursively against top-level keys.
    role_map/agent_roles sections are treated as role configs and skipped
    in the main pass, except role_map's explicit 'tools' lists, which are
    also collected. Returns an empty set when the file is unreadable.
    """
    rollout_path = os.path.join(repo_root, "config", "tools_rollout.yml")
    tools: Set[str] = set()
    try:
        with open(rollout_path, "r") as f:
            data = yaml.safe_load(f) or {}
    except Exception:
        return tools

    # Collect all values from group lists (non-@group entries are tool names)
    def _collect(obj):
        if isinstance(obj, list):
            for item in obj:
                if isinstance(item, str) and not item.startswith("@"):
                    tools.add(item)
                elif isinstance(item, str) and item.startswith("@"):
                    group_name = item[1:]
                    if group_name in data:
                        _collect(data[group_name])
        elif isinstance(obj, dict):
            for v in obj.values():
                _collect(v)

    for key, value in data.items():
        if key not in ("role_map", "agent_roles"):  # role configs, not tool lists
            _collect(value)

    # Also scan role_map tool lists.
    # ROBUSTNESS FIX: YAML may yield a non-dict role_map or non-dict role
    # entries (e.g. a plain string); the old code called .values()/.get()
    # unguarded and raised AttributeError in that case.
    role_map = data.get("role_map") or {}
    if isinstance(role_map, dict):
        for role_cfg in role_map.values():
            if isinstance(role_cfg, dict):
                _collect(role_cfg.get("tools", []))

    return tools
|
||
|
||
|
||
def _load_rbac_tools(repo_root: str) -> Dict[str, Set[str]]:
    """Load {tool_name: {action, ...}} from config/rbac_tools_matrix.yml."""
    matrix_path = os.path.join(repo_root, "config", "rbac_tools_matrix.yml")
    result: Dict[str, Set[str]] = {}
    try:
        with open(matrix_path, "r") as f:
            data = yaml.safe_load(f) or {}
        for tool, cfg in (data.get("tools") or {}).items():
            result[tool] = set((cfg.get("actions") or {}).keys())
    except Exception:
        # Missing or malformed matrix → empty mapping; callers then treat
        # every tool as absent from RBAC.
        pass
    return result
|
||
|
||
|
||
def _get_effective_tools_for_roles(repo_root: str) -> Dict[str, Set[str]]:
    """
    Resolve the effective tool sets for the agent_default and agent_cto roles.

    Dynamically imports the project's agent_tools_config module (adding the
    router service dir and repo root to sys.path first) and queries it with
    representative agent names. Returns {} on any failure so the tools
    analysis degrades gracefully.

    Side effect: permanently prepends entries to sys.path for this process.
    """
    result = {}
    try:
        import sys
        router_path = os.path.join(repo_root, "services", "router")
        if router_path not in sys.path:
            sys.path.insert(0, router_path)
        if repo_root not in sys.path:
            sys.path.insert(0, repo_root)

        from agent_tools_config import get_agent_tools, reload_rollout_config
        # Re-read rollout config from disk so results reflect current files,
        # not a previously cached state.
        reload_rollout_config()

        # Use representative agents per role. Presumably an unknown agent
        # name resolves to the default role and 'sofiia' maps to the CTO
        # role — TODO confirm against agent_tools_config.
        result["agent_default"] = set(get_agent_tools("brand_new_agent_xyz_test"))
        result["agent_cto"] = set(get_agent_tools("sofiia"))
    except Exception as e:
        logger.warning(f"Could not load effective tools: {e}")
    return result
|
||
|
||
|
||
def _analyze_tools(repo_root: str) -> Tuple[List[Finding], Dict]:
    """
    Cross-check rollout config, RBAC matrix, and known tool handlers.

    Emits:
      * DRIFT-TOOLS-001 (error): rollout references a tool that has no
        handler in KNOWN_TOOL_HANDLERS.
      * DRIFT-TOOLS-002 (error when the tool is rolled out, else warning):
        handler exists but tool is absent from the RBAC matrix.
      * DRIFT-TOOLS-003 (warning): RBAC entry never granted to any role.

    Returns (findings, stats).
    """
    findings = []

    rollout_tools = _load_rollout_tools(repo_root)
    rbac_tools = _load_rbac_tools(repo_root)
    role_tools = _get_effective_tools_for_roles(repo_root)

    # Union of tools actually granted to any inspected role.
    all_role_tools: Set[str] = set()
    for tools in role_tools.values():
        all_role_tools.update(tools)

    # DRIFT-TOOLS-001: Tool in rollout but no handler in tool_manager.py
    for tool in sorted(rollout_tools):
        if tool not in KNOWN_TOOL_HANDLERS:
            findings.append(Finding(
                category="tools",
                severity="error",
                id="DRIFT-TOOLS-001",
                title=f"Tool '{tool}' in tools_rollout.yml but no handler in tool_manager.py",
                evidence={"path": "config/tools_rollout.yml",
                          "details": f"'{tool}' referenced in rollout groups but missing from KNOWN_TOOL_HANDLERS"},
                recommended_fix=f"Add handler for '{tool}' in tool_manager.py execute_tool dispatch, or remove from rollout.",
            ))

    # DRIFT-TOOLS-002: Handler exists but not in RBAC matrix
    # Severity = error if tool is in rollout/standard_stack (actively used, no RBAC gate)
    # Severity = warning if tool appears experimental / not yet rolled out
    for tool in sorted(KNOWN_TOOL_HANDLERS):
        if tool not in rbac_tools:
            # Escalate to error if tool is actively distributed to agents
            is_rollouted = tool in rollout_tools or tool in all_role_tools
            severity = "error" if is_rollouted else "warning"
            findings.append(Finding(
                category="tools",
                severity=severity,
                id="DRIFT-TOOLS-002",
                title=f"Tool '{tool}' has a handler but is absent from rbac_tools_matrix.yml",
                evidence={"path": "config/rbac_tools_matrix.yml",
                          "details": (
                              f"'{tool}' not found in matrix.tools section. "
                              + ("In rollout → no RBAC gate applied." if is_rollouted
                                 else "Not in rollout (experimental/legacy).")
                          )},
                recommended_fix=f"Add '{tool}' with actions and entitlements to rbac_tools_matrix.yml.",
            ))

    # DRIFT-TOOLS-003: Tool in RBAC matrix but never appears in effective_tools.
    # Only evaluated when role resolution succeeded — an empty all_role_tools
    # would otherwise flag every matrix entry as dead config.
    if all_role_tools:
        for tool in sorted(rbac_tools.keys()):
            if tool not in all_role_tools:
                findings.append(Finding(
                    category="tools",
                    severity="warning",
                    id="DRIFT-TOOLS-003",
                    title=f"Tool '{tool}' is in RBAC matrix but never appears in effective_tools (dead config?)",
                    evidence={"path": "config/rbac_tools_matrix.yml",
                              "details": f"'{tool}' in matrix but not in any role's effective tool list"},
                    recommended_fix=f"Add '{tool}' to a role in tools_rollout.yml or remove from matrix.",
                ))

    stats = {
        "rollout_tools": len(rollout_tools),
        "rbac_tools": len(rbac_tools),
        "handlers": len(KNOWN_TOOL_HANDLERS),
        "role_tools": {role: len(tools) for role, tools in role_tools.items()},
        "findings": len(findings),
    }
    return findings, stats
|
||
|
||
|
||
# ─── Main Analyzer ────────────────────────────────────────────────────────────
|
||
|
||
def analyze_drift(
    repo_root: str,
    categories: Optional[List[str]] = None,
    timeout_sec: float = TIMEOUT_SEC,
) -> DriftReport:
    """
    Run drift analysis across requested categories.

    Args:
        repo_root: absolute path to repository root
        categories: subset of ["services", "openapi", "nats", "tools"] (all if None)
        timeout_sec: hard deadline for full analysis

    Returns:
        DriftReport with pass/fail verdict (pass_ is False iff any
        error-severity finding was produced; the nats category may be
        skipped entirely when its inventory CSV is absent).
    """
    # Unknown category names are silently dropped rather than raising.
    all_categories = {"services", "openapi", "nats", "tools"}
    if categories:
        run_cats = {c for c in categories if c in all_categories}
    else:
        run_cats = all_categories

    # One shared monotonic deadline is threaded through all file walks.
    deadline = time.monotonic() + timeout_sec
    all_findings: List[Finding] = []
    skipped: List[str] = []

    items_checked: Dict[str, int] = {}
    cat_stats: Dict[str, Any] = {}

    if "services" in run_cats:
        findings, stats = _analyze_services(repo_root, deadline)
        all_findings.extend(findings)
        cat_stats["services"] = stats
        items_checked["services"] = stats.get("catalog_entries", 0) + stats.get("compose_services", 0)

    if "openapi" in run_cats:
        findings, stats = _analyze_openapi(repo_root, deadline)
        all_findings.extend(findings)
        cat_stats["openapi"] = stats
        items_checked["openapi"] = stats.get("spec_paths", 0) + stats.get("code_routes", 0)

    if "nats" in run_cats:
        # nats is the only category that can self-skip (missing inventory).
        findings, stats, was_skipped = _analyze_nats(repo_root, deadline)
        if was_skipped:
            skipped.append("nats")
        else:
            all_findings.extend(findings)
            cat_stats["nats"] = stats
            items_checked["nats"] = stats.get("inventory_subjects", 0) + stats.get("code_subjects", 0)

    if "tools" in run_cats:
        # tools analysis reads small config files only — no deadline needed.
        findings, stats = _analyze_tools(repo_root)
        all_findings.extend(findings)
        cat_stats["tools"] = stats
        items_checked["tools"] = stats.get("rollout_tools", 0) + stats.get("rbac_tools", 0)

    # Sort findings: severity desc (error > warning > info), then category, then id
    severity_order = {"error": 0, "warning": 1, "info": 2}
    all_findings.sort(key=lambda f: (severity_order.get(f.severity, 9), f.category, f.id))

    # Redact evidence details (may contain secrets scraped from source files)
    for f in all_findings:
        if f.evidence.get("details"):
            f.evidence["details"] = _redact_evidence(f.evidence["details"])

    errors = sum(1 for f in all_findings if f.severity == "error")
    warnings = sum(1 for f in all_findings if f.severity == "warning")
    infos = sum(1 for f in all_findings if f.severity == "info")

    # Pass rule: only errors fail the gate.
    pass_ = errors == 0

    if pass_:
        summary = f"✅ Drift analysis PASSED. {len(all_findings)} findings ({warnings} warnings, {infos} infos)."
    else:
        summary = (
            f"❌ Drift analysis FAILED. {errors} error(s), {warnings} warning(s). "
            f"Categories checked: {sorted(run_cats - {'nats'} if 'nats' in skipped else run_cats)}."
        )
    if skipped:
        summary += f" Skipped (no inventory): {skipped}."

    # (deadline - timeout_sec) reconstructs the start timestamp.
    elapsed_ms = round((time.monotonic() - (deadline - timeout_sec)) * 1000, 1)

    return DriftReport(
        pass_=pass_,
        summary=summary,
        stats={
            "errors": errors,
            "warnings": warnings,
            "infos": infos,
            "skipped": skipped,
            "items_checked": items_checked,
            "elapsed_ms": elapsed_ms,
            "by_category": cat_stats,
        },
        findings=[f.to_dict() for f in all_findings],
    )
|
||
|
||
|
||
def analyze_drift_dict(repo_root: str, **kwargs) -> Dict:
    """Convenience wrapper that returns a plain dict (for ToolResult)."""
    report = analyze_drift(repo_root, **kwargs)
    payload: Dict = {"pass": report.pass_}
    payload["summary"] = report.summary
    payload["stats"] = report.stats
    payload["findings"] = report.findings
    return payload
|