""" Drift Analyzer — знаходить розбіжності між "джерелами правди" та "фактом". 4 категорії перевірок (незалежні, кожна повертає findings): 1. services — Service Catalog (inventory_services.csv / 01_SERVICE_CATALOG.md) vs docker-compose*.yml 2. openapi — OpenAPI specs (docs/contracts/*.yaml) vs routes у коді (FastAPI decorators) 3. nats — inventory_nats_topics.csv vs publish/subscribe usage у коді 4. tools — tools_rollout.yml + rbac_tools_matrix.yml vs фактичні handlers у tool_manager.py Формат findings: { category, severity, id, title, evidence: {path, lines, details}, recommended_fix } Pass rule: pass=false якщо errors > 0. Warnings/infos не валять gate. """ import csv import fnmatch import hashlib import json import logging import os import re import time import yaml from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, FrozenSet, List, Optional, Set, Tuple logger = logging.getLogger(__name__) # ─── Constants ──────────────────────────────────────────────────────────────── EXCLUDED_DIRS: FrozenSet[str] = frozenset({ "node_modules", ".git", "dist", "build", "vendor", ".venv", "venv", "venv_models", "sofia_venv", "__pycache__", ".pytest_cache", "rollback_backups", "docs/consolidation", }) MAX_FILES_PER_CATEGORY = 300 MAX_BYTES_PER_FILE = 262144 # 256KB TIMEOUT_SEC = 25.0 # Hard deadline per full analysis # Known tool handlers (must be kept in sync with execute_tool dispatch in tool_manager.py) # Source: Priority 1–17 handlers in tool_manager.py KNOWN_TOOL_HANDLERS: FrozenSet[str] = frozenset({ "memory_search", "graph_query", "web_search", "web_extract", "image_generate", "comfy_generate_image", "comfy_generate_video", "remember_fact", "presentation_create", "presentation_status", "presentation_download", "crawl4ai_scrape", "tts_speak", "file_tool", "market_data", "crm_search_client", "crm_upsert_client", "crm_upsert_site", "crm_upsert_window_unit", "crm_create_quote", "crm_update_quote", "crm_create_job", "calc_window_quote", "docs_render_quote_pdf", "docs_render_invoice_pdf", "schedule_propose_slots", "schedule_confirm_slot", "repo_tool", "pr_reviewer_tool", "contract_tool", "oncall_tool", "observability_tool", "config_linter_tool", "threatmodel_tool", "job_orchestrator_tool", "kb_tool", "drift_analyzer_tool", # self-registration }) # ─── Data Structures ────────────────────────────────────────────────────────── @dataclass class Finding: category: str severity: str # "error" | "warning" | "info" id: str title: str evidence: Dict[str, str] = field(default_factory=dict) recommended_fix: str = "" def to_dict(self) -> Dict: return { "category": self.category, "severity": self.severity, "id": self.id, "title": self.title, "evidence": self.evidence, "recommended_fix": self.recommended_fix, } @dataclass class DriftReport: pass_: bool summary: str stats: Dict[str, Any] findings: List[Dict] # ─── Utility helpers ────────────────────────────────────────────────────────── def _is_excluded(path: str) -> bool: """Check if any part of the path is in the excluded dirs set.""" parts = Path(path).parts return any(p in EXCLUDED_DIRS for p in parts) def _walk_files(root: str, extensions: Tuple[str, ...], deadline: float) -> List[str]: """ Walk repo root and collect files with given extensions. Respects EXCLUDED_DIRS, MAX_FILES_PER_CATEGORY, TIMEOUT_SEC. """ found = [] for dirpath, dirnames, filenames in os.walk(root): # Prune excluded dirs in-place (affects os.walk recursion) dirnames[:] = [ d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".") ] if time.monotonic() > deadline: logger.warning("_walk_files: timeout reached") break for fname in filenames: if fname.endswith(extensions): full = os.path.join(dirpath, fname) if not _is_excluded(full): found.append(full) if len(found) >= MAX_FILES_PER_CATEGORY: return found return found def _read_file(path: str) -> str: """Read file with size limit. Returns empty string on error.""" try: size = os.path.getsize(path) if size > MAX_BYTES_PER_FILE: with open(path, "r", errors="replace") as f: return f.read(MAX_BYTES_PER_FILE) with open(path, "r", errors="replace") as f: return f.read() except Exception: return "" _SECRET_PAT = re.compile( r'(?i)(api[_-]?key|token|secret|password|bearer|jwt|private[_-]?key)' r'[\s=:]+[\'"`]?([a-zA-Z0-9_\-\.]{8,})[\'"`]?' ) def _redact_evidence(text: str) -> str: """Mask potential secrets in evidence strings.""" return _SECRET_PAT.sub(lambda m: f"{m.group(1)}=***REDACTED***", text) def _rel(path: str, root: str) -> str: """Return path relative to root, or absolute if outside.""" try: return os.path.relpath(path, root) except ValueError: return path # ─── Category 1: Services ───────────────────────────────────────────────────── def _load_service_catalog(repo_root: str) -> Dict[str, str]: """ Load services from inventory_services.csv. Returns {service_name: status}. """ csv_path = os.path.join( repo_root, "docs", "architecture_inventory", "inventory_services.csv" ) services = {} if not os.path.exists(csv_path): # Fallback: scan 01_SERVICE_CATALOG.md for table rows md_path = os.path.join( repo_root, "docs", "architecture_inventory", "01_SERVICE_CATALOG.md" ) if os.path.exists(md_path): content = _read_file(md_path) for line in content.splitlines(): m = re.match(r'\|\s*([\w\-]+)\s*\|\s*(DEPLOYED|DEFINED|PLANNED[^\|]*)', line) if m: services[m.group(1).strip()] = m.group(2).strip() return services try: with open(csv_path, "r", newline="", errors="replace") as f: reader = csv.DictReader(f) for row in reader: name = (row.get("service") or "").strip() status = (row.get("type") or "").strip() # csv has 'type' not 'status' if name: services[name] = status except Exception as e: logger.warning(f"Could not load inventory_services.csv: {e}") return services def _load_compose_services(repo_root: str, deadline: float) -> Dict[str, str]: """ Parse docker-compose*.yml files and return {service_name: compose_file}. """ compose_files = [] for entry in os.listdir(repo_root): if fnmatch.fnmatch(entry, "docker-compose*.yml"): compose_files.append(os.path.join(repo_root, entry)) # Also infra subdir infra_compose = os.path.join(repo_root, "infra", "compose", "docker-compose.yml") if os.path.exists(infra_compose): compose_files.append(infra_compose) services = {} for cf in compose_files: if time.monotonic() > deadline: break try: content = _read_file(cf) data = yaml.safe_load(content) or {} svc_section = data.get("services") or {} for svc_name in svc_section: services[svc_name] = _rel(cf, repo_root) except Exception as e: logger.debug(f"Could not parse {cf}: {e}") return services def _analyze_services(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict]: findings = [] catalog = _load_service_catalog(repo_root) compose_svcs = _load_compose_services(repo_root, deadline) compose_names = set(compose_svcs.keys()) catalog_names = set(catalog.keys()) # DEPLOYED in catalog but missing from ALL compose files for svc, status in catalog.items(): if "DEPLOYED" in status.upper() and svc not in compose_names: # Normalize: some catalog names use dashes vs underscores differently normalized = svc.replace("-", "_") variants = {svc, normalized, svc.replace("_", "-")} if not variants.intersection(compose_names): findings.append(Finding( category="services", severity="error", id="DRIFT-SVC-001", title=f"Service '{svc}' marked DEPLOYED in catalog but absent from all docker-compose files", evidence={"path": "docs/architecture_inventory/inventory_services.csv", "details": f"status={status}, not found in compose"}, recommended_fix=f"Add '{svc}' to appropriate docker-compose*.yml or update catalog status to DEFINED.", )) # In compose but not mentioned in catalog at all for svc, compose_file in compose_svcs.items(): if svc not in catalog_names: normalized = svc.replace("-", "_").replace("_", "-") if svc not in catalog_names and normalized not in catalog_names: findings.append(Finding( category="services", severity="warning", id="DRIFT-SVC-002", title=f"Service '{svc}' found in compose but not in service catalog", evidence={"path": compose_file, "details": f"defined in {compose_file}"}, recommended_fix=f"Add '{svc}' to inventory_services.csv / 01_SERVICE_CATALOG.md.", )) stats = { "catalog_entries": len(catalog), "compose_services": len(compose_svcs), "findings": len(findings), } return findings, stats # ─── Category 2: OpenAPI ────────────────────────────────────────────────────── def _load_openapi_paths(repo_root: str, deadline: float) -> Dict[str, Set[str]]: """ Scan docs/contracts/*.openapi.yaml and any openapi*.yaml/yml/json. Returns {"/path": {"get", "post", ...}}. """ spec_files = [] contracts_dir = os.path.join(repo_root, "docs", "contracts") if os.path.isdir(contracts_dir): for f in os.listdir(contracts_dir): if f.endswith((".yaml", ".yml", ".json")): spec_files.append(os.path.join(contracts_dir, f)) # Also find any openapi*.yaml in repo root and services for dirpath, dirnames, filenames in os.walk(repo_root): dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".")] if time.monotonic() > deadline: break for f in filenames: if re.match(r'openapi.*\.(ya?ml|json)$', f, re.IGNORECASE): full = os.path.join(dirpath, f) if full not in spec_files: spec_files.append(full) paths: Dict[str, Set[str]] = {} for sf in spec_files: if time.monotonic() > deadline: break try: content = _read_file(sf) data = yaml.safe_load(content) if sf.endswith((".yaml", ".yml")) else json.loads(content) if not isinstance(data, dict) or "paths" not in data: continue for path, methods in (data.get("paths") or {}).items(): if not isinstance(methods, dict): continue methods_set = { m.lower() for m in methods if m.lower() in {"get", "post", "put", "patch", "delete", "head", "options"} } if path not in paths: paths[path] = set() paths[path].update(methods_set) except Exception as e: logger.debug(f"Could not parse OpenAPI spec {sf}: {e}") return paths _FASTAPI_ROUTE_PAT = re.compile( r'@(?:app|router)\.(get|post|put|patch|delete|head|options)\(\s*[\'"]([^\'"]+)[\'"]', re.MULTILINE, ) _ADD_API_ROUTE_PAT = re.compile( r'\.add_api_route\(\s*[\'"]([^\'"]+)[\'"].*?methods\s*=\s*\[([^\]]+)\]', re.MULTILINE | re.DOTALL, ) def _load_code_routes(repo_root: str, deadline: float) -> Dict[str, Set[str]]: """ Scan Python files for FastAPI route decorators. Returns {"/path": {"get", "post", ...}}. """ py_files = _walk_files(repo_root, (".py",), deadline) routes: Dict[str, Set[str]] = {} for pf in py_files: if time.monotonic() > deadline: break if ".venv" in pf or "venv" in pf or "node_modules" in pf: continue content = _read_file(pf) if not content: continue for method, path in _FASTAPI_ROUTE_PAT.findall(content): norm = path.rstrip("/") or "/" if norm not in routes: routes[norm] = set() routes[norm].add(method.lower()) for path, methods_raw in _ADD_API_ROUTE_PAT.findall(content): methods = {m.strip().strip('"\'').lower() for m in methods_raw.split(",")} norm = path.rstrip("/") or "/" if norm not in routes: routes[norm] = set() routes[norm].update(methods) return routes def _normalize_path(path: str) -> str: """Normalize OAS path for comparison: remove trailing slash, lowercase.""" return path.rstrip("/").lower() or "/" # Paths that are infrastructure-level and expected to be missing from OAS specs. # Add /internal/* and /debug/* patterns if your project uses them. _OAS_IGNORE_PATH_PREFIXES: Tuple[str, ...] = ( "/healthz", "/readyz", "/livez", "/metrics", "/internal/", "/debug/", "/__", "/favicon", ) def _is_oas_ignored(path: str) -> bool: """Return True if path is on the OAS ignore allowlist.""" p = path.lower() return any(p == prefix.rstrip("/") or p.startswith(prefix) for prefix in _OAS_IGNORE_PATH_PREFIXES) def _load_openapi_deprecated(repo_root: str) -> Set[str]: """ Return normalized paths marked as 'deprecated: true' in any OAS spec. Deprecated endpoints downgrade from error to warning (DRIFT-OAS-001). """ deprecated: Set[str] = set() spec_files: List[str] = [] for dirpath, dirnames, filenames in os.walk(repo_root): dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS and not d.startswith(".")] for f in filenames: if re.match(r'openapi.*\.(ya?ml|json)$', f, re.IGNORECASE): spec_files.append(os.path.join(dirpath, f)) for sf in spec_files: try: content = _read_file(sf) data = yaml.safe_load(content) if sf.endswith((".yaml", ".yml")) else json.loads(content) if not isinstance(data, dict) or "paths" not in data: continue for path, methods in (data.get("paths") or {}).items(): if not isinstance(methods, dict): continue for method, operation in methods.items(): if isinstance(operation, dict) and operation.get("deprecated", False): deprecated.add(_normalize_path(path)) except Exception: pass return deprecated def _analyze_openapi(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict]: findings = [] spec_paths = _load_openapi_paths(repo_root, deadline) code_routes = _load_code_routes(repo_root, deadline) if not spec_paths: return findings, {"spec_paths": 0, "code_routes": len(code_routes), "findings": 0} deprecated_paths = _load_openapi_deprecated(repo_root) spec_norm: Dict[str, Set[str]] = { _normalize_path(p): methods for p, methods in spec_paths.items() } code_norm: Dict[str, Set[str]] = { _normalize_path(p): methods for p, methods in code_routes.items() } # DRIFT-OAS-001: In spec but not in code for path, methods in sorted(spec_norm.items()): # Skip infra/health endpoints — they are expected to be absent from OAS if _is_oas_ignored(path): continue if path not in code_norm: # Deprecated spec paths → warning only, not blocking severity = "warning" if path in deprecated_paths else "error" dep_note = " (deprecated in spec)" if path in deprecated_paths else "" findings.append(Finding( category="openapi", severity=severity, id="DRIFT-OAS-001", title=f"OpenAPI path '{path}'{dep_note} not found in codebase routes", evidence={"path": "docs/contracts/", "details": f"methods={sorted(methods)}, missing from FastAPI decorators"}, recommended_fix=( f"Mark '{path}' as removed in OpenAPI or implement the route." if path in deprecated_paths else f"Implement '{path}' route in code or remove from OpenAPI spec." ), )) else: # DRIFT-OAS-003: Method mismatch code_methods = code_norm[path] missing_in_code = methods - code_methods if missing_in_code: findings.append(Finding( category="openapi", severity="warning", id="DRIFT-OAS-003", title=f"Method mismatch for path '{path}': spec has {sorted(missing_in_code)}, code missing", evidence={"path": "docs/contracts/", "details": f"spec={sorted(methods)}, code={sorted(code_methods)}"}, recommended_fix=f"Add missing HTTP methods to code route for '{path}'.", )) # DRIFT-OAS-002: In code (/v1/ paths) but not in spec for path, methods in sorted(code_norm.items()): # Health/internal endpoints are expected to be absent from OAS if _is_oas_ignored(path): continue if not path.startswith("/v1/"): continue if path not in spec_norm: findings.append(Finding( category="openapi", severity="error", id="DRIFT-OAS-002", title=f"Code route '{path}' not documented in any OpenAPI spec", evidence={"path": "services/", "details": f"methods={sorted(methods)}"}, recommended_fix=f"Add '{path}' to OpenAPI spec in docs/contracts/.", )) stats = { "spec_paths": len(spec_paths), "code_routes": len(code_routes), "findings": len(findings), } return findings, stats # ─── Category 3: NATS ───────────────────────────────────────────────────────── _NATS_WILDCARD_PAT = re.compile(r'\{[^}]+\}|\*|>') # {agent_id}, *, > def _normalize_nats_subject(subj: str) -> str: """Replace wildcards with * for matching. Lowercase.""" return _NATS_WILDCARD_PAT.sub("*", subj.strip()).lower() def _load_nats_inventory(repo_root: str) -> Optional[List[str]]: """ Load documented NATS subjects from inventory_nats_topics.csv. Returns list of normalized subjects, or None if file absent. """ csv_path = os.path.join( repo_root, "docs", "architecture_inventory", "inventory_nats_topics.csv" ) if not os.path.exists(csv_path): return None subjects = [] try: with open(csv_path, "r", newline="", errors="replace") as f: reader = csv.DictReader(f) for row in reader: subj = (row.get("subject") or "").strip() if subj: subjects.append(_normalize_nats_subject(subj)) except Exception as e: logger.warning(f"Could not load nats inventory: {e}") return None return subjects _NATS_USAGE_PATTERNS = [ re.compile(r'(?:nc|nats|js|jetstream)\.publish\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE), re.compile(r'(?:nc|nats|js|jetstream)\.subscribe\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE), re.compile(r'nc\.subscribe\([\'"]([a-zA-Z0-9._{}*>-]+)[\'"]', re.IGNORECASE), re.compile(r'subject\s*=\s*[\'"]([a-zA-Z0-9._{}*>-]{4,})[\'"]', re.IGNORECASE), re.compile(r'SUBJECT\s*=\s*[\'"]([a-zA-Z0-9._{}*>-]{4,})[\'"]'), re.compile(r'[\'"]([a-z][a-z0-9_]+\.[a-z][a-z0-9_]+(?:\.[a-zA-Z0-9_{}_.*>-]+){0,4})[\'"]'), ] _NATS_SUBJECT_VALIDATE = re.compile(r'^[a-zA-Z][a-zA-Z0-9._{}*>-]{2,}$') def _load_nats_code_subjects(repo_root: str, deadline: float) -> Set[str]: """Extract NATS subjects from code via regex patterns.""" py_files = _walk_files(repo_root, (".py",), deadline) found: Set[str] = set() for pf in py_files: if time.monotonic() > deadline: break if "venv" in pf or "node_modules" in pf: continue content = _read_file(pf) if not content: continue # Quick pre-filter: must contain at least one NATS-like call pattern _NATS_CALL_HINTS = ("nc.", "nats.", "js.", "jetstream.", "subject=", "SUBJECT=", ".publish(", ".subscribe(") if not any(hint in content for hint in _NATS_CALL_HINTS): continue for pat in _NATS_USAGE_PATTERNS: for m in pat.finditer(content): subj = m.group(1).strip() # Basic subject validation (must contain a dot) if "." in subj and _NATS_SUBJECT_VALIDATE.match(subj): found.add(_normalize_nats_subject(subj)) return found def _nats_subject_matches(code_subj: str, inventory_subjects: List[str]) -> bool: """ Check if a code subject matches any inventory subject (wildcard-aware). Supports * (one segment) and > (one or more segments). """ code_parts = code_subj.split(".") for inv in inventory_subjects: inv_parts = inv.split(".") if _nats_match(code_parts, inv_parts) or _nats_match(inv_parts, code_parts): return True return False def _nats_match(a_parts: List[str], b_parts: List[str]) -> bool: """Match NATS subject a against pattern b (with * and > wildcards).""" if not b_parts: return not a_parts if b_parts[-1] == ">": return len(a_parts) >= len(b_parts) - 1 if len(a_parts) != len(b_parts): return False for a, b in zip(a_parts, b_parts): if b == "*" or a == "*": continue if a != b: return False return True def _analyze_nats(repo_root: str, deadline: float) -> Tuple[List[Finding], Dict, bool]: """Returns (findings, stats, skipped).""" inventory = _load_nats_inventory(repo_root) if inventory is None: return [], {"skipped": True}, True code_subjects = _load_nats_code_subjects(repo_root, deadline) findings = [] # DRIFT-NATS-001: Used in code but not in inventory for subj in sorted(code_subjects): if not _nats_subject_matches(subj, inventory): findings.append(Finding( category="nats", severity="warning", id="DRIFT-NATS-001", title=f"NATS subject '{subj}' used in code but not in inventory", evidence={"path": "docs/architecture_inventory/inventory_nats_topics.csv", "details": f"subject '{subj}' not found (wildcard-aware match)"}, recommended_fix=f"Add '{subj}' to inventory_nats_topics.csv.", )) # DRIFT-NATS-002: In inventory but not used in code (info — may be legacy) for inv_subj in inventory: if inv_subj.endswith(".*") or inv_subj.endswith(".>"): continue # wildcard subscriptions — skip if not _nats_subject_matches(inv_subj, list(code_subjects)): findings.append(Finding( category="nats", severity="info", id="DRIFT-NATS-002", title=f"Documented NATS subject '{inv_subj}' not found in code (possibly legacy)", evidence={"path": "docs/architecture_inventory/inventory_nats_topics.csv", "details": "no matching publish/subscribe call found"}, recommended_fix="Verify if subject is still active; mark as deprecated in inventory if not.", )) stats = { "inventory_subjects": len(inventory), "code_subjects": len(code_subjects), "findings": len(findings), } return findings, stats, False # ─── Category 4: Tools ──────────────────────────────────────────────────────── def _load_rollout_tools(repo_root: str) -> Set[str]: """Extract all tool names mentioned in tools_rollout.yml groups.""" rollout_path = os.path.join(repo_root, "config", "tools_rollout.yml") tools: Set[str] = set() try: with open(rollout_path, "r") as f: data = yaml.safe_load(f) or {} except Exception: return tools # Collect all values from group lists (non-@group entries are tool names) def _collect(obj): if isinstance(obj, list): for item in obj: if isinstance(item, str) and not item.startswith("@"): tools.add(item) elif isinstance(item, str) and item.startswith("@"): group_name = item[1:] if group_name in data: _collect(data[group_name]) elif isinstance(obj, dict): for v in obj.values(): _collect(v) for key, value in data.items(): if key not in ("role_map", "agent_roles"): # these are role configs, not tool lists _collect(value) # Also scan role_map tool lists role_map = data.get("role_map", {}) for role_cfg in role_map.values(): _collect(role_cfg.get("tools", [])) return tools def _load_rbac_tools(repo_root: str) -> Dict[str, Set[str]]: """Load tool→{actions} from rbac_tools_matrix.yml.""" matrix_path = os.path.join(repo_root, "config", "rbac_tools_matrix.yml") result: Dict[str, Set[str]] = {} try: with open(matrix_path, "r") as f: data = yaml.safe_load(f) or {} for tool, cfg in (data.get("tools") or {}).items(): actions = set((cfg.get("actions") or {}).keys()) result[tool] = actions except Exception: pass return result def _get_effective_tools_for_roles(repo_root: str) -> Dict[str, Set[str]]: """Get effective tools for agent_default and agent_cto roles.""" result = {} try: import sys router_path = os.path.join(repo_root, "services", "router") if router_path not in sys.path: sys.path.insert(0, router_path) if repo_root not in sys.path: sys.path.insert(0, repo_root) from agent_tools_config import get_agent_tools, reload_rollout_config reload_rollout_config() # Use representative agents per role result["agent_default"] = set(get_agent_tools("brand_new_agent_xyz_test")) result["agent_cto"] = set(get_agent_tools("sofiia")) except Exception as e: logger.warning(f"Could not load effective tools: {e}") return result def _analyze_tools(repo_root: str) -> Tuple[List[Finding], Dict]: findings = [] rollout_tools = _load_rollout_tools(repo_root) rbac_tools = _load_rbac_tools(repo_root) role_tools = _get_effective_tools_for_roles(repo_root) all_role_tools: Set[str] = set() for tools in role_tools.values(): all_role_tools.update(tools) # DRIFT-TOOLS-001: Tool in rollout but no handler in tool_manager.py for tool in sorted(rollout_tools): if tool not in KNOWN_TOOL_HANDLERS: findings.append(Finding( category="tools", severity="error", id="DRIFT-TOOLS-001", title=f"Tool '{tool}' in tools_rollout.yml but no handler in tool_manager.py", evidence={"path": "config/tools_rollout.yml", "details": f"'{tool}' referenced in rollout groups but missing from KNOWN_TOOL_HANDLERS"}, recommended_fix=f"Add handler for '{tool}' in tool_manager.py execute_tool dispatch, or remove from rollout.", )) # DRIFT-TOOLS-002: Handler exists but not in RBAC matrix # Severity = error if tool is in rollout/standard_stack (actively used, no RBAC gate) # Severity = warning if tool appears experimental / not yet rolled out for tool in sorted(KNOWN_TOOL_HANDLERS): if tool not in rbac_tools: # Escalate to error if tool is actively distributed to agents is_rollouted = tool in rollout_tools or tool in all_role_tools severity = "error" if is_rollouted else "warning" findings.append(Finding( category="tools", severity=severity, id="DRIFT-TOOLS-002", title=f"Tool '{tool}' has a handler but is absent from rbac_tools_matrix.yml", evidence={"path": "config/rbac_tools_matrix.yml", "details": ( f"'{tool}' not found in matrix.tools section. " + ("In rollout → no RBAC gate applied." if is_rollouted else "Not in rollout (experimental/legacy).") )}, recommended_fix=f"Add '{tool}' with actions and entitlements to rbac_tools_matrix.yml.", )) # DRIFT-TOOLS-003: Tool in RBAC matrix but never appears in effective_tools if all_role_tools: for tool in sorted(rbac_tools.keys()): if tool not in all_role_tools: findings.append(Finding( category="tools", severity="warning", id="DRIFT-TOOLS-003", title=f"Tool '{tool}' is in RBAC matrix but never appears in effective_tools (dead config?)", evidence={"path": "config/rbac_tools_matrix.yml", "details": f"'{tool}' in matrix but not in any role's effective tool list"}, recommended_fix=f"Add '{tool}' to a role in tools_rollout.yml or remove from matrix.", )) stats = { "rollout_tools": len(rollout_tools), "rbac_tools": len(rbac_tools), "handlers": len(KNOWN_TOOL_HANDLERS), "role_tools": {role: len(tools) for role, tools in role_tools.items()}, "findings": len(findings), } return findings, stats # ─── Main Analyzer ──────────────────────────────────────────────────────────── def analyze_drift( repo_root: str, categories: Optional[List[str]] = None, timeout_sec: float = TIMEOUT_SEC, ) -> DriftReport: """ Run drift analysis across requested categories. Args: repo_root: absolute path to repository root categories: subset of ["services", "openapi", "nats", "tools"] (all if None) timeout_sec: hard deadline for full analysis Returns: DriftReport with pass/fail verdict """ all_categories = {"services", "openapi", "nats", "tools"} if categories: run_cats = {c for c in categories if c in all_categories} else: run_cats = all_categories deadline = time.monotonic() + timeout_sec all_findings: List[Finding] = [] skipped: List[str] = [] items_checked: Dict[str, int] = {} cat_stats: Dict[str, Any] = {} if "services" in run_cats: findings, stats = _analyze_services(repo_root, deadline) all_findings.extend(findings) cat_stats["services"] = stats items_checked["services"] = stats.get("catalog_entries", 0) + stats.get("compose_services", 0) if "openapi" in run_cats: findings, stats = _analyze_openapi(repo_root, deadline) all_findings.extend(findings) cat_stats["openapi"] = stats items_checked["openapi"] = stats.get("spec_paths", 0) + stats.get("code_routes", 0) if "nats" in run_cats: findings, stats, was_skipped = _analyze_nats(repo_root, deadline) if was_skipped: skipped.append("nats") else: all_findings.extend(findings) cat_stats["nats"] = stats items_checked["nats"] = stats.get("inventory_subjects", 0) + stats.get("code_subjects", 0) if "tools" in run_cats: findings, stats = _analyze_tools(repo_root) all_findings.extend(findings) cat_stats["tools"] = stats items_checked["tools"] = stats.get("rollout_tools", 0) + stats.get("rbac_tools", 0) # Sort findings: severity desc (error > warning > info), then category, then id severity_order = {"error": 0, "warning": 1, "info": 2} all_findings.sort(key=lambda f: (severity_order.get(f.severity, 9), f.category, f.id)) # Redact evidence for f in all_findings: if f.evidence.get("details"): f.evidence["details"] = _redact_evidence(f.evidence["details"]) errors = sum(1 for f in all_findings if f.severity == "error") warnings = sum(1 for f in all_findings if f.severity == "warning") infos = sum(1 for f in all_findings if f.severity == "info") pass_ = errors == 0 if pass_: summary = f"✅ Drift analysis PASSED. {len(all_findings)} findings ({warnings} warnings, {infos} infos)." else: summary = ( f"❌ Drift analysis FAILED. {errors} error(s), {warnings} warning(s). " f"Categories checked: {sorted(run_cats - {'nats'} if 'nats' in skipped else run_cats)}." ) if skipped: summary += f" Skipped (no inventory): {skipped}." elapsed_ms = round((time.monotonic() - (deadline - timeout_sec)) * 1000, 1) return DriftReport( pass_=pass_, summary=summary, stats={ "errors": errors, "warnings": warnings, "infos": infos, "skipped": skipped, "items_checked": items_checked, "elapsed_ms": elapsed_ms, "by_category": cat_stats, }, findings=[f.to_dict() for f in all_findings], ) def analyze_drift_dict(repo_root: str, **kwargs) -> Dict: """Convenience wrapper that returns a plain dict (for ToolResult).""" report = analyze_drift(repo_root, **kwargs) return { "pass": report.pass_, "summary": report.summary, "stats": report.stats, "findings": report.findings, }