docs(platform): add policy configs, runbooks, ops scripts and platform documentation
Config policies (16 files): alert_routing, architecture_pressure, backlog, cost_weights, data_governance, incident_escalation, incident_intelligence, network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix, release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout. Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard, deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice, cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule), task_registry, voice alerts/ha/latency/policy. Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks, NODA1/NODA2 status and setup, audit index and traces, backlog, incident, supervisor, tools, voice, opencode, release, risk, aistalk, spacebot. Made-with: Cursor
This commit is contained in:
186
ops/scripts/audit_compact.py
Normal file
186
ops/scripts/audit_compact.py
Normal file
@@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
audit_compact.py — Audit JSONL Compaction
|
||||
|
||||
Merges individual daily JSONL files from the last `window_days` into a single
|
||||
compressed artifact: ops/audit/compact/tool_audit_last_{window_days}d.jsonl.gz
|
||||
|
||||
Useful for:
|
||||
- Faster forensic analysis (single file to read)
|
||||
- Archival before cleanup
|
||||
- Offline cost_analyzer runs
|
||||
|
||||
Usage:
|
||||
python3 ops/scripts/audit_compact.py \
|
||||
--window-days 7 \
|
||||
[--output-path ops/audit/compact] \
|
||||
[--dry-run] [--verbose]
|
||||
|
||||
Callable programmatically via run_compact().
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)

# Matches daily audit file names like "tool_audit_2024-01-31.jsonl" and
# captures the ISO date used for the window cutoff.
_DATE_PAT = re.compile(r"tool_audit_(\d{4}-\d{2}-\d{2})\.jsonl$")


def _is_within(path: Path, root: Path) -> bool:
    """Return True if *path* equals *root* or lies inside it.

    Uses ``Path.relative_to`` rather than a string-prefix test: a check like
    ``str(path).startswith(str(root))`` wrongly accepts sibling directories
    that share a prefix (e.g. root ``/repo`` accepting ``/repo-backup``).
    Both arguments are expected to be already resolved.
    """
    try:
        path.relative_to(root)
    except ValueError:
        return False
    return True


def run_compact(
    window_days: int = 7,
    audit_dir: str = "ops/audit",
    output_path: Optional[str] = None,
    dry_run: bool = True,
    repo_root: Optional[str] = None,
    verbose: bool = False,
) -> Dict:
    """
    Compact last `window_days` JSONL audit files into one .jsonl.gz.

    Scans ``audit_dir`` for files named ``tool_audit_YYYY-MM-DD.jsonl`` whose
    embedded date falls within the window and merges their non-blank lines
    into ``tool_audit_last_{window_days}d.jsonl.gz`` under the output dir.

    Args:
        window_days: Days back to include, 1-30 inclusive.
        audit_dir: Audit directory, relative to the repo root.
        output_path: Output directory relative to the repo root
            (default: ``{audit_dir}/compact``).
        dry_run: If True (default), only count lines; nothing is written.
        repo_root: Repository root; falls back to ``$REPO_ROOT``, then ".".
        verbose: Emit an INFO summary via the module logger.

    Returns:
        {source_files, window_days, lines_written, output_file,
         bytes_written, dry_run, errors}

    Raises:
        ValueError: if ``window_days`` is out of range, or if ``audit_dir``
            or ``output_path`` resolves outside the repo root.
    """
    if window_days < 1 or window_days > 30:
        raise ValueError(f"window_days must be 1–30, got {window_days}")

    root = Path(repo_root or os.getenv("REPO_ROOT", ".")).resolve()
    dir_path = (root / audit_dir).resolve()
    # Containment check is segment-aware (see _is_within), not prefix-based.
    if not _is_within(dir_path, root):
        raise ValueError("audit_dir resolves outside repo root")

    today = datetime.date.today()
    cutoff = today - datetime.timedelta(days=window_days)

    # Collect source files whose file-name date is within [cutoff, today].
    source_files: List[Path] = []
    for fpath in sorted(dir_path.glob("tool_audit_*.jsonl")):
        m = _DATE_PAT.search(fpath.name)
        if not m:
            continue
        try:
            file_date = datetime.date.fromisoformat(m.group(1))
        except ValueError:
            # The regex fixes the shape but not validity (e.g. month 13).
            continue
        if file_date >= cutoff:
            source_files.append(fpath)

    out_dir = (root / (output_path or f"{audit_dir}/compact")).resolve()
    if not _is_within(out_dir, root):
        raise ValueError("output_path resolves outside repo root")

    out_name = f"tool_audit_last_{window_days}d.jsonl.gz"
    out_file = out_dir / out_name

    lines_written = 0
    bytes_written = 0
    errors: List[str] = []

    if dry_run:
        # Count non-blank lines without creating directories or the archive.
        for fpath in source_files:
            try:
                with open(fpath, "r", encoding="utf-8", errors="replace") as f:
                    lines_written += sum(1 for line in f if line.strip())
            except Exception as e:
                errors.append(f"{fpath.name}: {e}")
        if verbose:
            logger.info(
                "[dry_run] Would compact %d files → %s (%d lines)",
                len(source_files), out_file, lines_written,
            )
    else:
        out_dir.mkdir(parents=True, exist_ok=True)
        try:
            with gzip.open(out_file, "wt", encoding="utf-8") as gz:
                for fpath in source_files:
                    try:
                        with open(fpath, "r", encoding="utf-8", errors="replace") as f:
                            for line in f:
                                line = line.strip()
                                if line:  # skip blank lines in the sources
                                    gz.write(line + "\n")
                                    lines_written += 1
                    except Exception as e:
                        # Best-effort: record the failure and keep compacting
                        # the remaining source files.
                        msg = f"Error reading {fpath.name}: {e}"
                        logger.warning(msg)
                        errors.append(msg)
            bytes_written = out_file.stat().st_size
            if verbose:
                logger.info(
                    "Compacted %d files → %s (%d lines, %d bytes compressed)",
                    len(source_files), out_file.name, lines_written, bytes_written,
                )
        except Exception as e:
            errors.append(f"Write error: {e}")
            logger.error("audit_compact failed: %s", e)

    return {
        "source_files": len(source_files),
        "window_days": window_days,
        "lines_written": lines_written,
        "output_file": str(out_file) if not dry_run else str(out_file) + " [not created]",
        "bytes_written": bytes_written,
        "dry_run": dry_run,
        "errors": errors,
    }
|
||||
|
||||
|
||||
def _parse_args(argv=None) -> argparse.Namespace:
|
||||
p = argparse.ArgumentParser(
|
||||
description="Compact audit JSONL files into a single .gz archive",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
p.add_argument("--window-days", type=int, default=7,
|
||||
help="Compact files from last N days")
|
||||
p.add_argument("--audit-dir", default="ops/audit",
|
||||
help="Relative path to audit directory")
|
||||
p.add_argument("--output-path", default=None,
|
||||
help="Output directory (default: ops/audit/compact)")
|
||||
p.add_argument("--repo-root", default=None)
|
||||
p.add_argument("--dry-run", action="store_true")
|
||||
p.add_argument("--verbose", action="store_true")
|
||||
p.add_argument("--output-json", action="store_true")
|
||||
return p.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv=None):
    """CLI entry point: parse args, run the compaction, print a summary.

    Exits with status 1 when any read or write errors were recorded.
    """
    args = _parse_args(argv)
    # Logs go to stderr so stdout stays clean for the summary / JSON output.
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)s %(levelname)s audit_compact %(message)s",
    )
    report = run_compact(
        window_days=args.window_days,
        audit_dir=args.audit_dir,
        output_path=args.output_path,
        dry_run=args.dry_run,
        repo_root=args.repo_root,
        verbose=args.verbose,
    )
    if args.output_json:
        print(json.dumps(report, indent=2))
    else:
        status = "DRY RUN" if report["dry_run"] else "DONE"
        print(
            f"[{status}] sources={report['source_files']} "
            f"lines={report['lines_written']} bytes={report['bytes_written']} "
            f"→ {report['output_file']}"
        )
    if report["errors"]:
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user