Files
microdao-daarion/ops/scripts/audit_compact.py
Apple 67225a39fa docs(platform): add policy configs, runbooks, ops scripts and platform documentation
Config policies (16 files): alert_routing, architecture_pressure, backlog,
cost_weights, data_governance, incident_escalation, incident_intelligence,
network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix,
release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout

Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard,
deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice,
cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule),
task_registry, voice alerts/ha/latency/policy

Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks,
NODA1/NODA2 status and setup, audit index and traces, backlog, incident,
supervisor, tools, voice, opencode, release, risk, aistalk, spacebot

Made-with: Cursor
2026-03-03 07:14:53 -08:00

187 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
audit_compact.py — Audit JSONL Compaction

Merges individual daily JSONL files from the last `window_days` into a single
compressed artifact: ops/audit/compact/tool_audit_last_{window_days}d.jsonl.gz

Useful for:
    - Faster forensic analysis (single file to read)
    - Archival before cleanup
    - Offline cost_analyzer runs

Usage:
    python3 ops/scripts/audit_compact.py \
        --window-days 7 \
        [--output-path ops/audit/compact] \
        [--dry-run] [--verbose]

Callable programmatically via run_compact().
"""
from __future__ import annotations
import argparse
import datetime
import gzip
import json
import logging
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)

# Matches daily audit files, e.g. "tool_audit_2026-03-01.jsonl"; group 1 is the ISO date.
_DATE_PAT = re.compile(r"tool_audit_(\d{4}-\d{2}-\d{2})\.jsonl$")


def _is_within(child: Path, parent: Path) -> bool:
    """Return True when *child* equals *parent* or lies beneath it.

    Both paths must already be resolved. Unlike a plain str.startswith()
    prefix test, this cannot be fooled by a sibling directory such as
    "/repo-evil" when the root is "/repo".
    """
    try:
        child.relative_to(parent)
    except ValueError:
        return False
    return True


def run_compact(
    window_days: int = 7,
    audit_dir: str = "ops/audit",
    output_path: Optional[str] = None,
    dry_run: bool = True,
    repo_root: Optional[str] = None,
    verbose: bool = False,
) -> Dict:
    """
    Compact the last `window_days` of JSONL audit files into one .jsonl.gz.

    Args:
        window_days: how many days back from today to include; must be 1-30.
        audit_dir: audit directory, relative to the repo root.
        output_path: output directory relative to the repo root
            (default: "{audit_dir}/compact").
        dry_run: when True, only count lines — nothing is written.
        repo_root: repository root; falls back to $REPO_ROOT, then ".".
        verbose: log a summary line at INFO level.

    Returns:
        dict with keys: source_files, window_days, lines_written,
        output_file, bytes_written, dry_run, errors.

    Raises:
        ValueError: if window_days is out of range, or audit_dir /
            output_path resolves outside repo_root.
    """
    if not 1 <= window_days <= 30:
        # Original message read "must be 130" — a garbled "1–30" en dash.
        raise ValueError(f"window_days must be between 1 and 30, got {window_days}")

    root = Path(repo_root or os.getenv("REPO_ROOT", ".")).resolve()
    dir_path = (root / audit_dir).resolve()
    if not _is_within(dir_path, root):
        raise ValueError("audit_dir resolves outside repo root")

    today = datetime.date.today()
    # NOTE: `>= cutoff` below keeps today's file too, so up to
    # window_days + 1 daily files can match — preserved original behavior.
    cutoff = today - datetime.timedelta(days=window_days)

    # Collect files whose embedded date falls inside the window.
    source_files: List[Path] = []
    for fpath in sorted(dir_path.glob("tool_audit_*.jsonl")):
        m = _DATE_PAT.search(fpath.name)
        if not m:
            continue
        try:
            file_date = datetime.date.fromisoformat(m.group(1))
        except ValueError:
            continue  # malformed date in filename — skip silently
        if file_date >= cutoff:
            source_files.append(fpath)

    out_dir = (root / (output_path or f"{audit_dir}/compact")).resolve()
    if not _is_within(out_dir, root):
        raise ValueError("output_path resolves outside repo root")
    out_name = f"tool_audit_last_{window_days}d.jsonl.gz"
    out_file = out_dir / out_name

    lines_written = 0
    bytes_written = 0
    errors: List[str] = []

    if dry_run:
        # Count non-blank lines without writing anything.
        for fpath in source_files:
            try:
                with open(fpath, "r", encoding="utf-8", errors="replace") as f:
                    lines_written += sum(1 for line in f if line.strip())
            except Exception as e:
                errors.append(f"{fpath.name}: {e}")
        if verbose:
            logger.info(
                "[dry_run] Would compact %d files → %s (%d lines)",
                len(source_files), out_file, lines_written,
            )
    else:
        out_dir.mkdir(parents=True, exist_ok=True)
        try:
            with gzip.open(out_file, "wt", encoding="utf-8") as gz:
                for fpath in source_files:
                    try:
                        with open(fpath, "r", encoding="utf-8", errors="replace") as f:
                            for line in f:
                                line = line.strip()
                                if line:
                                    gz.write(line + "\n")
                                    lines_written += 1
                    except Exception as e:
                        # Best-effort: a bad source file is reported, not fatal.
                        msg = f"Error reading {fpath.name}: {e}"
                        logger.warning(msg)
                        errors.append(msg)
            bytes_written = out_file.stat().st_size
            if verbose:
                logger.info(
                    "Compacted %d files → %s (%d lines, %d bytes compressed)",
                    len(source_files), out_file.name, lines_written, bytes_written,
                )
        except Exception as e:
            errors.append(f"Write error: {e}")
            logger.error("audit_compact failed: %s", e)

    return {
        "source_files": len(source_files),
        "window_days": window_days,
        "lines_written": lines_written,
        "output_file": str(out_file) if not dry_run else str(out_file) + " [not created]",
        "bytes_written": bytes_written,
        "dry_run": dry_run,
        "errors": errors,
    }
def _parse_args(argv=None) -> argparse.Namespace:
p = argparse.ArgumentParser(
description="Compact audit JSONL files into a single .gz archive",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
p.add_argument("--window-days", type=int, default=7,
help="Compact files from last N days")
p.add_argument("--audit-dir", default="ops/audit",
help="Relative path to audit directory")
p.add_argument("--output-path", default=None,
help="Output directory (default: ops/audit/compact)")
p.add_argument("--repo-root", default=None)
p.add_argument("--dry-run", action="store_true")
p.add_argument("--verbose", action="store_true")
p.add_argument("--output-json", action="store_true")
return p.parse_args(argv)
def main(argv=None):
    """CLI entry point: configure logging, run compaction, report, set exit code."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s audit_compact %(message)s",
        stream=sys.stderr,
    )
    opts = _parse_args(argv)

    # Forward only the options run_compact() accepts (--output-json is CLI-only).
    forwarded = {
        name: getattr(opts, name)
        for name in (
            "window_days",
            "audit_dir",
            "output_path",
            "dry_run",
            "repo_root",
            "verbose",
        )
    }
    result = run_compact(**forwarded)

    if opts.output_json:
        print(json.dumps(result, indent=2))
    else:
        status = "DRY RUN" if result["dry_run"] else "DONE"
        print(
            f"[{status}] sources={result['source_files']} "
            f"lines={result['lines_written']} bytes={result['bytes_written']} "
            f"{result['output_file']}"
        )

    # Non-zero exit when any per-file or write error was recorded.
    if result["errors"]:
        sys.exit(1)


if __name__ == "__main__":
    main()