164 lines
4.6 KiB
Python
Executable File
164 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Tuple
|
|
|
|
|
|
def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    current = datetime.now(tz=timezone.utc)
    return current
|
|
|
|
|
|
def _fmt_ts(dt: datetime) -> str:
    """Format *dt* as a second-precision ISO-8601 timestamp ending in ``Z``.

    Args:
        dt: Moment to format. Timezone-aware values are converted to UTC
            before formatting so the ``Z`` suffix is always accurate. Naive
            values are formatted unchanged (they are taken to already be UTC).

    Returns:
        A string such as ``2024-01-31T08:15:00Z``.
    """
    # The format string hard-codes "Z", so an aware datetime in another zone
    # must be shifted to UTC first; otherwise the label would be wrong.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
def _resolve_data_dir(cli_data_dir: str | None) -> Path:
    """Determine the absolute data directory for the script.

    Candidates are tried in order: the ``--data-dir`` CLI value, the
    ``SOFIIA_DATA_DIR`` environment variable, then the ``/app/data`` default.

    Args:
        cli_data_dir: Value passed on the command line, or ``None``.

    Returns:
        The first non-blank candidate with ``~`` expanded and resolved to an
        absolute path.
    """
    for candidate in (cli_data_dir, os.getenv("SOFIIA_DATA_DIR")):
        # Strip before testing: a whitespace-only value must count as unset,
        # otherwise it would become Path("") and resolve to the current
        # working directory.
        cleaned = (candidate or "").strip()
        if cleaned:
            return Path(cleaned).expanduser().resolve()
    return Path("/app/data").expanduser().resolve()
|
|
|
|
|
|
def _parse_args() -> argparse.Namespace:
    """Parse the pruning script's command-line options from ``sys.argv``.

    Returns:
        Namespace with ``data_dir``, ``retention_days``, ``batch_size``,
        ``dry_run``, ``vacuum`` and ``yes`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Prune old audit_events records from sofiia SQLite DB."
    )

    # Options are declared as data and registered in sequence, so ``--help``
    # lists them in exactly this order.
    option_table = (
        (
            "--data-dir",
            {
                "default": None,
                "help": "Path to SOFIIA_DATA_DIR. Defaults to env SOFIIA_DATA_DIR or /app/data.",
            },
        ),
        (
            "--retention-days",
            {
                "type": int,
                "default": None,
                "help": "Retention period in days. Defaults to SOFIIA_AUDIT_RETENTION_DAYS or 90.",
            },
        ),
        (
            "--batch-size",
            {
                "type": int,
                "default": 5000,
                "help": "Delete batch size (default: 5000).",
            },
        ),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "Report candidates only, do not delete.",
            },
        ),
        (
            "--vacuum",
            {
                "action": "store_true",
                "help": "Run VACUUM after deletion.",
            },
        ),
        (
            "--yes",
            {
                "action": "store_true",
                "help": "Reserved for non-interactive confirmation (no-op in current script).",
            },
        ),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    return parser.parse_args()
|
|
|
|
|
|
def _check_table_exists(conn: sqlite3.Connection, table_name: str) -> bool:
    """Report whether ``sqlite_master`` lists a table named *table_name*.

    Args:
        conn: Open SQLite connection to inspect.
        table_name: Table name to look up (bound as a query parameter).

    Returns:
        ``True`` if a matching ``type='table'`` entry exists, else ``False``.
    """
    lookup_sql = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
    match = conn.execute(lookup_sql, (table_name,)).fetchone()
    return True if match else False
|
|
|
|
|
|
def _candidate_stats(conn: sqlite3.Connection, cutoff_ts: str) -> Tuple[int, str | None, str | None]:
    """Summarise the ``audit_events`` rows whose ``ts`` is below *cutoff_ts*.

    Args:
        conn: Open SQLite connection containing an ``audit_events`` table.
        cutoff_ts: Exclusive upper bound compared against the ``ts`` column.

    Returns:
        ``(count, min_ts, max_ts)`` for the matching rows; ``min_ts`` and
        ``max_ts`` are ``None`` when nothing matches.
    """
    stats_sql = "SELECT COUNT(*), MIN(ts), MAX(ts) FROM audit_events WHERE ts < ?"
    result = conn.execute(stats_sql, (cutoff_ts,)).fetchone()
    if result:
        count, oldest, newest = result[0], result[1], result[2]
        return int(count or 0), oldest, newest
    # Defensive fallback for the case where no row comes back at all.
    return 0, None, None
|
|
|
|
|
|
def main() -> int:
    """Prune ``audit_events`` rows older than the retention cutoff.

    The database is ``sofiia.db`` inside the resolved data directory. The
    retention period comes from ``--retention-days`` or, when that is not
    given, from ``SOFIIA_AUDIT_RETENTION_DAYS`` (default 90); it and the batch
    size are clamped to at least 1. Rows are deleted in batches, oldest first,
    with a commit after every batch. Progress and errors are printed to
    stdout.

    Returns:
        ``0`` on success (including ``--dry-run``), ``1`` on any failure:
        invalid retention setting, missing DB file, missing table, or a
        database error.
    """
    args = _parse_args()

    data_dir = _resolve_data_dir(args.data_dir)
    db_path = (data_dir / "sofiia.db").resolve()

    retention_days = args.retention_days
    if retention_days is None:
        raw_retention = os.getenv("SOFIIA_AUDIT_RETENTION_DAYS", "90")
        try:
            retention_days = int(raw_retention)
        except ValueError:
            # A malformed env value is a configuration error; report it the
            # same way as every other failure instead of dumping a traceback.
            print(f"ERROR: invalid SOFIIA_AUDIT_RETENTION_DAYS: {raw_retention!r}")
            return 1
    retention_days = max(1, int(retention_days))
    batch_size = max(1, int(args.batch_size))

    cutoff_dt = _utc_now() - timedelta(days=retention_days)
    cutoff_ts = _fmt_ts(cutoff_dt)

    print("Audit retention pruning")
    print(f" db_path: {db_path}")
    print(f" retention_days: {retention_days}")
    print(f" cutoff_ts: {cutoff_ts}")
    print(f" batch_size: {batch_size}")
    print(f" dry_run: {bool(args.dry_run)}")
    print(f" vacuum: {bool(args.vacuum)}")

    if not db_path.exists():
        print(f"ERROR: DB file not found: {db_path}")
        return 1

    conn = None
    try:
        conn = sqlite3.connect(str(db_path))
        conn.execute("PRAGMA busy_timeout = 5000")
    except Exception as exc:
        # connect() may have succeeded before the PRAGMA failed; do not leak
        # the handle in that case.
        if conn is not None:
            conn.close()
        print(f"ERROR: cannot open DB: {exc}")
        return 1

    try:
        if not _check_table_exists(conn, "audit_events"):
            print("ERROR: table 'audit_events' not found in DB schema")
            return 1

        total_candidates, min_ts, max_ts = _candidate_stats(conn, cutoff_ts)
        print(f" candidates: {total_candidates}")
        print(f" candidates_min_ts: {min_ts or '-'}")
        print(f" candidates_max_ts: {max_ts or '-'}")

        if args.dry_run:
            print("Dry-run complete. No rows were deleted.")
            return 0

        deleted_total = 0
        batch_no = 0
        while True:
            cur = conn.execute(
                "DELETE FROM audit_events WHERE id IN ("
                "SELECT id FROM audit_events WHERE ts < ? ORDER BY ts ASC LIMIT ?"
                ")",
                (cutoff_ts, batch_size),
            )
            deleted = int(cur.rowcount or 0)
            # Commit before checking the count: the sqlite3 module opens an
            # implicit transaction for every DELETE, including the final one
            # that removes nothing. Leaving it open makes the VACUUM below
            # fail with "cannot VACUUM from within a transaction".
            conn.commit()
            if deleted <= 0:
                break
            deleted_total += deleted
            batch_no += 1
            print(f" batch {batch_no}: deleted {deleted} (total={deleted_total})")

        print(f"Deletion complete. Total deleted: {deleted_total}")

        if args.vacuum:
            print("Running VACUUM...")
            conn.execute("VACUUM")
            print("VACUUM complete.")

        return 0
    except Exception as exc:
        # Top-level boundary of the script: report the failure and signal it
        # through the exit status rather than a traceback.
        print(f"ERROR: prune failed: {exc}")
        return 1
    finally:
        conn.close()
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|