#!/usr/bin/env python3
"""Inventory documentation files across several roots and classify them.

Walks a fixed set of directory roots, records every documentation-like file
(extensions in EXTS), classifies each path into a consolidation bucket
(runtime-fact / legacy-* / new-canonical / needs-triage), and writes:

  * docs/consolidation/docs_inventory.csv          - one row per file
  * docs/consolidation/docs_inventory_summary.txt  - per-class counts

The roots and output paths are hard-coded for this machine's layout.
"""
import csv
import fnmatch
import os
from pathlib import Path
from typing import Dict, List

# File extensions that count as "documentation" for this inventory.
EXTS = {".md", ".mdx", ".txt", ".ipynb", ".pdf", ".docx"}

# Glob-style directory patterns that are never inventoried
# (VCS internals, dependency trees, virtualenvs, caches, backups).
EXCLUDE_DIR_PATTERNS = [
    "*/.git/*",
    "*/node_modules/*",
    "*/venv/*",
    "*/.venv/*",
    "*/site-packages/*",
    "*/__pycache__/*",
    "*/rollback_backups/*",
    "*/.pytest_cache/*",
]


def excluded(path: str) -> bool:
    """Return True if *path* falls inside any excluded directory pattern."""
    # Normalize Windows separators so the */.../* glob patterns match.
    norm = path.replace("\\", "/")
    return any(fnmatch.fnmatch(norm, pat) for pat in EXCLUDE_DIR_PATTERNS)


def classify(path: str, source_key: str) -> str:
    """Classify a file path into a consolidation bucket.

    Checks are ordered by precedence: anything scanned from the node1
    runtime snapshot is a runtime fact regardless of its path; then
    worktree and old-desktop locations are marked legacy; then known
    canonical locations; everything else needs manual triage.
    """
    # Case-insensitive, forward-slash comparison for all substring checks.
    p = path.lower().replace("\\", "/")
    if source_key.startswith("node1_runtime"):
        return "runtime-fact"
    if "/.worktrees/" in p:
        return "legacy-worktree"
    if "/desktop/microdao/microdao 3/" in p:
        return "legacy-desktop"
    if "/docs/runbooks/" in p or p.endswith("/project-master-index.md") or p.endswith("/noda1-safe-deploy.md"):
        return "new-canonical"
    return "needs-triage"


def status(path: str, source_key: str) -> str:
    """Map a path's classification onto a coarse status.

    runtime-fact / new-canonical -> "active"
    legacy-*                     -> "legacy"
    needs-triage                 -> "unknown"
    """
    c = classify(path, source_key)
    if c in {"runtime-fact", "new-canonical"}:
        return "active"
    if c.startswith("legacy"):
        return "legacy"
    return "unknown"


def scan_root(root: Path, source_key: str) -> List[Dict[str, str]]:
    """Recursively scan *root* and return one inventory row per matching file.

    Silently returns an empty list when the root does not exist, so a
    missing source is not an error. Each row carries the source key, the
    absolute path, its class, its status, and the file mtime as an integer
    epoch-seconds string.
    """
    rows: List[Dict[str, str]] = []
    if not root.exists():
        return rows
    for fp in root.rglob("*"):
        if not fp.is_file():
            continue
        if fp.suffix.lower() not in EXTS:
            continue
        sp = str(fp)
        if excluded(sp):
            continue
        rows.append(
            {
                "source_key": source_key,
                "path": sp,
                "class": classify(sp, source_key),
                "status": status(sp, source_key),
                "mtime": str(int(fp.stat().st_mtime)),
            }
        )
    return rows


def main() -> int:
    """Scan all configured roots, write the CSV inventory and summary.

    Returns 0 on success (used as the process exit code).
    """
    # Roots to scan; keys feed into classify()/status() and the CSV.
    roots = {
        "canonical_repo": Path("/Users/apple/github-projects/microdao-daarion"),
        "legacy_repo": Path("/Users/apple/Desktop/MicroDAO/MicroDAO 3"),
        "worktrees": Path("/Users/apple/github-projects/microdao-daarion/.worktrees"),
        "notebooks": Path("/Users/apple/notebooks"),
        "node1_runtime_snapshot": Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/_node1_runtime_docs"),
    }
    all_rows: List[Dict[str, str]] = []
    for key, root in roots.items():
        all_rows.extend(scan_root(root, key))
    # Deterministic output order for stable diffs between runs.
    all_rows.sort(key=lambda r: (r["source_key"], r["path"]))

    out_csv = Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/docs_inventory.csv")
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    with out_csv.open("w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=["source_key", "path", "class", "status", "mtime"])
        w.writeheader()
        for row in all_rows:
            w.writerow(row)

    # Tiny summary for quick review
    summary: Dict[str, int] = {}
    for row in all_rows:
        summary[row["class"]] = summary.get(row["class"], 0) + 1
    out_sum = Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/docs_inventory_summary.txt")
    with out_sum.open("w", encoding="utf-8") as f:
        f.write(f"total={len(all_rows)}\n")
        for k in sorted(summary.keys()):
            f.write(f"{k}={summary[k]}\n")

    print(f"Wrote: {out_csv}")
    print(f"Wrote: {out_sum}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())