# Source: microdao-daarion/scripts/docs/build_docs_hub_inventory.py
# (113 lines, 3.4 KiB, Python — file-browser header converted to a comment
# so the script remains runnable.)
#!/usr/bin/env python3
import csv
import fnmatch
import os
from pathlib import Path
from typing import Dict, List, Optional
# File extensions treated as documentation artifacts worth inventorying.
EXTS = {".md", ".mdx", ".txt", ".ipynb", ".pdf", ".docx"}
# fnmatch-style glob patterns for paths to skip entirely: VCS internals,
# dependency trees, virtualenvs, caches, and rollback backups.
EXCLUDE_DIR_PATTERNS = [
    "*/.git/*",
    "*/node_modules/*",
    "*/venv/*",
    "*/.venv/*",
    "*/site-packages/*",
    "*/__pycache__/*",
    "*/rollback_backups/*",
    "*/.pytest_cache/*",
]
def excluded(path: str, patterns: Optional[List[str]] = None) -> bool:
    """Return True if *path* matches any exclusion glob pattern.

    Backslashes are normalized to forward slashes first so Windows-style
    paths match the POSIX-style patterns.

    Args:
        path: Filesystem path to test.
        patterns: Glob patterns to match against. Defaults to the
            module-level ``EXCLUDE_DIR_PATTERNS`` (generalized so callers
            and tests can supply their own list).

    Returns:
        True when at least one pattern matches the normalized path.
    """
    if patterns is None:
        patterns = EXCLUDE_DIR_PATTERNS
    norm = path.replace("\\", "/")
    return any(fnmatch.fnmatch(norm, pat) for pat in patterns)
def classify(path: str, source_key: str) -> str:
    """Bucket a documentation file into a consolidation category.

    Precedence: runtime snapshots first, then legacy locations (worktrees,
    old Desktop checkout), then known-canonical docs; everything else
    needs manual triage.
    """
    lowered = path.lower().replace("\\", "/")
    if source_key.startswith("node1_runtime"):
        return "runtime-fact"
    if "/.worktrees/" in lowered:
        return "legacy-worktree"
    if "/desktop/microdao/microdao 3/" in lowered:
        return "legacy-desktop"
    is_canonical = (
        "/docs/runbooks/" in lowered
        or lowered.endswith(("/project-master-index.md", "/noda1-safe-deploy.md"))
    )
    return "new-canonical" if is_canonical else "needs-triage"
def status(path: str, source_key: str) -> str:
    """Map a file's classification onto an active/legacy/unknown status."""
    category = classify(path, source_key)
    if category in ("runtime-fact", "new-canonical"):
        return "active"
    return "legacy" if category.startswith("legacy") else "unknown"
def scan_root(root: Path, source_key: str) -> List[Dict[str, str]]:
    """Recursively inventory documentation files under *root*.

    Args:
        root: Directory to walk; a missing root yields an empty list.
        source_key: Label recorded on every row and fed to classify/status.

    Returns:
        One dict per matching file with keys ``source_key``, ``path``,
        ``class``, ``status``, and ``mtime`` (integer epoch seconds as str).

    Files that disappear mid-scan or cannot be stat'ed (broken symlinks,
    permission errors) are skipped instead of aborting the whole scan.
    """
    rows: List[Dict[str, str]] = []
    if not root.exists():
        return rows
    for fp in root.rglob("*"):
        if not fp.is_file():
            continue
        if fp.suffix.lower() not in EXTS:
            continue
        sp = str(fp)
        if excluded(sp):
            continue
        try:
            mtime = int(fp.stat().st_mtime)
        except OSError:
            # Best-effort: the file vanished or is unreadable; skip it
            # rather than losing the entire inventory run.
            continue
        rows.append(
            {
                "source_key": source_key,
                "path": sp,
                "class": classify(sp, source_key),
                "status": status(sp, source_key),
                "mtime": str(mtime),
            }
        )
    return rows
def main() -> int:
    """Scan all known documentation roots and emit inventory CSV + summary.

    Writes ``docs_inventory.csv`` (one row per file) and
    ``docs_inventory_summary.txt`` (per-class counts) under
    docs/consolidation. Always returns 0.
    """
    roots = {
        "canonical_repo": Path("/Users/apple/github-projects/microdao-daarion"),
        "legacy_repo": Path("/Users/apple/Desktop/MicroDAO/MicroDAO 3"),
        "worktrees": Path("/Users/apple/github-projects/microdao-daarion/.worktrees"),
        "notebooks": Path("/Users/apple/notebooks"),
        "node1_runtime_snapshot": Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/_node1_runtime_docs"),
    }
    all_rows: List[Dict[str, str]] = []
    for source_key, root_path in roots.items():
        all_rows.extend(scan_root(root_path, source_key))
    all_rows.sort(key=lambda row: (row["source_key"], row["path"]))

    out_csv = Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/docs_inventory.csv")
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    with out_csv.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(
            handle, fieldnames=["source_key", "path", "class", "status", "mtime"]
        )
        writer.writeheader()
        writer.writerows(all_rows)

    # Per-class counts for a quick at-a-glance review.
    summary: Dict[str, int] = {}
    for row in all_rows:
        cls = row["class"]
        summary[cls] = summary.get(cls, 0) + 1
    out_sum = Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/docs_inventory_summary.txt")
    lines = [f"total={len(all_rows)}"]
    lines.extend(f"{name}={count}" for name, count in sorted(summary.items()))
    with out_sum.open("w", encoding="utf-8") as handle:
        handle.write("".join(line + "\n" for line in lines))

    print(f"Wrote: {out_csv}")
    print(f"Wrote: {out_sum}")
    return 0
if __name__ == "__main__":
raise SystemExit(main())