#!/usr/bin/env python3
"""Inventory documentation files across several roots and classify them.

Walks a fixed set of directory roots, records every documentation-like file
(extensions in EXTS), classifies each path into a consolidation bucket
(runtime-fact / legacy-* / new-canonical / needs-triage), and writes:

  * docs/consolidation/docs_inventory.csv          - one row per file
  * docs/consolidation/docs_inventory_summary.txt  - per-class counts

The roots and output paths are hard-coded for this machine's layout.
"""
import csv
import fnmatch
import os
from pathlib import Path
from typing import Dict, List

# File extensions that count as "documentation" for this inventory.
EXTS = {".md", ".mdx", ".txt", ".ipynb", ".pdf", ".docx"}

# Glob-style directory patterns that are never inventoried
# (VCS internals, dependency trees, virtualenvs, caches, backups).
EXCLUDE_DIR_PATTERNS = [
    "*/.git/*",
    "*/node_modules/*",
    "*/venv/*",
    "*/.venv/*",
    "*/site-packages/*",
    "*/__pycache__/*",
    "*/rollback_backups/*",
    "*/.pytest_cache/*",
]


def excluded(path: str) -> bool:
    """Return True if *path* falls inside any excluded directory pattern."""
    # Normalize Windows separators so the */.../* glob patterns match.
    norm = path.replace("\\", "/")
    return any(fnmatch.fnmatch(norm, pat) for pat in EXCLUDE_DIR_PATTERNS)


def classify(path: str, source_key: str) -> str:
    """Classify a file path into a consolidation bucket.

    Checks are ordered by precedence: anything scanned from the node1
    runtime snapshot is a runtime fact regardless of its path; then
    worktree and old-desktop locations are marked legacy; then known
    canonical locations; everything else needs manual triage.
    """
    # Case-insensitive, forward-slash comparison for all substring checks.
    p = path.lower().replace("\\", "/")
    if source_key.startswith("node1_runtime"):
        return "runtime-fact"
    if "/.worktrees/" in p:
        return "legacy-worktree"
    if "/desktop/microdao/microdao 3/" in p:
        return "legacy-desktop"
    if "/docs/runbooks/" in p or p.endswith("/project-master-index.md") or p.endswith("/noda1-safe-deploy.md"):
        return "new-canonical"
    return "needs-triage"


def status(path: str, source_key: str) -> str:
    """Map a path's classification onto a coarse status.

    runtime-fact / new-canonical -> "active"
    legacy-*                     -> "legacy"
    needs-triage                 -> "unknown"
    """
    c = classify(path, source_key)
    if c in {"runtime-fact", "new-canonical"}:
        return "active"
    if c.startswith("legacy"):
        return "legacy"
    return "unknown"


def scan_root(root: Path, source_key: str) -> List[Dict[str, str]]:
    """Recursively scan *root* and return one inventory row per matching file.

    Silently returns an empty list when the root does not exist, so a
    missing source is not an error. Each row carries the source key, the
    absolute path, its class, its status, and the file mtime as an integer
    epoch-seconds string.
    """
    rows: List[Dict[str, str]] = []
    if not root.exists():
        return rows
    for fp in root.rglob("*"):
        if not fp.is_file():
            continue
        if fp.suffix.lower() not in EXTS:
            continue
        sp = str(fp)
        if excluded(sp):
            continue
        rows.append(
            {
                "source_key": source_key,
                "path": sp,
                "class": classify(sp, source_key),
                "status": status(sp, source_key),
                "mtime": str(int(fp.stat().st_mtime)),
            }
        )
    return rows


def main() -> int:
    """Scan all configured roots, write the CSV inventory and summary.

    Returns 0 on success (used as the process exit code).
    """
    # Roots to scan; keys feed into classify()/status() and the CSV.
    roots = {
        "canonical_repo": Path("/Users/apple/github-projects/microdao-daarion"),
        "legacy_repo": Path("/Users/apple/Desktop/MicroDAO/MicroDAO 3"),
        "worktrees": Path("/Users/apple/github-projects/microdao-daarion/.worktrees"),
        "notebooks": Path("/Users/apple/notebooks"),
        "node1_runtime_snapshot": Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/_node1_runtime_docs"),
    }
    all_rows: List[Dict[str, str]] = []
    for key, root in roots.items():
        all_rows.extend(scan_root(root, key))
    # Deterministic output order for stable diffs between runs.
    all_rows.sort(key=lambda r: (r["source_key"], r["path"]))

    out_csv = Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/docs_inventory.csv")
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    with out_csv.open("w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=["source_key", "path", "class", "status", "mtime"])
        w.writeheader()
        for row in all_rows:
            w.writerow(row)

    # Tiny summary for quick review
    summary: Dict[str, int] = {}
    for row in all_rows:
        summary[row["class"]] = summary.get(row["class"], 0) + 1
    out_sum = Path("/Users/apple/github-projects/microdao-daarion/docs/consolidation/docs_inventory_summary.txt")
    with out_sum.open("w", encoding="utf-8") as f:
        f.write(f"total={len(all_rows)}\n")
        for k in sorted(summary.keys()):
            f.write(f"{k}={summary[k]}\n")

    print(f"Wrote: {out_csv}")
    print(f"Wrote: {out_sum}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())