""" Docs index builder — scan docs/**/*.md, chunk, and populate docs_files / docs_chunks / docs_chunks_fts. Run manually: python -m app.docs_index --rebuild Or from repo root: SOFIIA_DOCS_ROOT=./docs python -m app.docs_index --rebuild """ from __future__ import annotations import asyncio import logging import os import sys from datetime import datetime, timezone from pathlib import Path from typing import Optional from . import db as _db from .docs_store import clear_docs_index, insert_docs_file, set_docs_index_meta logger = logging.getLogger(__name__) def get_docs_root() -> Path: """SOFIIA_DOCS_ROOT or repo/docs (sofiia-console lives in repo/services/sofiia-console).""" env = os.getenv("SOFIIA_DOCS_ROOT", "").strip() if env: return Path(env).resolve() # app/docs_index.py -> app -> sofiia-console -> services -> repo repo = Path(__file__).resolve().parent.parent.parent.parent return (repo / "docs").resolve() async def rebuild_index(docs_root: Optional[Path] = None) -> int: """Scan docs_root for **/*.md, clear index, insert all. Returns count of files indexed.""" root = docs_root or get_docs_root() if not root.is_dir(): logger.warning("Docs root not found: %s", root) return 0 await _db.init_db() await clear_docs_index() count = 0 for path in sorted(root.rglob("*.md")): try: content = path.read_text(encoding="utf-8", errors="replace") except Exception as e: logger.warning("Skip %s: %s", path, e) continue # Store path relative to repo (docs/runbook/...) for stable IDs try: rel = path.relative_to(root) except ValueError: rel = path.name path_key = str(rel).replace("\\", "/") mtime = path.stat().st_mtime await insert_docs_file(path_key, mtime, content) count += 1 last_indexed_at = datetime.now(timezone.utc).isoformat() await set_docs_index_meta(str(root), last_indexed_at, sha="") logger.info("Docs index rebuilt: %s files from %s", count, root) return count def main() -> int: import argparse p = argparse.ArgumentParser(description="Rebuild docs FTS index") p.add_argument("--rebuild", action="store_true", help="Clear and rebuild index") p.add_argument("--docs-root", type=str, default=None, help="Override docs directory") args = p.parse_args() if not args.rebuild: print("Use --rebuild to rebuild index.", file=sys.stderr) return 1 logging.basicConfig(level=logging.INFO) root = Path(args.docs_root).resolve() if args.docs_root else None n = asyncio.run(rebuild_index(root)) print(f"Indexed {n} files.") return 0 if __name__ == "__main__": sys.exit(main())