GET /api/runbooks/status returns docs_root, indexed_files, indexed_chunks, last_indexed_at, and fts_available; the docs_index_meta table is set on rebuild. Made-with: Cursor
80 lines
2.7 KiB
Python
80 lines
2.7 KiB
Python
"""
Docs index builder — scan docs/**/*.md, chunk, and populate docs_files / docs_chunks / docs_chunks_fts.

Run manually: python -m app.docs_index --rebuild
Or from repo root: SOFIIA_DOCS_ROOT=./docs python -m app.docs_index --rebuild
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from . import db as _db
|
|
from .docs_store import clear_docs_index, insert_docs_file, set_docs_index_meta
|
|
|
|
# Module-level logger named after this module; rebuild_index logs through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_docs_root() -> Path:
    """Resolve the docs directory that should be indexed.

    A non-blank ``SOFIIA_DOCS_ROOT`` environment variable wins. Otherwise the
    ``docs`` directory four levels above this file is used
    (app/docs_index.py -> app -> sofiia-console -> services -> repo).

    Returns:
        The resolved (absolute) docs directory path. The path is not checked
        for existence here.
    """
    override = os.getenv("SOFIIA_DOCS_ROOT", "").strip()
    if not override:
        # Climb from this file up to the repository root, one level per step:
        # app -> sofiia-console -> services -> repo.
        base = Path(__file__).resolve()
        for _ in range(4):
            base = base.parent
        return (base / "docs").resolve()
    return Path(override).resolve()
|
|
|
|
|
|
async def rebuild_index(docs_root: Optional[Path] = None) -> int:
    """Rebuild the docs index from every ``*.md`` file under the docs root.

    The existing index is cleared, then each Markdown file found by a
    recursive scan (in sorted path order) is read and handed to
    ``insert_docs_file`` together with its modification time. Finally the
    index metadata (root, UTC timestamp, empty sha) is stored via
    ``set_docs_index_meta``.

    Args:
        docs_root: Directory to scan. Falls back to ``get_docs_root()`` when
            omitted.

    Returns:
        Number of files inserted. Returns 0 without touching the existing
        index when the docs root is not a directory.
    """
    root = docs_root or get_docs_root()
    if not root.is_dir():
        logger.warning("Docs root not found: %s", root)
        return 0

    await _db.init_db()
    await clear_docs_index()

    count = 0
    for path in sorted(root.rglob("*.md")):
        # Read content and mtime under the same guard: a file can disappear or
        # become unreadable between the directory scan and this point, and one
        # bad file must not abort a rebuild that has already cleared the index.
        try:
            content = path.read_text(encoding="utf-8", errors="replace")
            mtime = path.stat().st_mtime
        except OSError as e:
            logger.warning("Skip %s: %s", path, e)
            continue

        # Key each file by its path relative to the docs root (not the repo),
        # normalised to forward slashes so IDs match across platforms.
        try:
            rel = path.relative_to(root)
        except ValueError:
            # Defensive fallback: rglob results are expected to lie under root.
            rel = path.name
        path_key = str(rel).replace("\\", "/")

        await insert_docs_file(path_key, mtime, content)
        count += 1

    last_indexed_at = datetime.now(timezone.utc).isoformat()
    await set_docs_index_meta(str(root), last_indexed_at, sha="")
    logger.info("Docs index rebuilt: %s files from %s", count, root)
    return count
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: rebuild the docs index when ``--rebuild`` is given.

    Parses ``--rebuild`` and the optional ``--docs-root`` override from the
    process arguments.

    Returns:
        Exit status: 0 after a rebuild, 1 when ``--rebuild`` was not supplied
        (a usage hint is printed to stderr in that case).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Rebuild docs FTS index")
    parser.add_argument("--rebuild", action="store_true", help="Clear and rebuild index")
    parser.add_argument("--docs-root", type=str, default=None, help="Override docs directory")
    opts = parser.parse_args()

    if opts.rebuild:
        logging.basicConfig(level=logging.INFO)
        # An empty or missing --docs-root lets rebuild_index choose its default.
        override = None
        if opts.docs_root:
            override = Path(opts.docs_root).resolve()
        indexed = asyncio.run(rebuild_index(override))
        print(f"Indexed {indexed} files.")
        return 0

    print("Use --rebuild to rebuild index.", file=sys.stderr)
    return 1
|
|
|
|
|
|
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|