Files
microdao-daarion/tests/test_sofiia_docs_search.py
Apple 4db1774a34 feat(sofiia-console): rank runbook search results with bm25
FTS path: score = bm25(docs_chunks_fts), ORDER BY score ASC; LIKE fallback: score null; test asserts score key present

Made-with: Cursor
2026-03-03 04:36:52 -08:00

118 lines
4.6 KiB
Python

"""
Tests for runbooks/docs search API (PR1.1): search, preview, raw, and index status.
Uses tmp docs dir and rebuild_index; no network.
"""
from __future__ import annotations
import asyncio
from pathlib import Path
import httpx
import pytest
from httpx import ASGITransport
@pytest.fixture
def tmp_docs_with_rehearsal(tmp_path):
    """Build a throwaway docs tree holding a single rehearsal checklist runbook.

    The file is written to
    ``<tmp_path>/docs/runbook/rehearsal-v1-30min-checklist.md``.

    Returns:
        The ``docs`` root directory (the parent of ``runbook``).
    """
    docs_root = tmp_path / "docs"
    runbook_dir = docs_root / "runbook"
    runbook_dir.mkdir(parents=True)

    # Each entry is one markdown paragraph; paragraphs are separated by a
    # blank line, i.e. "\n\n", with no trailing newline after the last one.
    paragraphs = [
        "# Rehearsal v1 — 30-minute execution plan",
        "## Preflight",
        "Run STRICT=1 bash ops/preflight_sofiia_console.sh",
        "## Smoke",
        "Idempotency and audit auth checks.",
    ]
    checklist = runbook_dir / "rehearsal-v1-30min-checklist.md"
    checklist.write_text("\n\n".join(paragraphs), encoding="utf-8")
    return docs_root
def test_runbooks_search_finds_rehearsal(
    sofiia_module, tmp_path, tmp_docs_with_rehearsal, monkeypatch
):
    """Store-level search for 'rehearsal' returns the checklist with the expected keys.

    Rebuilds the index from the temporary docs tree, queries the docs store
    directly, and checks that the first hit exposes ``path``, ``title``,
    ``snippet`` and a ``score`` that is either ``None`` or numeric.
    """
    import app.docs_index as docs_index_mod
    import app.docs_store as docs_store_mod

    monkeypatch.setenv("SOFIIA_DATA_DIR", str(tmp_path / "sofiia-data"))
    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated on newer Python versions; presumably the fixtures rely on
    # this loop being shared -- confirm before switching to asyncio.run().
    event_loop = asyncio.get_event_loop()

    async def rebuild_then_search():
        await docs_index_mod.rebuild_index(tmp_docs_with_rehearsal)
        # Direct store call (same loop/conn) to verify index
        return await docs_store_mod.search_docs("rehearsal", limit=5)

    hits = event_loop.run_until_complete(rebuild_then_search())

    assert len(hits) >= 1, "search_docs should return at least one hit for 'rehearsal'"
    paths = [hit["path"] for hit in hits]
    assert any("rehearsal" in p for p in paths), f"Expected path containing 'rehearsal', got {paths}"

    top_hit = hits[0]
    assert all(key in top_hit for key in ("path", "title", "snippet"))
    assert "score" in top_hit
    # The score key must exist, but its value may be None or a number.
    score = top_hit["score"]
    assert score is None or isinstance(score, (int, float))
def test_runbooks_preview_returns_headings(
    sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch
):
    """Preview of the indexed checklist returns its path, title and sections.

    After rebuilding the index from the temporary docs tree, the preview
    endpoint must answer 200 and include at least one section whose heading
    contains "Preflight".
    """
    import app.docs_index as docs_index_mod

    monkeypatch.setenv("SOFIIA_DATA_DIR", str(tmp_path / "sofiia-data"))
    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated on newer Python versions -- confirm a loop is provided.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(docs_index_mod.rebuild_index(tmp_docs_with_rehearsal))

    response = sofiia_client.get("/api/runbooks/preview?path=runbook/rehearsal-v1-30min-checklist.md")
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["path"] == "runbook/rehearsal-v1-30min-checklist.md"

    # A missing or null title is treated as an empty string.
    title = payload.get("title") or ""
    assert "Rehearsal" in title

    assert "sections" in payload
    sections = payload["sections"]
    assert len(sections) >= 1
    assert any("Preflight" in (section.get("heading") or "") for section in sections)
def test_runbooks_search_filter_doc_type(
    sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch
):
    """Search with ``doc_type=runbook`` only yields items whose path mentions runbook."""
    import app.docs_index as docs_index_mod

    monkeypatch.setenv("SOFIIA_DATA_DIR", str(tmp_path / "sofiia-data"))
    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated on newer Python versions -- confirm a loop is provided.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(docs_index_mod.rebuild_index(tmp_docs_with_rehearsal))

    response = sofiia_client.get("/api/runbooks/search?q=rehearsal&doc_type=runbook&limit=5")
    assert response.status_code == 200, response.text

    # NOTE(review): an empty or missing "items" list passes this test
    # vacuously; consider asserting at least one hit once confirmed safe.
    returned_items = response.json().get("items", [])
    for entry in returned_items:
        path = entry["path"]
        # The startswith() clause is implied by the substring check; it is
        # kept to mirror the original assertion exactly.
        assert "runbook" in path or path.startswith("runbook/")
def test_runbooks_preview_404_for_unknown_path(sofiia_client):
    """Requesting a preview for a path that is not indexed answers 404."""
    missing_preview_url = "/api/runbooks/preview?path=runbook/nonexistent-file.md"
    response = sofiia_client.get(missing_preview_url)
    assert response.status_code == 404
def test_runbooks_raw_400_for_invalid_path(sofiia_client):
    """A raw request whose path tries to climb out via ``..`` answers 400."""
    traversal_url = "/api/runbooks/raw?path=../../../etc/passwd"
    response = sofiia_client.get(traversal_url)
    assert response.status_code == 400
def test_runbooks_status_after_rebuild(
    sofiia_module, tmp_path, tmp_docs_with_rehearsal, monkeypatch
):
    """Index status after a rebuild reports files, chunks and a timestamp.

    Expects ``indexed_files`` and ``indexed_chunks`` to be at least 1,
    ``last_indexed_at`` to be set, and the ``docs_root`` and
    ``fts_available`` keys to be present.
    """
    import app.docs_index as docs_index_mod
    import app.docs_store as docs_store_mod

    monkeypatch.setenv("SOFIIA_DATA_DIR", str(tmp_path / "sofiia-data"))
    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated on newer Python versions -- confirm a loop is provided.
    event_loop = asyncio.get_event_loop()

    async def rebuild_then_read_status():
        # Rebuild and status read run in the same coroutine, on one loop.
        await docs_index_mod.rebuild_index(tmp_docs_with_rehearsal)
        return await docs_store_mod.get_docs_index_status()

    status = event_loop.run_until_complete(rebuild_then_read_status())

    # The whole status mapping is the failure message to ease debugging.
    assert status["indexed_files"] >= 1, status
    assert status["indexed_chunks"] >= 1, status
    assert status.get("last_indexed_at") is not None, status
    assert "docs_root" in status
    assert "fts_available" in status