feat(sofiia-console): rank runbook search results with bm25

FTS path: score = bm25(docs_chunks_fts), ORDER BY score ASC (lower bm25 = better match, so best results come first); LIKE fallback: score is null (None); the test asserts that the score key is present.

Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 04:36:52 -08:00
parent 63fec4371a
commit 4db1774a34
2 changed files with 16 additions and 8 deletions

View File

@@ -181,14 +181,15 @@ async def search_docs(
params.append(limit)
try:
# FTS5: snippet(table, col_idx, left, right, ellipsis, max_tokens)
# columns: 0=chunk_id, 1=path, 2=heading, 3=content → snippet column 3
# FTS5: bm25 (lower = better), snippet; ORDER BY bm25 ASC for best-first
async with conn.execute(
f"""
SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet
SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet,
bm25(docs_chunks_fts) AS score
FROM docs_chunks_fts AS fts
JOIN docs_files f ON f.path = fts.path
WHERE docs_chunks_fts MATCH ? {type_filter}
ORDER BY bm25(docs_chunks_fts) ASC
LIMIT ?
""",
params,
@@ -198,10 +199,15 @@ async def search_docs(
logger.warning("FTS search failed, fallback to LIKE: %s", e)
return await _search_docs_like(q_clean, doc_type, limit)
result = [
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 1.0}
for r in (rows or [])
]
result = []
for r in (rows or []):
score_val = float(r[3]) if r[3] is not None and len(r) > 3 else 0.0
result.append({
"path": r[0],
"title": r[1] or "",
"snippet": (r[2] or "").strip(),
"score": score_val,
})
if not result:
return await _search_docs_like(q_clean, doc_type, limit)
return result
@@ -232,7 +238,7 @@ async def _search_docs_like(
) as cur:
rows = await cur.fetchall()
return [
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 0.5}
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": None}
for r in (rows or [])
]

View File

@@ -49,6 +49,8 @@ def test_runbooks_search_finds_rehearsal(sofiia_module, tmp_path, tmp_docs_with_
assert any("rehearsal" in p for p in paths), f"Expected path containing 'rehearsal', got {paths}"
first = items[0]
assert "path" in first and "title" in first and "snippet" in first
assert "score" in first
assert first["score"] is None or isinstance(first["score"], (int, float))
def test_runbooks_preview_returns_headings(sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch):