feat(sofiia-console): rank runbook search results with bm25
FTS path: score = bm25(docs_chunks_fts), ORDER BY score ASC (lower is better). LIKE fallback: score is null. The test asserts that the score key is present.

Made-with: Cursor
This commit is contained in:
@@ -181,14 +181,15 @@ async def search_docs(
|
||||
params.append(limit)
|
||||
|
||||
try:
|
||||
# FTS5: snippet(build, col_idx, left, right, ellipsis, max_tokens)
|
||||
# columns: 0=chunk_id, 1=path, 2=heading, 3=content → snippet column 3
|
||||
# FTS5: bm25 (lower = better), snippet; ORDER BY bm25 ASC for best-first
|
||||
async with conn.execute(
|
||||
f"""
|
||||
SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet
|
||||
SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet,
|
||||
bm25(docs_chunks_fts) AS score
|
||||
FROM docs_chunks_fts AS fts
|
||||
JOIN docs_files f ON f.path = fts.path
|
||||
WHERE docs_chunks_fts MATCH ? {type_filter}
|
||||
ORDER BY bm25(docs_chunks_fts) ASC
|
||||
LIMIT ?
|
||||
""",
|
||||
params,
|
||||
@@ -198,10 +199,15 @@ async def search_docs(
|
||||
logger.warning("FTS search failed, fallback to LIKE: %s", e)
|
||||
return await _search_docs_like(q_clean, doc_type, limit)
|
||||
|
||||
result = [
|
||||
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 1.0}
|
||||
for r in (rows or [])
|
||||
]
|
||||
result = []
|
||||
for r in (rows or []):
|
||||
score_val = float(r[3]) if r[3] is not None and len(r) > 3 else 0.0
|
||||
result.append({
|
||||
"path": r[0],
|
||||
"title": r[1] or "",
|
||||
"snippet": (r[2] or "").strip(),
|
||||
"score": score_val,
|
||||
})
|
||||
if not result:
|
||||
return await _search_docs_like(q_clean, doc_type, limit)
|
||||
return result
|
||||
@@ -232,7 +238,7 @@ async def _search_docs_like(
|
||||
) as cur:
|
||||
rows = await cur.fetchall()
|
||||
return [
|
||||
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 0.5}
|
||||
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": None}
|
||||
for r in (rows or [])
|
||||
]
|
||||
|
||||
|
||||
@@ -49,6 +49,8 @@ def test_runbooks_search_finds_rehearsal(sofiia_module, tmp_path, tmp_docs_with_
|
||||
assert any("rehearsal" in p for p in paths), f"Expected path containing 'rehearsal', got {paths}"
|
||||
first = items[0]
|
||||
assert "path" in first and "title" in first and "snippet" in first
|
||||
assert "score" in first
|
||||
assert first["score"] is None or isinstance(first["score"], (int, float))
|
||||
|
||||
|
||||
def test_runbooks_preview_returns_headings(sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch):
|
||||
|
||||
Reference in New Issue
Block a user