feat(sofiia-console): rank runbook search results with bm25
FTS path: score = bm25(docs_chunks_fts), ORDER BY score ASC (lower = better). LIKE fallback: score is null. The test asserts that the score key is present.
Made-with: Cursor
This commit is contained in:
@@ -181,14 +181,15 @@ async def search_docs(
|
|||||||
params.append(limit)
|
params.append(limit)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# FTS5: snippet(build, col_idx, left, right, ellipsis, max_tokens)
|
# FTS5: bm25 (lower = better), snippet; ORDER BY bm25 ASC for best-first
|
||||||
# columns: 0=chunk_id, 1=path, 2=heading, 3=content → snippet column 3
|
|
||||||
async with conn.execute(
|
async with conn.execute(
|
||||||
f"""
|
f"""
|
||||||
SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet
|
SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet,
|
||||||
|
bm25(docs_chunks_fts) AS score
|
||||||
FROM docs_chunks_fts AS fts
|
FROM docs_chunks_fts AS fts
|
||||||
JOIN docs_files f ON f.path = fts.path
|
JOIN docs_files f ON f.path = fts.path
|
||||||
WHERE docs_chunks_fts MATCH ? {type_filter}
|
WHERE docs_chunks_fts MATCH ? {type_filter}
|
||||||
|
ORDER BY bm25(docs_chunks_fts) ASC
|
||||||
LIMIT ?
|
LIMIT ?
|
||||||
""",
|
""",
|
||||||
params,
|
params,
|
||||||
@@ -198,10 +199,15 @@ async def search_docs(
|
|||||||
logger.warning("FTS search failed, fallback to LIKE: %s", e)
|
logger.warning("FTS search failed, fallback to LIKE: %s", e)
|
||||||
return await _search_docs_like(q_clean, doc_type, limit)
|
return await _search_docs_like(q_clean, doc_type, limit)
|
||||||
|
|
||||||
result = [
|
result = []
|
||||||
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 1.0}
|
for r in (rows or []):
|
||||||
for r in (rows or [])
|
score_val = float(r[3]) if r[3] is not None and len(r) > 3 else 0.0
|
||||||
]
|
result.append({
|
||||||
|
"path": r[0],
|
||||||
|
"title": r[1] or "",
|
||||||
|
"snippet": (r[2] or "").strip(),
|
||||||
|
"score": score_val,
|
||||||
|
})
|
||||||
if not result:
|
if not result:
|
||||||
return await _search_docs_like(q_clean, doc_type, limit)
|
return await _search_docs_like(q_clean, doc_type, limit)
|
||||||
return result
|
return result
|
||||||
@@ -232,7 +238,7 @@ async def _search_docs_like(
|
|||||||
) as cur:
|
) as cur:
|
||||||
rows = await cur.fetchall()
|
rows = await cur.fetchall()
|
||||||
return [
|
return [
|
||||||
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 0.5}
|
{"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": None}
|
||||||
for r in (rows or [])
|
for r in (rows or [])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ def test_runbooks_search_finds_rehearsal(sofiia_module, tmp_path, tmp_docs_with_
|
|||||||
assert any("rehearsal" in p for p in paths), f"Expected path containing 'rehearsal', got {paths}"
|
assert any("rehearsal" in p for p in paths), f"Expected path containing 'rehearsal', got {paths}"
|
||||||
first = items[0]
|
first = items[0]
|
||||||
assert "path" in first and "title" in first and "snippet" in first
|
assert "path" in first and "title" in first and "snippet" in first
|
||||||
|
assert "score" in first
|
||||||
|
assert first["score"] is None or isinstance(first["score"], (int, float))
|
||||||
|
|
||||||
|
|
||||||
def test_runbooks_preview_returns_headings(sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch):
|
def test_runbooks_preview_returns_headings(sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch):
|
||||||
|
|||||||
Reference in New Issue
Block a user