diff --git a/services/sofiia-console/app/docs_store.py b/services/sofiia-console/app/docs_store.py
index 408059d0..86041a3a 100644
--- a/services/sofiia-console/app/docs_store.py
+++ b/services/sofiia-console/app/docs_store.py
@@ -181,14 +181,15 @@ async def search_docs(
     params.append(limit)
 
     try:
-        # FTS5: snippet(build, col_idx, left, right, ellipsis, max_tokens)
-        # columns: 0=chunk_id, 1=path, 2=heading, 3=content → snippet column 3
+        # FTS5: bm25 (lower = better), snippet; ORDER BY bm25 ASC for best-first
         async with conn.execute(
             f"""
-                SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet
+                SELECT f.path, f.title, snippet(docs_chunks_fts, 3, '**', '**', '...', {_SNIPPET_LEN//5}) AS snippet,
+                       bm25(docs_chunks_fts) AS score
                 FROM docs_chunks_fts AS fts
                 JOIN docs_files f ON f.path = fts.path
                 WHERE docs_chunks_fts MATCH ? {type_filter}
+                ORDER BY bm25(docs_chunks_fts) ASC
                 LIMIT ?
             """,
             params,
@@ -198,10 +199,16 @@ async def search_docs(
         logger.warning("FTS search failed, fallback to LIKE: %s", e)
         return await _search_docs_like(q_clean, doc_type, limit)
 
-    result = [
-        {"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 1.0}
-        for r in (rows or [])
-    ]
+    result = []
+    for r in (rows or []):
+        # Check the row length before indexing so a short row yields 0.0 instead of IndexError.
+        score_val = float(r[3]) if len(r) > 3 and r[3] is not None else 0.0
+        result.append({
+            "path": r[0],
+            "title": r[1] or "",
+            "snippet": (r[2] or "").strip(),
+            "score": score_val,
+        })
     if not result:
         return await _search_docs_like(q_clean, doc_type, limit)
     return result
@@ -232,7 +239,7 @@ async def _search_docs_like(
     ) as cur:
         rows = await cur.fetchall()
     return [
-        {"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": 0.5}
+        {"path": r[0], "title": r[1] or "", "snippet": (r[2] or "").strip(), "score": None}
         for r in (rows or [])
     ]
 
diff --git a/tests/test_sofiia_docs_search.py b/tests/test_sofiia_docs_search.py
index d892df59..ad160b53 100644
--- a/tests/test_sofiia_docs_search.py
+++ b/tests/test_sofiia_docs_search.py
@@ -49,6 +49,8 @@ def test_runbooks_search_finds_rehearsal(sofiia_module, tmp_path, tmp_docs_with_
     assert any("rehearsal" in p for p in paths), f"Expected path containing 'rehearsal', got {paths}"
     first = items[0]
     assert "path" in first and "title" in first and "snippet" in first
+    assert "score" in first
+    assert first["score"] is None or isinstance(first["score"], (int, float))
 
 
 def test_runbooks_preview_returns_headings(sofiia_module, sofiia_client, tmp_path, tmp_docs_with_rehearsal, monkeypatch):