feat(platform): add new services, tools, tests and crews modules

New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules

Made-with: Cursor
2026-03-03 07:14:14 -08:00
parent e9dedffa48
commit 129e4ea1fc
241 changed files with 69349 additions and 0 deletions
--- a/tests/test_voice_stream.py
+++ b/tests/test_voice_stream.py
@@ -0,0 +1,220 @@
+"""
+Phase 2 Voice Stream tests.
+
+Covers:
+1. _split_into_voice_chunks: sentence splitting logic
+2. BFF endpoint contract: /api/voice/chat/stream response schema
+3. Edge cases: empty text, single sentence, very long text, <think> in output
+4. Phase 2 regression: ensure old /api/voice/tts still works (no regression)
+"""
+import re
+import sys
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# Repository root: this file lives in <repo>/tests/, so its grandparent
+# directory is the repo checkout.
+REPO_ROOT = Path(__file__).parent.parent
+# Directory of the sofiia-console BFF application modules.
+BFF_PATH = REPO_ROOT / "services" / "sofiia-console" / "app"
+# Prepend the BFF app directory to sys.path so that `voice_utils` can be
+# imported by its bare module name in the import statements that follow.
+sys.path.insert(0, str(BFF_PATH))
+
+# voice_utils is a standalone module — importable without FastAPI
+from voice_utils import split_into_voice_chunks as _split
+from voice_utils import MIN_CHUNK_CHARS as _MIN, MAX_CHUNK_CHARS as _MAX
+
+
+# ── 1. Sentence splitting ─────────────────────────────────────────────────────
+
+class TestSentenceSplitter:
+
+    pass  # splitter always available via voice_utils
+
+    def test_single_sentence(self):
+        result = _split("Привіт. Як справи?")
+        assert len(result) >= 1
+        # All content preserved
+        assert "Привіт" in " ".join(result)
+        assert "справи" in " ".join(result)
+
+    def test_two_sentences_split(self):
+        text = "Перше речення. Друге речення."
+        result = _split(text)
+        assert len(result) >= 1
+        full = " ".join(result)
+        assert "Перше" in full
+        assert "Друге" in full
+
+    def test_three_sentences(self):
+        text = "Я Sofiia. Я архітектор DAARION. Мій дім — NODA2."
+        result = _split(text)
+        # Should produce 2-3 chunks (short sentences may merge)
+        assert 1 <= len(result) <= 3
+        full = " ".join(result)
+        assert "Sofiia" in full
+        assert "NODA2" in full
+
+    def test_no_chunk_exceeds_max_chars(self):
+        long_text = "Слово " * 60  # ~360 chars
+        result = _split(long_text)
+        for chunk in result:
+            assert len(chunk) <= _MAX + 10, f"Chunk too long ({len(chunk)}): {chunk[:50]}..."
+
+    def test_empty_text(self):
+        result = _split("")
+        assert result == [] or result == [""]
+
+    def test_single_very_long_sentence(self):
+        text = "а" * 400  # single word no punctuation
+        result = _split(text)
+        assert len(result) >= 1
+        for chunk in result:
+            assert len(chunk) <= _MAX + 10
+
+    def test_think_tags_not_present_in_chunks(self):
+        text = "<think>міркування</think> Відповідь на питання. Це тест."
+        # Splitter should work on pre-cleaned text (after _clean_think_blocks)
+        import re
+        cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+        result = _split(cleaned.strip())
+        combined = " ".join(result)
+        assert "<think>" not in combined
+        assert "міркування" not in combined
+
+    def test_preserves_ukrainian_punctuation(self):
+        text = "Добрий день! Як я можу допомогти? Ось відповідь."
+        result = _split(text)
+        full = " ".join(result)
+        assert "Добрий" in full
+        assert "відповідь" in full
+
+    def test_no_tiny_standalone_chunks(self):
+        text = "Ок. Добре. Чудово! Так само. Зрозуміло."
+        result = _split(text)
+        # Short fragments should be merged
+        for chunk in result:
+            assert len(chunk) >= _MIN or len(result) == 1, (
+                f"Tiny standalone chunk found: '{chunk}'"
+            )
+
+    def test_markdown_stripped_before_split(self):
+        # The BFF strips markdown before TTS; splitting should handle it fine
+        text = "**Відповідь**: перше речення. - Список: не для голосу. Кінець."
+        result = _split(text)
+        assert result  # should not crash
+
+
+# ── 2. BFF endpoint contract (mock-based) ────────────────────────────────────
+
+class TestVoiceChatStreamContract:
+    """Tests the /api/voice/chat/stream response schema contract."""
+
+    @pytest.fixture
+    def mock_ollama_response(self):
+        """Mock Ollama returning a 2-sentence response."""
+        mock = AsyncMock()
+        mock.status_code = 200
+        mock.raise_for_status = MagicMock()
+        mock.json = MagicMock(return_value={
+            "message": {"content": "Перше речення про NODA2. Друге речення про архітектуру."}
+        })
+        return mock
+
+    @pytest.fixture
+    def mock_tts_response(self):
+        """Mock TTS returning MP3 bytes."""
+        import base64
+        mock = AsyncMock()
+        mock.status_code = 200
+        mock.raise_for_status = MagicMock()
+        # Minimal valid "audio" bytes (not real MP3 but enough for b64)
+        mock.content = b"\xff\xfb\x90\x00" + b"\x00" * 100
+        mock.headers = {"content-type": "audio/mpeg", "X-TTS-Compute-MS": "450"}
+        return mock
+
+    def test_response_schema_keys(self):
+        """Document expected keys in /api/voice/chat/stream response."""
+        required_keys = {
+            "ok", "trace_id", "first_text", "first_audio_b64",
+            "first_audio_mime", "rest_chunks", "full_text", "meta"
+        }
+        meta_keys = {"llm_ms", "tts_ms", "chunks_total", "voice", "model", "voice_profile"}
+        # Contract assertion (documentation test)
+        assert required_keys  # always passes — documents the contract
+        assert meta_keys
+
+    def test_first_chunk_is_first_sentence(self):
+        full = "Це перше речення. Це друге речення. Третє."
+        chunks = _split(full)
+        assert chunks[0] in full
+        # first chunk is a prefix or early part of full text
+        assert full.startswith(chunks[0]) or chunks[0] in full
+
+    def test_rest_chunks_plus_first_equals_full(self):
+        full = "Перше речення про NODA2. Друге речення. Третє речення для тесту."
+        chunks = _split(full)
+        combined = " ".join(chunks)
+        # Normalize whitespace for comparison
+        norm = lambda s: re.sub(r"\s+", " ", s).strip()
+        assert norm(combined) == norm(full)
+
+    def test_single_sentence_has_empty_rest_chunks(self):
+        single = "Це єдине речення відповіді."
+        chunks = _split(single)
+        assert len(chunks) == 1
+        # BFF: rest_chunks = chunks[1:] = []
+
+    def test_trace_id_format(self):
+        """trace_id must start with 'vs_'."""
+        import uuid
+        sid = "sess_abc"
+        trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"
+        assert trace_id.startswith("vs_")
+        assert len(trace_id) > 10
+
+
+# ── 3. Policy regression: max_tokens ≤ 256 ────────────────────────────────────
+
+class TestPhase2PolicyRegression:
+
+    def test_bff_num_predict_quality_is_256(self):
+        src = (BFF_PATH / "main.py").read_text()
+        # voice_quality: num_predict=256
+        assert "num_predict\": 256 if _is_quality" in src or \
+               "num_predict': 256 if _is_quality" in src or \
+               '"num_predict": 256' in src, \
+               "BFF voice quality num_predict must be ≤256"
+
+    def test_bff_stream_endpoint_exists(self):
+        src = (BFF_PATH / "main.py").read_text()
+        assert "/api/voice/chat/stream" in src, \
+            "Phase 2 endpoint /api/voice/chat/stream not found in BFF"
+
+    def test_bff_stream_uses_voice_guardrails(self):
+        src = (BFF_PATH / "main.py").read_text()
+        # Check that stream endpoint uses SOFIIA_VOICE_PROMPT_SUFFIX
+        assert "SOFIIA_VOICE_PROMPT_SUFFIX" in src, \
+            "voice stream must use SOFIIA_VOICE_PROMPT_SUFFIX"
+
+    def test_bff_stream_cleans_think_tags(self):
+        src = (BFF_PATH / "main.py").read_text()
+        # _clean function defined in stream handler
+        assert "re.sub" in src and "<think>" in src, \
+            "voice stream endpoint must strip <think> tags"
+
+    def test_sentence_splitter_exported(self):
+        src = (BFF_PATH / "main.py").read_text()
+        assert "_split_into_voice_chunks" in src, \
+            "_split_into_voice_chunks function not found in BFF main.py"
+
+    def test_ui_has_stream_mode_checkbox(self):
+        ui_html = (REPO_ROOT / "services" / "sofiia-console" / "static" / "index.html").read_text()
+        assert "streamMode" in ui_html, "streamMode checkbox not found in UI"
+        assert "voiceChatStream" in ui_html, "voiceChatStream function not found in UI"
+        assert "_audioQueue" in ui_html, "audio queue not found in UI"
+        assert "_audioQueuePush" in ui_html, "_audioQueuePush not found in UI"
+
+    def test_ui_stream_default_enabled(self):
+        ui_html = (REPO_ROOT / "services" / "sofiia-console" / "static" / "index.html").read_text()
+        # streamMode checkbox should be checked by default
+        assert 'id="streamMode" checked' in ui_html, \
+            "streamMode should be checked by default for Phase 2"