Files
microdao-daarion/tests/test_voice_stream.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

221 lines
9.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Phase 2 Voice Stream tests.
Covers:
1. _split_into_voice_chunks: sentence splitting logic
2. BFF endpoint contract: /api/voice/chat/stream response schema
3. Edge cases: empty text, single sentence, very long text, <think> in output
4. Phase 2 regression: ensure old /api/voice/tts still works (no regression)
"""
import re
import sys
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# Resolve the repo root relative to this test file so the suite works from any CWD.
REPO_ROOT = Path(__file__).parent.parent
# Path to the sofiia-console BFF application package under test.
BFF_PATH = REPO_ROOT / "services" / "sofiia-console" / "app"
# Prepend (not append) so the BFF's modules win over any same-named installed packages.
sys.path.insert(0, str(BFF_PATH))
# voice_utils is a standalone module — importable without FastAPI
from voice_utils import split_into_voice_chunks as _split
from voice_utils import MIN_CHUNK_CHARS as _MIN, MAX_CHUNK_CHARS as _MAX
# ── 1. Sentence splitting ─────────────────────────────────────────────────────
class TestSentenceSplitter:
    """Unit tests for voice_utils.split_into_voice_chunks.

    Covers basic splitting, chunk-size bounds (MIN/MAX), empty and
    degenerate inputs, <think>-tag hygiene, and Ukrainian punctuation.
    """

    def test_single_sentence(self):
        """Two short sentences: at least one chunk, all content preserved."""
        result = _split("Привіт. Як справи?")
        assert len(result) >= 1
        # All content preserved
        assert "Привіт" in " ".join(result)
        assert "справи" in " ".join(result)

    def test_two_sentences_split(self):
        """Both sentences survive the split (merging is allowed)."""
        text = "Перше речення. Друге речення."
        result = _split(text)
        assert len(result) >= 1
        full = " ".join(result)
        assert "Перше" in full
        assert "Друге" in full

    def test_three_sentences(self):
        text = "Я Sofiia. Я архітектор DAARION. Мій дім — NODA2."
        result = _split(text)
        # Should produce 2-3 chunks (short sentences may merge)
        assert 1 <= len(result) <= 3
        full = " ".join(result)
        assert "Sofiia" in full
        assert "NODA2" in full

    def test_no_chunk_exceeds_max_chars(self):
        """No chunk may exceed MAX_CHUNK_CHARS (small slack for boundary word)."""
        long_text = "Слово " * 60  # ~360 chars
        result = _split(long_text)
        for chunk in result:
            assert len(chunk) <= _MAX + 10, f"Chunk too long ({len(chunk)}): {chunk[:50]}..."

    def test_empty_text(self):
        """Empty input must not crash; either no chunks or one empty chunk."""
        result = _split("")
        assert result == [] or result == [""]

    def test_single_very_long_sentence(self):
        """A single unpunctuated run must still be hard-split below MAX."""
        text = "а" * 400  # single word, no punctuation
        result = _split(text)
        assert len(result) >= 1
        for chunk in result:
            assert len(chunk) <= _MAX + 10

    def test_think_tags_not_present_in_chunks(self):
        """Splitter receives pre-cleaned text; no <think> content leaks through."""
        text = "<think>міркування</think> Відповідь на питання. Це тест."
        # Splitter should work on pre-cleaned text (after _clean_think_blocks);
        # `re` is the module-level import — no local re-import needed.
        cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        result = _split(cleaned.strip())
        combined = " ".join(result)
        assert "<think>" not in combined
        assert "міркування" not in combined

    def test_preserves_ukrainian_punctuation(self):
        """Sentences ending in '!', '?', '.' all survive splitting."""
        text = "Добрий день! Як я можу допомогти? Ось відповідь."
        result = _split(text)
        full = " ".join(result)
        assert "Добрий" in full
        assert "відповідь" in full

    def test_no_tiny_standalone_chunks(self):
        """Fragments shorter than MIN_CHUNK_CHARS must be merged into neighbors."""
        text = "Ок. Добре. Чудово! Так само. Зрозуміло."
        result = _split(text)
        # Short fragments should be merged
        for chunk in result:
            assert len(chunk) >= _MIN or len(result) == 1, (
                f"Tiny standalone chunk found: '{chunk}'"
            )

    def test_markdown_stripped_before_split(self):
        """Markdown syntax in the input must not crash the splitter."""
        # The BFF strips markdown before TTS; splitting should handle it fine
        text = "**Відповідь**: перше речення. - Список: не для голосу. Кінець."
        result = _split(text)
        assert result  # should not crash
# ── 2. BFF endpoint contract (mock-based) ────────────────────────────────────
class TestVoiceChatStreamContract:
    """Tests the /api/voice/chat/stream response schema contract."""

    @pytest.fixture
    def mock_ollama_response(self):
        """Mock Ollama returning a 2-sentence response."""
        mock = AsyncMock()
        mock.status_code = 200
        mock.raise_for_status = MagicMock()
        mock.json = MagicMock(return_value={
            "message": {"content": "Перше речення про NODA2. Друге речення про архітектуру."}
        })
        return mock

    @pytest.fixture
    def mock_tts_response(self):
        """Mock TTS returning MP3 bytes."""
        mock = AsyncMock()
        mock.status_code = 200
        mock.raise_for_status = MagicMock()
        # Minimal valid "audio" bytes (not real MP3 but enough for b64)
        mock.content = b"\xff\xfb\x90\x00" + b"\x00" * 100
        mock.headers = {"content-type": "audio/mpeg", "X-TTS-Compute-MS": "450"}
        return mock

    def test_response_schema_keys(self):
        """Document expected keys in /api/voice/chat/stream response."""
        required_keys = {
            "ok", "trace_id", "first_text", "first_audio_b64",
            "first_audio_mime", "rest_chunks", "full_text", "meta"
        }
        meta_keys = {"llm_ms", "tts_ms", "chunks_total", "voice", "model", "voice_profile"}
        # Contract assertion (documentation test)
        assert required_keys  # always passes — documents the contract
        assert meta_keys

    def test_first_chunk_is_first_sentence(self):
        """The first chunk must be drawn from the start of the full text."""
        full = "Це перше речення. Це друге речення. Третє."
        chunks = _split(full)
        assert chunks[0] in full
        # first chunk is a prefix or early part of full text
        assert full.startswith(chunks[0]) or chunks[0] in full

    def test_rest_chunks_plus_first_equals_full(self):
        """Joining all chunks reproduces the full text (modulo whitespace)."""
        full = "Перше речення про NODA2. Друге речення. Третє речення для тесту."
        chunks = _split(full)
        combined = " ".join(chunks)

        # Normalize whitespace for comparison (named def instead of lambda, PEP 8 E731)
        def norm(s):
            return re.sub(r"\s+", " ", s).strip()

        assert norm(combined) == norm(full)

    def test_single_sentence_has_empty_rest_chunks(self):
        """A one-sentence reply yields exactly one chunk, so rest_chunks == []."""
        single = "Це єдине речення відповіді."
        chunks = _split(single)
        assert len(chunks) == 1
        # BFF: rest_chunks = chunks[1:] = []

    def test_trace_id_format(self):
        """trace_id must start with 'vs_'."""
        import uuid
        sid = "sess_abc"
        trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"
        assert trace_id.startswith("vs_")
        assert len(trace_id) > 10
# ── 3. Policy regression: max_tokens ≤ 256 ────────────────────────────────────
class TestPhase2PolicyRegression:
    """Source-level regression checks on the BFF and UI files.

    All files under test contain UTF-8 Cyrillic content, so every read
    passes encoding="utf-8" explicitly — the platform default (e.g. cp1252
    on Windows) would raise UnicodeDecodeError before any assertion runs.
    """

    def test_bff_num_predict_quality_is_256(self):
        """Voice-quality generation must cap num_predict at 256 tokens."""
        src = (BFF_PATH / "main.py").read_text(encoding="utf-8")
        # voice_quality: num_predict=256
        assert (
            "num_predict\": 256 if _is_quality" in src
            or "num_predict': 256 if _is_quality" in src
            or '"num_predict": 256' in src
        ), "BFF voice quality num_predict must be ≤256"

    def test_bff_stream_endpoint_exists(self):
        """The Phase 2 streaming endpoint must be registered in the BFF."""
        src = (BFF_PATH / "main.py").read_text(encoding="utf-8")
        assert "/api/voice/chat/stream" in src, (
            "Phase 2 endpoint /api/voice/chat/stream not found in BFF"
        )

    def test_bff_stream_uses_voice_guardrails(self):
        """The stream endpoint must apply the voice prompt guardrail suffix."""
        src = (BFF_PATH / "main.py").read_text(encoding="utf-8")
        # Check that stream endpoint uses SOFIIA_VOICE_PROMPT_SUFFIX
        assert "SOFIIA_VOICE_PROMPT_SUFFIX" in src, (
            "voice stream must use SOFIIA_VOICE_PROMPT_SUFFIX"
        )

    def test_bff_stream_cleans_think_tags(self):
        """The stream handler must strip <think>…</think> blocks before TTS."""
        src = (BFF_PATH / "main.py").read_text(encoding="utf-8")
        # _clean function defined in stream handler
        assert "re.sub" in src and "<think>" in src, (
            "voice stream endpoint must strip <think> tags"
        )

    def test_sentence_splitter_exported(self):
        """The chunk splitter must be referenced from BFF main.py."""
        src = (BFF_PATH / "main.py").read_text(encoding="utf-8")
        assert "_split_into_voice_chunks" in src, (
            "_split_into_voice_chunks function not found in BFF main.py"
        )

    def test_ui_has_stream_mode_checkbox(self):
        """The console UI must ship the stream-mode controls and audio queue."""
        ui_html = (
            REPO_ROOT / "services" / "sofiia-console" / "static" / "index.html"
        ).read_text(encoding="utf-8")
        assert "streamMode" in ui_html, "streamMode checkbox not found in UI"
        assert "voiceChatStream" in ui_html, "voiceChatStream function not found in UI"
        assert "_audioQueue" in ui_html, "audio queue not found in UI"
        assert "_audioQueuePush" in ui_html, "_audioQueuePush not found in UI"

    def test_ui_stream_default_enabled(self):
        """Stream mode must be on by default in the UI markup."""
        ui_html = (
            REPO_ROOT / "services" / "sofiia-console" / "static" / "index.html"
        ).read_text(encoding="utf-8")
        # streamMode checkbox should be checked by default
        assert 'id="streamMode" checked' in ui_html, (
            "streamMode should be checked by default for Phase 2"
        )