"""STT provider: delegates to existing Memory Service /voice/stt. Memory Service accepts: multipart/form-data audio file upload. Returns: {text, model, language} Fabric contract output: {text, segments[], language, meta} """ import base64 import logging import os from typing import Any, Dict import httpx logger = logging.getLogger("provider.stt_memory_service") MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000") MAX_AUDIO_BYTES = int(os.getenv("STT_MAX_AUDIO_BYTES", str(25 * 1024 * 1024))) async def _resolve_audio_bytes(payload: Dict[str, Any]) -> tuple[bytes, str, str, str]: """Return (raw_bytes, filename, source, content_type) from audio_b64 or audio_url.""" audio_b64 = payload.get("audio_b64", "") audio_url = payload.get("audio_url", "") filename = payload.get("filename", "audio.wav") if audio_b64: raw = base64.b64decode(audio_b64) if len(raw) > MAX_AUDIO_BYTES: raise ValueError(f"Audio exceeds {MAX_AUDIO_BYTES} bytes") return raw, filename, "b64", "audio/wav" if audio_url: if audio_url.startswith(("file://", "/")): path = audio_url.replace("file://", "") with open(path, "rb") as f: raw = f.read() if len(raw) > MAX_AUDIO_BYTES: raise ValueError(f"Audio exceeds {MAX_AUDIO_BYTES} bytes") ext = path.rsplit(".", 1)[-1] if "." in path else "wav" return raw, f"audio.{ext}", "file", f"audio/{ext}" # HTTP URL — check Content-Length header first if available async with httpx.AsyncClient(timeout=30) as c: try: head_resp = await c.head(audio_url) content_length = int(head_resp.headers.get("content-length", 0)) if content_length > MAX_AUDIO_BYTES: raise ValueError(f"Audio URL Content-Length {content_length} exceeds {MAX_AUDIO_BYTES} bytes") content_type = head_resp.headers.get("content-type", "audio/wav") except httpx.HTTPError: content_type = "audio/wav" resp = await c.get(audio_url) resp.raise_for_status() raw = resp.content content_type = resp.headers.get("content-type", content_type) if len(raw) > MAX_AUDIO_BYTES: raise ValueError(f"Audio exceeds {MAX_AUDIO_BYTES} bytes") ext = content_type.split("/")[-1].split(";")[0] or "wav" return raw, f"audio.{ext}", "url", content_type raise ValueError("audio_b64 or audio_url is required") async def transcribe(payload: Dict[str, Any]) -> Dict[str, Any]: """Fabric STT entry point — delegates to Memory Service. Payload: audio_url: str (http/file) — OR — audio_b64: str (base64) language: str (optional, e.g. "uk", "en") filename: str (optional, helps whisper detect format) Returns Fabric contract: {text, segments[], language, meta, provider, model} """ language = payload.get("language") raw_bytes, filename, source, content_type = await _resolve_audio_bytes(payload) params = {} if language: params["language"] = language async with httpx.AsyncClient(timeout=90) as c: resp = await c.post( f"{MEMORY_SERVICE_URL}/voice/stt", files={"audio": (filename, raw_bytes, "audio/wav")}, params=params, ) resp.raise_for_status() data = resp.json() text = data.get("text", "") model_used = data.get("model", "faster-whisper") lang_detected = data.get("language", language or "") return { "text": text, "segments": [], "language": lang_detected, "meta": { "model": model_used, "provider": "memory_service", "engine": model_used, "service_url": MEMORY_SERVICE_URL, "source": source, "bytes": len(raw_bytes), "filename": filename, "content_type": content_type, }, "provider": "memory_service", "model": model_used, }