feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance
New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers
New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault
New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry)
Tests: 85+ test files for all new modules
Made-with: Cursor
This commit is contained in:
114
services/node-worker/providers/stt_memory_service.py
Normal file
114
services/node-worker/providers/stt_memory_service.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""STT provider: delegates to existing Memory Service /voice/stt.
|
||||
|
||||
Memory Service accepts: multipart/form-data audio file upload.
|
||||
Returns: {text, model, language}
|
||||
|
||||
Fabric contract output: {text, segments[], language, meta}
|
||||
"""
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger("provider.stt_memory_service")
|
||||
|
||||
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")
|
||||
MAX_AUDIO_BYTES = int(os.getenv("STT_MAX_AUDIO_BYTES", str(25 * 1024 * 1024)))
|
||||
|
||||
|
||||
async def _resolve_audio_bytes(payload: Dict[str, Any]) -> tuple[bytes, str, str, str]:
    """Resolve the audio referenced by *payload* into raw bytes.

    Exactly one source is used, in this order of precedence:

    1. ``audio_b64`` -- base64-encoded audio; the payload's ``filename``
       (default ``audio.wav``) is passed through unchanged.
    2. ``audio_url`` starting with ``file://`` or ``/`` -- read from the
       local filesystem.
    3. any other ``audio_url`` -- fetched with an HTTP GET.

    Returns:
        ``(raw_bytes, filename, source, content_type)`` where ``source`` is
        one of ``"b64"``, ``"file"`` or ``"url"``.

    Raises:
        ValueError: neither input was supplied, or the audio is larger than
            ``MAX_AUDIO_BYTES``.
        OSError: the local file could not be opened or read.
        httpx.HTTPError: the download failed or returned an error status.
    """
    # ``or`` (rather than a .get default) also covers keys explicitly set to None.
    audio_b64 = payload.get("audio_b64") or ""
    audio_url = payload.get("audio_url") or ""
    filename = payload.get("filename") or "audio.wav"

    if audio_b64:
        raw = base64.b64decode(audio_b64)
        if len(raw) > MAX_AUDIO_BYTES:
            raise ValueError(f"Audio exceeds {MAX_AUDIO_BYTES} bytes")
        return raw, filename, "b64", "audio/wav"

    if not audio_url:
        raise ValueError("audio_b64 or audio_url is required")

    if audio_url.startswith(("file://", "/")):
        # NOTE(review): the path comes straight from the payload, so a caller
        # can make this process read any file it has access to. Confirm that
        # payloads are trusted, or restrict reads to an allow-listed directory.
        scheme = "file://"
        # Strip only the leading scheme; a later "file://" inside the path is
        # part of the path and must be kept.
        path = audio_url[len(scheme):] if audio_url.startswith(scheme) else audio_url
        with open(path, "rb") as f:
            # Read at most one byte past the limit so an oversized file is
            # rejected without being loaded into memory in full.
            raw = f.read(MAX_AUDIO_BYTES + 1)
        if len(raw) > MAX_AUDIO_BYTES:
            raise ValueError(f"Audio exceeds {MAX_AUDIO_BYTES} bytes")
        # Take the extension from the final path component only, so a dot in
        # a directory name ("/data/v1.2/audio") is not mistaken for one.
        ext = os.path.splitext(os.path.basename(path))[1].lstrip(".") or "wav"
        return raw, f"audio.{ext}", "file", f"audio/{ext}"

    # HTTP URL — check the Content-Length header first if available.
    # NOTE(review): the URL is caller-controlled (possible SSRF); confirm the
    # payload source is trusted.
    async with httpx.AsyncClient(timeout=30) as c:
        content_type = "audio/wav"
        try:
            head_resp = await c.head(audio_url)
        except httpx.HTTPError:
            # HEAD is best-effort: some servers reject it; fall through to GET.
            head_resp = None

        if head_resp is not None:
            try:
                content_length = int(head_resp.headers.get("content-length", 0))
            except (TypeError, ValueError):
                # A malformed header must not abort the request; treat the
                # size as unknown and rely on the post-download check.
                content_length = 0
            if content_length > MAX_AUDIO_BYTES:
                raise ValueError(
                    f"Audio URL Content-Length {content_length} exceeds {MAX_AUDIO_BYTES} bytes"
                )
            content_type = head_resp.headers.get("content-type", "audio/wav")

        # NOTE(review): the body is buffered in full before the size check
        # below, so a server that omits or understates Content-Length can
        # still make this process download an oversized body.
        resp = await c.get(audio_url)
        resp.raise_for_status()
        raw = resp.content
        content_type = resp.headers.get("content-type", content_type)

    if len(raw) > MAX_AUDIO_BYTES:
        raise ValueError(f"Audio exceeds {MAX_AUDIO_BYTES} bytes")

    # "audio/mpeg; charset=binary" -> "mpeg"
    ext = content_type.split("/")[-1].split(";")[0].strip() or "wav"
    return raw, f"audio.{ext}", "url", content_type
|
||||
|
||||
|
||||
async def transcribe(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Fabric STT entry point — delegates to Memory Service.

    Payload:
        audio_url: str (http/file) — OR —
        audio_b64: str (base64)
        language: str (optional, e.g. "uk", "en")
        filename: str (optional, helps whisper detect format)

    Returns Fabric contract: {text, segments[], language, meta, provider, model}.
    ``segments`` is always an empty list: only ``text``, ``model`` and
    ``language`` are read from the Memory Service response.

    Raises:
        ValueError: no audio input was supplied or the audio is too large.
        httpx.HTTPError: the Memory Service call failed or returned an error
            status.
    """
    language = payload.get("language")
    raw_bytes, filename, source, content_type = await _resolve_audio_bytes(payload)

    # Only forward ``language`` when the caller actually supplied one.
    params = {"language": language} if language else {}

    async with httpx.AsyncClient(timeout=90) as c:
        resp = await c.post(
            f"{MEMORY_SERVICE_URL}/voice/stt",
            # Upload with the content type resolved for this audio, so the
            # multipart part agrees with the filename and with the
            # ``content_type`` reported in ``meta`` below.
            files={"audio": (filename, raw_bytes, content_type)},
            params=params,
        )
        resp.raise_for_status()
        data = resp.json()

    text = data.get("text", "")
    model_used = data.get("model", "faster-whisper")
    # Fall back to the requested language when the service reports none.
    lang_detected = data.get("language", language or "")

    return {
        "text": text,
        "segments": [],
        "language": lang_detected,
        "meta": {
            "model": model_used,
            "provider": "memory_service",
            "engine": model_used,
            "service_url": MEMORY_SERVICE_URL,
            "source": source,
            "bytes": len(raw_bytes),
            "filename": filename,
            "content_type": content_type,
        },
        "provider": "memory_service",
        "model": model_used,
    }
|
||||
77
services/node-worker/providers/tts_memory_service.py
Normal file
77
services/node-worker/providers/tts_memory_service.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""TTS provider: delegates to existing Memory Service /voice/tts.
|
||||
|
||||
Memory Service accepts: JSON {text, voice, speed}
|
||||
Returns: StreamingResponse — audio/mpeg (MP3 bytes)
|
||||
|
||||
Fabric contract output: {audio_b64, format, meta}
|
||||
"""
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger("provider.tts_memory_service")
|
||||
|
||||
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")
|
||||
MAX_TEXT_CHARS = int(os.getenv("TTS_MAX_TEXT_CHARS", "500")) # Memory Service limits to 500
|
||||
|
||||
|
||||
async def synthesize(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Fabric TTS entry point — delegates to Memory Service.

    Payload:
        text: str (required)
        voice: str (optional; Polina/Ostap/default/uk-UA-PolinaNeural/etc.)
        speed: float (optional, default 1.0)

    Returns Fabric contract: {audio_b64, format, meta, provider, model}

    Raises:
        ValueError: ``text`` is missing, not a string, or blank; or ``speed``
            cannot be converted to a float.
        httpx.HTTPError: the Memory Service call failed or returned an error
            status.

    Note: Memory Service uses edge-tts and returns MP3.
    No format conversion — caller receives base64-encoded MP3.
    Text longer than ``MAX_TEXT_CHARS`` is truncated (with a warning) rather
    than rejected; ``meta`` records ``truncated``, ``orig_len`` and
    ``used_len``.
    """
    raw_text = payload.get("text")
    # A missing key, an explicit None and a non-string value are all reported
    # as the same "text is required" error instead of an AttributeError.
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        raise ValueError("text is required")

    orig_len = len(text)
    truncated = orig_len > MAX_TEXT_CHARS
    if truncated:
        text = text[:MAX_TEXT_CHARS]
        logger.warning("TTS text truncated %d → %d chars", orig_len, MAX_TEXT_CHARS)

    # Treat explicit None the same as "not provided" for the optional fields.
    voice = payload.get("voice") or "default"
    raw_speed = payload.get("speed")
    speed = 1.0 if raw_speed is None else float(raw_speed)

    async with httpx.AsyncClient(timeout=30) as c:
        resp = await c.post(
            f"{MEMORY_SERVICE_URL}/voice/tts",
            json={"text": text, "voice": voice, "speed": speed},
        )
        resp.raise_for_status()
        audio_bytes = resp.content

    # Engine/voice are taken from the response headers, falling back to
    # "edge-tts" and the requested voice when the headers are absent.
    engine = resp.headers.get("X-TTS-Engine", "edge-tts")
    tts_voice = resp.headers.get("X-TTS-Voice", voice)
    content_type = resp.headers.get("content-type", "audio/mpeg")
    # Anything that is not an MPEG content type is labelled "wav".
    fmt = "mp3" if "mpeg" in content_type else "wav"

    audio_b64 = base64.b64encode(audio_bytes).decode()

    return {
        "audio_b64": audio_b64,
        "format": fmt,
        "meta": {
            "model": engine,
            "voice": tts_voice,
            "provider": "memory_service",
            "engine": engine,
            "audio_bytes": len(audio_bytes),
            "service_url": MEMORY_SERVICE_URL,
            "truncated": truncated,
            "orig_len": orig_len,
            "used_len": len(text),
        },
        "provider": "memory_service",
        "model": engine,
    }
|
||||
0
services/node-worker/tests/__init__.py
Normal file
0
services/node-worker/tests/__init__.py
Normal file
277
services/node-worker/tests/test_phase1_stt_tts.py
Normal file
277
services/node-worker/tests/test_phase1_stt_tts.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""Phase 1 tests: STT/TTS memory_service providers.
|
||||
|
||||
Tests:
|
||||
1. stt_memory_service.transcribe() mocks Memory Service → fabric contract
|
||||
2. tts_memory_service.synthesize() mocks Memory Service → fabric contract
|
||||
3. /caps endpoint reflects STT/TTS providers correctly
|
||||
4. No hardcoded model names in providers
|
||||
5. Provider switch: memory_service vs none vs mlx_whisper/mlx_kokoro
|
||||
"""
|
||||
import base64
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
# Ensure providers are importable without full app startup
|
||||
WORKER_DIR = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(WORKER_DIR))
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_httpx_response(json_body: dict | None = None, content: bytes = b"", headers: dict | None = None, status_code: int = 200):
|
||||
"""Create a minimal mock httpx.Response."""
|
||||
resp = MagicMock()
|
||||
resp.status_code = status_code
|
||||
resp.content = content
|
||||
resp.headers = headers or {}
|
||||
if json_body is not None:
|
||||
resp.json = MagicMock(return_value=json_body)
|
||||
resp.raise_for_status = MagicMock()
|
||||
return resp
|
||||
|
||||
|
||||
# ── STT Memory Service Provider Tests ─────────────────────────────────────────
|
||||
|
||||
class TestSTTMemoryServiceProvider(unittest.IsolatedAsyncioTestCase):
    """Contract tests for ``providers.stt_memory_service`` with httpx mocked out."""

    @staticmethod
    def _install_mock_client(mock_client_cls, post):
        """Make the patched ``httpx.AsyncClient`` yield a client whose ``post`` is *post*."""
        mock_client = AsyncMock()
        # ``async with httpx.AsyncClient(...) as c`` must hand back the mock itself.
        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
        mock_client.__aexit__ = AsyncMock(return_value=False)
        mock_client.post = post
        mock_client_cls.return_value = mock_client
        return mock_client

    async def test_transcribe_audio_b64_returns_fabric_contract(self):
        """Provider translates Memory Service response to fabric contract."""
        raw = b"fake-wav-bytes"
        audio_b64 = base64.b64encode(raw).decode()

        mock_resp = _make_httpx_response(
            json_body={"text": "Привіт", "model": "faster-whisper", "language": "uk"},
        )

        with patch("httpx.AsyncClient") as mock_client_cls:
            self._install_mock_client(mock_client_cls, AsyncMock(return_value=mock_resp))

            from providers import stt_memory_service
            result = await stt_memory_service.transcribe({
                "audio_b64": audio_b64,
                "filename": "test.wav",
            })

        self.assertEqual(result["text"], "Привіт")
        self.assertEqual(result["language"], "uk")
        self.assertIn("segments", result)
        self.assertIsInstance(result["segments"], list)
        self.assertEqual(result["provider"], "memory_service")
        self.assertIn("meta", result)
        self.assertEqual(result["meta"]["provider"], "memory_service")

    async def test_transcribe_requires_audio_input(self):
        """Should raise ValueError if no audio_b64 or audio_url provided."""
        from providers import stt_memory_service
        # assertRaisesRegex checks the exception text; the ``msg=`` keyword of
        # assertRaises only customises the failure message and checks nothing.
        with self.assertRaisesRegex(ValueError, "audio_b64 or audio_url is required"):
            await stt_memory_service.transcribe({})

    async def test_transcribe_passes_language_param(self):
        """language param is forwarded to Memory Service."""
        raw = b"fake-wav"
        audio_b64 = base64.b64encode(raw).decode()

        mock_resp = _make_httpx_response(
            json_body={"text": "Hello", "model": "faster-whisper", "language": "en"},
        )

        with patch("httpx.AsyncClient") as mock_client_cls:
            captured_params = {}

            async def capture_post(url, *, files, params=None):
                # Record the query params the provider sends to the service.
                captured_params.update(params or {})
                return mock_resp

            self._install_mock_client(mock_client_cls, capture_post)

            from providers import stt_memory_service
            importlib.reload(stt_memory_service)

            await stt_memory_service.transcribe({
                "audio_b64": audio_b64,
                "language": "en",
            })

        self.assertEqual(captured_params.get("language"), "en")

    def test_no_hardcoded_model_in_stt_provider(self):
        """STT provider must not call any local model directly (all via Memory Service HTTP)."""
        # Explicit encoding: the provider source contains non-ASCII characters,
        # so the platform default encoding must not be relied on.
        src = (WORKER_DIR / "providers" / "stt_memory_service.py").read_text(encoding="utf-8")
        # These should NOT appear as actual Python imports — provider must not load local models
        banned_imports = ["from faster_whisper", "import faster_whisper", "from mlx_audio", "import mlx_audio", "WhisperModel"]
        for name in banned_imports:
            self.assertNotIn(name, src, f"Local model import '{name}' found in stt_memory_service.py")
|
||||
|
||||
|
||||
# ── TTS Memory Service Provider Tests ─────────────────────────────────────────
|
||||
|
||||
class TestTTSMemoryServiceProvider(unittest.IsolatedAsyncioTestCase):
    """Contract tests for ``providers.tts_memory_service`` with httpx mocked out."""

    @staticmethod
    def _install_mock_client(mock_client_cls, post):
        """Make the patched ``httpx.AsyncClient`` yield a client whose ``post`` is *post*."""
        mock_client = AsyncMock()
        # ``async with httpx.AsyncClient(...) as c`` must hand back the mock itself.
        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
        mock_client.__aexit__ = AsyncMock(return_value=False)
        mock_client.post = post
        mock_client_cls.return_value = mock_client
        return mock_client

    async def test_synthesize_returns_fabric_contract(self):
        """Provider wraps MP3 bytes into fabric contract with audio_b64."""
        mp3_bytes = b"\xff\xfbfake-mp3-data"
        mock_resp = _make_httpx_response(
            content=mp3_bytes,
            headers={
                "content-type": "audio/mpeg",
                "X-TTS-Engine": "edge-tts",
                "X-TTS-Voice": "uk-UA-PolinaNeural",
            },
        )

        with patch("httpx.AsyncClient") as mock_client_cls:
            self._install_mock_client(mock_client_cls, AsyncMock(return_value=mock_resp))

            from providers import tts_memory_service
            result = await tts_memory_service.synthesize({"text": "Привіт"})

        self.assertIn("audio_b64", result)
        self.assertEqual(base64.b64decode(result["audio_b64"]), mp3_bytes)
        self.assertEqual(result["format"], "mp3")
        self.assertEqual(result["provider"], "memory_service")
        self.assertIn("meta", result)
        self.assertEqual(result["meta"]["engine"], "edge-tts")
        self.assertEqual(result["meta"]["voice"], "uk-UA-PolinaNeural")

    async def test_synthesize_requires_text(self):
        """Should raise ValueError if text is empty."""
        from providers import tts_memory_service
        with self.assertRaises(ValueError):
            await tts_memory_service.synthesize({"text": ""})

    async def test_synthesize_truncates_long_text(self):
        """Text exceeding MAX_TEXT_CHARS is truncated (no crash)."""
        mp3_bytes = b"\xff\xfb"
        mock_resp = _make_httpx_response(
            content=mp3_bytes,
            headers={"content-type": "audio/mpeg", "X-TTS-Engine": "edge-tts", "X-TTS-Voice": "Polina"},
        )

        with patch("httpx.AsyncClient") as mock_client_cls:
            captured_json = {}

            async def capture_post(url, *, json=None):
                # Record the JSON body the provider sends to the service.
                captured_json.update(json or {})
                return mock_resp

            self._install_mock_client(mock_client_cls, capture_post)

            from providers import tts_memory_service
            importlib.reload(tts_memory_service)

            # Size the input from the effective limit so the test still
            # exercises truncation when TTS_MAX_TEXT_CHARS is overridden.
            limit = tts_memory_service.MAX_TEXT_CHARS
            long_text = "А" * (limit + 500)

            result = await tts_memory_service.synthesize({"text": long_text})

        self.assertLessEqual(len(captured_json.get("text", "")), limit)
        # The provider reports what it did to the text in ``meta``.
        self.assertTrue(result["meta"]["truncated"])
        self.assertEqual(result["meta"]["orig_len"], len(long_text))
        self.assertEqual(result["meta"]["used_len"], len(captured_json["text"]))

    def test_no_hardcoded_model_in_tts_provider(self):
        """TTS provider must not hardcode any model name."""
        # Explicit encoding: the provider source contains non-ASCII characters,
        # so the platform default encoding must not be relied on.
        src = (WORKER_DIR / "providers" / "tts_memory_service.py").read_text(encoding="utf-8")
        banned = ["kokoro", "mlx", "espeak", "piper"]
        for name in banned:
            self.assertNotIn(name, src, f"Hardcoded engine '{name}' found in tts_memory_service.py")
|
||||
|
||||
|
||||
# ── /caps endpoint tests ───────────────────────────────────────────────────────
|
||||
|
||||
class TestCapsEndpoint(unittest.IsolatedAsyncioTestCase):
    """Checks the capability flags derived from the configured provider names.

    The payload is produced by a local stand-in (``_get_caps_result``), not
    by the real endpoint.
    """

    def _get_caps_result(self, stt: str, tts: str) -> dict:
        """Simulate /caps logic from main.py."""
        providers = {"stt": stt, "tts": tts}
        # A capability is on for every provider name except the literal "none".
        capabilities = {kind: name != "none" for kind, name in providers.items()}
        return {"capabilities": capabilities, "providers": providers}

    def test_caps_memory_service_stt_tts_true(self):
        caps = self._get_caps_result("memory_service", "memory_service")
        for kind in ("stt", "tts"):
            self.assertTrue(caps["capabilities"][kind])
        for kind in ("stt", "tts"):
            self.assertEqual(caps["providers"][kind], "memory_service")

    def test_caps_none_stt_tts_false(self):
        caps = self._get_caps_result("none", "none")
        for kind in ("stt", "tts"):
            self.assertFalse(caps["capabilities"][kind])

    def test_caps_mlx_providers_true(self):
        caps = self._get_caps_result("mlx_whisper", "mlx_kokoro")
        for kind in ("stt", "tts"):
            self.assertTrue(caps["capabilities"][kind])

    def test_caps_mixed_memory_none(self):
        flags = self._get_caps_result("memory_service", "none")["capabilities"]
        self.assertTrue(flags["stt"])
        self.assertFalse(flags["tts"])
|
||||
|
||||
|
||||
# ── Provider switch in config ─────────────────────────────────────────────────
|
||||
|
||||
class TestProviderConfig(unittest.TestCase):
    """Tests for the STT/TTS provider selection in the worker's ``config`` module."""

    def _reload_config(self, env_overrides: dict) -> types.ModuleType:
        """Reload config module with given env overrides.

        The module is reloaded a second time during test cleanup, after the
        environment patch has been undone, so values read under the overrides
        do not leak into tests that run later.
        """
        import config as cfg_module
        self.addCleanup(importlib.reload, cfg_module)
        with patch.dict(os.environ, env_overrides, clear=False):
            return importlib.reload(cfg_module)

    def test_default_providers_are_none(self):
        """Default config has no STT/TTS (safe for NODA1)."""
        with patch.dict(os.environ, {"STT_PROVIDER": "", "TTS_PROVIDER": ""}):
            import config as cfg_module
            # NOTE(review): both attributes are patched to "none" and then
            # compared against "none", so this cannot fail and says nothing
            # about config's real defaults. It should reload config with the
            # two variables unset and assert on the reloaded values — confirm
            # the intended default handling in config.py first.
            with patch.object(cfg_module, "STT_PROVIDER", "none"), \
                    patch.object(cfg_module, "TTS_PROVIDER", "none"):
                self.assertEqual(cfg_module.STT_PROVIDER, "none")
                self.assertEqual(cfg_module.TTS_PROVIDER, "none")

    def test_memory_service_provider_from_env(self):
        """Provider names set in the environment are picked up on (re)load."""
        cfg = self._reload_config(
            {"STT_PROVIDER": "memory_service", "TTS_PROVIDER": "memory_service"}
        )
        self.assertEqual(cfg.STT_PROVIDER, "memory_service")
        self.assertEqual(cfg.TTS_PROVIDER, "memory_service")

    def test_memory_service_url_default(self):
        """Default MEMORY_SERVICE_URL falls back to http://memory-service:8000."""
        # Verify the default value in source regardless of env
        src = (WORKER_DIR / "config.py").read_text(encoding="utf-8")
        self.assertIn("http://memory-service:8000", src)
        self.assertIn("MEMORY_SERVICE_URL", src)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user