feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
116
services/mlx-stt-service/main.py
Normal file
116
services/mlx-stt-service/main.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""MLX Whisper STT Service — lightweight HTTP wrapper for mlx-whisper on Apple Silicon.
|
||||
|
||||
Runs natively on host (not in Docker) to access Metal/MPS acceleration.
|
||||
Port: 8200
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
import uvicorn
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("mlx-stt")
|
||||
|
||||
app = FastAPI(title="MLX Whisper STT", version="1.0.0")
|
||||
|
||||
MODEL = os.getenv("MLX_WHISPER_MODEL", "mlx-community/whisper-large-v3-turbo")
|
||||
MAX_AUDIO_BYTES = int(os.getenv("STT_MAX_AUDIO_BYTES", str(25 * 1024 * 1024)))
|
||||
|
||||
_whisper = None
|
||||
_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def _load_model():
    """Import mlx_whisper, bind it to the module-level ``_whisper``, and warm it up.

    Idempotent: returns immediately once ``_whisper`` is set. The import is
    deferred so the module can be loaded on machines without MLX installed.

    Fix: the warm-up call passes an empty string as the audio path, which is
    not a readable file — if mlx_whisper raises on it, the exception used to
    propagate out of the FastAPI startup hook and kill the service. Warm-up is
    now best-effort: a failure is logged and the model simply pays its load
    cost on the first real /transcribe request instead.
    """
    global _whisper
    if _whisper is not None:
        return
    logger.info(f"Loading MLX Whisper model: {MODEL}")
    t0 = time.time()
    import mlx_whisper
    _whisper = mlx_whisper
    try:
        # Warm up / trigger the HF model download before serving traffic.
        _whisper.transcribe("", path_or_hf_repo=MODEL)
    except Exception:
        # Best-effort only: an unreadable warm-up input must not abort startup.
        logger.warning("MLX Whisper warm-up failed (continuing)", exc_info=True)
    logger.info(f"MLX Whisper ready in {time.time()-t0:.1f}s")
class TranscribeRequest(BaseModel):
    """Body for POST /transcribe; supply exactly one of audio_b64 / audio_url."""

    # Raw audio bytes, base64-encoded. Takes precedence over audio_url.
    audio_b64: str = ""
    # http(s) URL, file:// URL, or absolute local path to the audio file.
    audio_url: str = ""
    # Optional language hint forwarded to Whisper; autodetect when None.
    language: Optional[str] = None
    # NOTE(review): not consumed by the /transcribe handler in this file —
    # confirm whether downstream clients rely on it before removing.
    format: str = Field(default="json", description="text|segments|json")
class TranscribeResponse(BaseModel):
    """Result of one transcription run."""

    # Full transcript text.
    text: str = ""
    # Per-segment dicts with "start", "end", "text" keys (as reported by Whisper).
    segments: list = Field(default_factory=list)
    # Language Whisper reported (detected, or the requested hint).
    language: str = ""
    # Diagnostics: model id, wall-clock duration_ms, device tag.
    meta: dict = Field(default_factory=dict)
@app.on_event("startup")
|
||||
async def startup():
|
||||
_load_model()
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
return {"status": "ok", "model": MODEL, "ready": _whisper is not None}
|
||||
|
||||
|
||||
@app.post("/transcribe", response_model=TranscribeResponse)
|
||||
async def transcribe(req: TranscribeRequest):
|
||||
if not req.audio_b64 and not req.audio_url:
|
||||
raise HTTPException(400, "audio_b64 or audio_url required")
|
||||
|
||||
if req.audio_b64:
|
||||
raw = base64.b64decode(req.audio_b64)
|
||||
elif req.audio_url.startswith(("file://", "/")):
|
||||
path = req.audio_url.replace("file://", "")
|
||||
with open(path, "rb") as f:
|
||||
raw = f.read()
|
||||
else:
|
||||
import httpx
|
||||
async with httpx.AsyncClient(timeout=30) as c:
|
||||
resp = await c.get(req.audio_url)
|
||||
resp.raise_for_status()
|
||||
raw = resp.content
|
||||
|
||||
if len(raw) > MAX_AUDIO_BYTES:
|
||||
raise HTTPException(413, f"Audio exceeds {MAX_AUDIO_BYTES} bytes")
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
||||
tmp.write(raw)
|
||||
tmp_path = tmp.name
|
||||
|
||||
try:
|
||||
async with _lock:
|
||||
t0 = time.time()
|
||||
kwargs = {"path_or_hf_repo": MODEL}
|
||||
if req.language:
|
||||
kwargs["language"] = req.language
|
||||
result = _whisper.transcribe(tmp_path, **kwargs)
|
||||
duration_ms = int((time.time() - t0) * 1000)
|
||||
finally:
|
||||
os.unlink(tmp_path)
|
||||
|
||||
segments = [
|
||||
{"start": s.get("start", 0), "end": s.get("end", 0), "text": s.get("text", "")}
|
||||
for s in result.get("segments", [])
|
||||
]
|
||||
|
||||
return TranscribeResponse(
|
||||
text=result.get("text", ""),
|
||||
segments=segments,
|
||||
language=result.get("language", ""),
|
||||
meta={"model": MODEL, "duration_ms": duration_ms, "device": "apple_silicon"},
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8200")))
|
||||
4
services/mlx-stt-service/requirements.txt
Normal file
4
services/mlx-stt-service/requirements.txt
Normal file
@@ -0,0 +1,4 @@
fastapi>=0.110.0
uvicorn>=0.29.0
httpx>=0.27.0
mlx-whisper>=0.4.0
Reference in New Issue
Block a user