New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules

Made-with: Cursor
110 lines
2.9 KiB
Python
110 lines
2.9 KiB
Python
"""Kokoro TTS Service — lightweight HTTP wrapper for kokoro on Apple Silicon.
|
|
|
|
Runs natively on host (not in Docker) to access Metal/MPS acceleration.
|
|
Port: 8201
|
|
"""
|
|
import asyncio
|
|
import base64
|
|
import io
|
|
import logging
|
|
import os
|
|
import time
|
|
from typing import Optional
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
from pydantic import BaseModel, Field
|
|
import uvicorn
|
|
|
|
# Root logging configuration; this service logs under the "mlx-tts" name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mlx-tts")

app = FastAPI(title="Kokoro TTS", version="1.0.0")

# Tunables, each overridable through an environment variable.
DEFAULT_VOICE = os.environ.get("TTS_DEFAULT_VOICE", "af_heart")
MAX_TEXT_CHARS = int(os.environ.get("TTS_MAX_TEXT_CHARS", "5000"))
DEFAULT_SAMPLE_RATE = int(os.environ.get("TTS_SAMPLE_RATE", "24000"))

# Kokoro pipeline singleton; stays None until _load_pipeline() has run.
_pipeline = None
# Held by the /synthesize handler for the duration of one synthesis.
_lock = asyncio.Lock()
|
|
|
|
|
|
def _load_pipeline():
    """Create the module-level Kokoro pipeline on first call.

    Subsequent calls do nothing once ``_pipeline`` is set. The ``kokoro``
    package is imported inside the function rather than at module import
    time, and the elapsed load time is logged.
    """
    global _pipeline
    if _pipeline is None:
        logger.info("Loading Kokoro pipeline...")
        t0 = time.time()
        # Deferred import: only pulled in when the pipeline is actually built.
        from kokoro import KPipeline

        _pipeline = KPipeline(lang_code="a")
        logger.info(f"Kokoro ready in {time.time()-t0:.1f}s")
|
|
|
|
|
|
class SynthesizeRequest(BaseModel):
    """Request body for ``POST /synthesize``.

    ``voice`` and ``sample_rate`` default to the module-level
    ``DEFAULT_VOICE`` / ``DEFAULT_SAMPLE_RATE`` so that the
    ``TTS_DEFAULT_VOICE`` and ``TTS_SAMPLE_RATE`` environment overrides take
    effect when a client omits these fields. With an unmodified environment
    the defaults are still ``"af_heart"`` and ``24000``.
    """

    # Text to speak (required).
    text: str
    # Voice identifier handed to the Kokoro pipeline.
    voice: str = Field(default=DEFAULT_VOICE)
    # Requested output format.
    # NOTE(review): the /synthesize handler in this file always returns WAV
    # and does not read this field — confirm whether mp3 is meant to be
    # supported.
    format: str = Field(default="wav", description="wav|mp3")
    # Sample rate written into the output file header.
    sample_rate: int = Field(default=DEFAULT_SAMPLE_RATE)
|
|
|
|
|
|
class SynthesizeResponse(BaseModel):
    """Response body of ``POST /synthesize``."""

    # Base64 text of the encoded audio file.
    audio_b64: str = Field(default="")
    # Format of the encoded audio.
    format: str = Field(default="wav")
    # Free-form metadata about the synthesis run.
    meta: dict = Field(default_factory=dict)
|
|
|
|
|
|
@app.on_event("startup")
async def startup() -> None:
    """Application startup hook: make sure the Kokoro pipeline is loaded."""
    _load_pipeline()
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Report service status and whether the Kokoro pipeline is loaded."""
    pipeline_loaded = _pipeline is not None
    return {
        "status": "ok",
        "model": "kokoro-v1.0",
        "ready": pipeline_loaded,
    }
|
|
|
|
|
|
def _synthesize_wav(text: str, voice: str, sample_rate: int) -> bytes:
    """Run the Kokoro pipeline over *text* and encode the result as WAV.

    This is blocking work and is meant to be called off the event loop.

    Returns:
        The WAV file contents, or ``b""`` when the pipeline yields no audio
        segments.
    """
    import numpy as np
    import soundfile as sf

    segments = [audio for _, _, audio in _pipeline(text, voice=voice)]
    if not segments:
        return b""

    buf = io.BytesIO()
    # NOTE(review): samples are written with the requested rate as-is; no
    # resampling happens here — confirm callers only pass the model's rate.
    sf.write(buf, np.concatenate(segments), sample_rate, format="WAV")
    return buf.getvalue()


@app.post("/synthesize", response_model=SynthesizeResponse)
async def synthesize(req: SynthesizeRequest):
    """Synthesize ``req.text`` with Kokoro and return base64-encoded WAV.

    Synthesis runs in the default executor so the event loop (and therefore
    ``/health``) stays responsive; ``_lock`` keeps it to one synthesis at a
    time. ``req.format`` is not consulted: the output is always WAV.

    Raises:
        HTTPException 400: text is empty or whitespace-only.
        HTTPException 413: text is longer than ``MAX_TEXT_CHARS``.
        HTTPException 503: the Kokoro pipeline has not been loaded.
        HTTPException 500: the pipeline produced no audio.
    """
    if not req.text.strip():
        raise HTTPException(400, "text is required")
    if len(req.text) > MAX_TEXT_CHARS:
        raise HTTPException(413, f"Text exceeds {MAX_TEXT_CHARS} chars")
    if _pipeline is None:
        # Mirrors the "ready" flag exposed by /health.
        raise HTTPException(503, "Kokoro pipeline is not loaded")

    voice = req.voice or DEFAULT_VOICE
    sample_rate = req.sample_rate or DEFAULT_SAMPLE_RATE

    loop = asyncio.get_running_loop()
    async with _lock:
        # Timer starts after the lock is acquired, so queueing time is
        # excluded from duration_ms.
        t0 = time.time()
        wav_bytes = await loop.run_in_executor(
            None, _synthesize_wav, req.text, voice, sample_rate
        )
        duration_ms = int((time.time() - t0) * 1000)

    if not wav_bytes:
        raise HTTPException(500, "Kokoro produced no audio")

    audio_b64 = base64.b64encode(wav_bytes).decode()

    return SynthesizeResponse(
        audio_b64=audio_b64,
        format="wav",
        meta={
            "model": "kokoro-v1.0",
            "voice": voice,
            "duration_ms": duration_ms,
            "audio_bytes": len(wav_bytes),
            "device": "apple_silicon",
        },
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct execution: listen on all interfaces; PORT overrides the 8201 default.
    listen_port = int(os.getenv("PORT", "8201"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|