- Node Worker: replace swapper_vision with ollama_vision (direct Ollama API) - Node Worker: add NATS subjects for stt/tts/image (stubs ready) - Node Worker: remove SWAPPER_URL dependency from config - Router: vision calls go directly to Ollama /api/generate with images - Router: local LLM calls go directly to Ollama /api/generate - Router: add OLLAMA_URL and PREFER_NODE_WORKER=true feature flag - Router: /v1/models now uses NCS global capabilities pool - NCS: SWAPPER_URL="" -> skip Swapper probing (status=disabled) - Swapper configs: remove all hardcoded model lists, keep only runtime URLs, timeouts, limits - docker-compose.node1.yml: add OLLAMA_URL, PREFER_NODE_WORKER for router; SWAPPER_URL= for NCS; remove swapper-service from node-worker depends_on - docker-compose.node2-sofiia.yml: same changes for NODE2 Swapper service still runs but is NOT in the critical inference path. Source of truth for models is now NCS -> Ollama /api/tags. Made-with: Cursor
50 lines
1.4 KiB
Python
50 lines
1.4 KiB
Python
"""Ollama vision provider — direct Ollama API with images, no Swapper dependency."""
|
|
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import httpx
|
|
|
|
from config import OLLAMA_BASE_URL, DEFAULT_VISION
|
|
|
|
logger = logging.getLogger("provider.ollama_vision")
|
|
|
|
|
|
async def infer(
    images: Optional[List[str]] = None,
    prompt: str = "",
    model: str = "",
    system: str = "",
    max_tokens: int = 1024,
    temperature: float = 0.2,
    timeout_s: float = 60.0,
) -> Dict[str, Any]:
    """Run a vision inference directly against Ollama's /api/generate endpoint.

    Args:
        images: Base64-encoded images; any ``data:...;base64,`` URL prefix
            is stripped before sending.
        prompt: User prompt; defaults to "Describe this image." when empty.
        model: Ollama model name; falls back to DEFAULT_VISION when empty.
        system: Optional system prompt, forwarded as Ollama's "system" field.
        max_tokens: Generation cap, passed as Ollama's "num_predict" option.
        temperature: Sampling temperature.
        timeout_s: Overall HTTP timeout in seconds.

    Returns:
        Dict with "text", "model", "provider" and "eval_count" keys.

    Raises:
        httpx.HTTPStatusError: If Ollama responds with a non-2xx status.
    """
    chosen_model = model or DEFAULT_VISION

    request_body: Dict[str, Any] = {
        "model": chosen_model,
        "prompt": prompt or "Describe this image.",
        "stream": False,
        "options": {"num_predict": max_tokens, "temperature": temperature},
    }

    if images:
        # Ollama takes raw base64, so drop a leading data-URL header if present.
        request_body["images"] = [
            img.split(",", 1)[1] if img.startswith("data:") and "," in img else img
            for img in images
        ]
    if system:
        request_body["system"] = system

    async with httpx.AsyncClient(timeout=timeout_s) as client:
        response = await client.post(
            f"{OLLAMA_BASE_URL}/api/generate", json=request_body
        )
        response.raise_for_status()
        reply = response.json()

    return {
        "text": reply.get("response", ""),
        "model": chosen_model,
        "provider": "ollama_vision",
        "eval_count": reply.get("eval_count", 0),
    }