"""Ollama vision provider — direct Ollama API with images, no Swapper dependency.""" import logging from typing import Any, Dict, List, Optional import httpx from config import OLLAMA_BASE_URL, DEFAULT_VISION logger = logging.getLogger("provider.ollama_vision") async def infer( images: Optional[List[str]] = None, prompt: str = "", model: str = "", system: str = "", max_tokens: int = 1024, temperature: float = 0.2, timeout_s: float = 60.0, ) -> Dict[str, Any]: model = model or DEFAULT_VISION payload: Dict[str, Any] = { "model": model, "prompt": prompt or "Describe this image.", "stream": False, "options": {"num_predict": max_tokens, "temperature": temperature}, } if images: clean = [] for img in images: if "," in img and img.startswith("data:"): clean.append(img.split(",", 1)[1]) else: clean.append(img) payload["images"] = clean if system: payload["system"] = system async with httpx.AsyncClient(timeout=timeout_s) as c: resp = await c.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload) resp.raise_for_status() data = resp.json() return { "text": data.get("response", ""), "model": model, "provider": "ollama_vision", "eval_count": data.get("eval_count", 0), }