"""Ollama LLM provider for node-worker."""

import logging
from typing import Any, Dict, List, Optional

import httpx

from config import OLLAMA_BASE_URL, DEFAULT_LLM

logger = logging.getLogger("provider.ollama")


async def infer(
    messages: Optional[List[Dict[str, str]]] = None,
    prompt: str = "",
    model: str = "",
    system: str = "",
    max_tokens: int = 2048,
    temperature: float = 0.2,
    timeout_s: float = 25.0,
) -> Dict[str, Any]:
    """Run one inference against the local Ollama server.

    Dispatches to the chat endpoint when *messages* is a non-empty list,
    otherwise to the generate endpoint using *prompt* and *system*.

    Args:
        messages: Chat-style message dicts (``role``/``content``). Takes
            precedence over *prompt* when non-empty.
        prompt: Plain completion prompt (generate path only).
        model: Ollama model name; falls back to ``DEFAULT_LLM`` when empty.
        system: System prompt (generate path only).
        max_tokens: Passed to Ollama as ``num_predict``.
        temperature: Sampling temperature.
        timeout_s: Per-request HTTP timeout in seconds.

    Returns:
        Dict with keys ``text``, ``model``, ``provider``, ``eval_count``.

    Raises:
        httpx.HTTPStatusError: On a non-2xx response from Ollama.
        httpx.TimeoutException: When the request exceeds *timeout_s*.
    """
    model = model or DEFAULT_LLM
    if messages:
        return await _chat(messages, model, max_tokens, temperature, timeout_s)
    return await _generate(prompt, system, model, max_tokens, temperature, timeout_s)


async def _post_json(
    path: str,
    payload: Dict[str, Any],
    timeout_s: float,
) -> Dict[str, Any]:
    """POST *payload* to ``OLLAMA_BASE_URL + path`` and return the JSON body.

    Shared request plumbing for both endpoints; raises
    ``httpx.HTTPStatusError`` on non-2xx responses.
    """
    async with httpx.AsyncClient(timeout=timeout_s) as client:
        resp = await client.post(f"{OLLAMA_BASE_URL}{path}", json=payload)
        resp.raise_for_status()
        return resp.json()


def _result(text: str, model: str, data: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize an Ollama response into the provider result shape."""
    return {
        "text": text,
        "model": model,
        "provider": "ollama",
        "eval_count": data.get("eval_count", 0),
    }


async def _chat(
    messages: List[Dict[str, str]],
    model: str,
    max_tokens: int,
    temperature: float,
    timeout_s: float,
) -> Dict[str, Any]:
    """Call Ollama's ``/api/chat`` endpoint (non-streaming)."""
    data = await _post_json(
        "/api/chat",
        {
            "model": model,
            "messages": messages,
            "stream": False,
            "options": {"num_predict": max_tokens, "temperature": temperature},
        },
        timeout_s,
    )
    # Chat responses nest the text under message.content.
    return _result(data.get("message", {}).get("content", ""), model, data)


async def _generate(
    prompt: str,
    system: str,
    model: str,
    max_tokens: int,
    temperature: float,
    timeout_s: float,
) -> Dict[str, Any]:
    """Call Ollama's ``/api/generate`` endpoint (non-streaming)."""
    data = await _post_json(
        "/api/generate",
        {
            "model": model,
            "prompt": prompt,
            "system": system,
            "stream": False,
            "options": {"num_predict": max_tokens, "temperature": temperature},
        },
        timeout_s,
    )
    # Generate responses put the text at the top-level "response" key.
    return _result(data.get("response", ""), model, data)