Node Worker (services/node-worker/):
- NATS subscriber for node.{NODE_ID}.llm.request / vision.request
- Canonical JobRequest/JobResponse envelope (Pydantic)
- Idempotency cache (TTL 10min) with inflight dedup
- Deadline enforcement (DEADLINE_EXCEEDED on expired jobs)
- Concurrency limiter (semaphore, returns busy)
- Ollama + Swapper vision providers
Router offload (services/router/offload_client.py):
- NATS req/reply with configurable retries
- Circuit breaker per node+type (3 fails/60s → open 120s)
- Concurrency semaphore for remote requests
Model selection (services/router/model_select.py):
- exclude_nodes parameter for circuit-broken nodes
- force_local flag for fallback re-selection
- Integrated circuit breaker state awareness
Router /infer pipeline:
- Remote offload path when NCS selects remote node
- Automatic fallback: exclude failed node → force_local re-select
- Deadline propagation from router to node-worker
Tests: 17 unit tests (idempotency, deadline, circuit breaker)
Docs: ops/offload_routing.md (subjects, envelope, verification)
Made-with: Cursor
82 lines · 2.2 KiB · Python
"""Ollama LLM provider for node-worker."""
|
|
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import httpx
|
|
|
|
from config import OLLAMA_BASE_URL, DEFAULT_LLM
|
|
|
|
# Module-level logger, namespaced so provider logs are filterable.
logger = logging.getLogger("provider.ollama")
|
|
|
|
|
|
async def infer(
    messages: Optional[List[Dict[str, str]]] = None,
    prompt: str = "",
    model: str = "",
    system: str = "",
    max_tokens: int = 2048,
    temperature: float = 0.2,
    timeout_s: float = 25.0,
) -> Dict[str, Any]:
    """Run one inference against the local Ollama server.

    Dispatches to the chat endpoint when *messages* is non-empty,
    otherwise to the generate endpoint using *prompt* and *system*.
    An empty *model* falls back to ``DEFAULT_LLM``.

    Returns the normalized dict produced by ``_chat`` / ``_generate``:
    keys ``text``, ``model``, ``provider``, ``eval_count``.
    """
    chosen = model or DEFAULT_LLM
    if not messages:
        # No chat history supplied — use the plain completion endpoint.
        return await _generate(prompt, system, chosen, max_tokens, temperature, timeout_s)
    return await _chat(messages, chosen, max_tokens, temperature, timeout_s)
|
|
|
|
|
|
async def _chat(
    messages: List[Dict[str, str]],
    model: str,
    max_tokens: int,
    temperature: float,
    timeout_s: float,
) -> Dict[str, Any]:
    """POST *messages* to Ollama's ``/api/chat`` and normalize the reply.

    Raises ``httpx.HTTPStatusError`` on a non-2xx response.
    """
    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        # Ollama expects generation knobs nested under "options".
        "options": {"num_predict": max_tokens, "temperature": temperature},
    }
    async with httpx.AsyncClient(timeout=timeout_s) as client:
        resp = await client.post(f"{OLLAMA_BASE_URL}/api/chat", json=payload)
        resp.raise_for_status()
        body = resp.json()
    return {
        "text": body.get("message", {}).get("content", ""),
        "model": model,
        "provider": "ollama",
        "eval_count": body.get("eval_count", 0),
    }
|
|
|
|
|
|
async def _generate(
    prompt: str,
    system: str,
    model: str,
    max_tokens: int,
    temperature: float,
    timeout_s: float,
) -> Dict[str, Any]:
    """POST *prompt* to Ollama's ``/api/generate`` and normalize the reply.

    Raises ``httpx.HTTPStatusError`` on a non-2xx response.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "system": system,
        "stream": False,
        # Ollama expects generation knobs nested under "options".
        "options": {"num_predict": max_tokens, "temperature": temperature},
    }
    async with httpx.AsyncClient(timeout=timeout_s) as client:
        resp = await client.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload)
        resp.raise_for_status()
        body = resp.json()
    return {
        "text": body.get("response", ""),
        "model": model,
        "provider": "ollama",
        "eval_count": body.get("eval_count", 0),
    }
|