feat(fabric): decommission Swapper from critical path, NCS = source of truth

- Node Worker: replace swapper_vision with ollama_vision (direct Ollama API)
- Node Worker: add NATS subjects for stt/tts/image (stubs ready)
- Node Worker: remove SWAPPER_URL dependency from config
- Router: vision calls go directly to Ollama /api/generate with images
- Router: local LLM calls go directly to Ollama /api/generate
- Router: add OLLAMA_URL and PREFER_NODE_WORKER=true feature flag
- Router: /v1/models now uses NCS global capabilities pool
- NCS: SWAPPER_URL="" -> skip Swapper probing (status=disabled)
- Swapper configs: remove all hardcoded model lists; keep only runtime URLs, timeouts, limits
- docker-compose.node1.yml: add OLLAMA_URL, PREFER_NODE_WORKER for router; SWAPPER_URL= for NCS; remove swapper-service from node-worker depends_on
- docker-compose.node2-sofiia.yml: same changes for NODA2

Swapper service still runs but is NOT in the critical inference path.
Source of truth for models is now NCS -> Ollama /api/tags.

Made-with: Cursor
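For context on the last point: NCS now treats Ollama's /api/tags as the source of truth for available models. A minimal sketch of that probe, assuming the standard Ollama response shape ({"models": [{"name": ...}]}); the real NCS code may structure this differently:

import httpx

async def list_node_models(ollama_url: str) -> list[str]:
    # /api/tags is Ollama's model-listing endpoint; one call per node
    # replaces probing Swapper for its hardcoded model lists.
    async with httpx.AsyncClient(timeout=10.0) as c:
        resp = await c.get(f"{ollama_url}/api/tags")
        resp.raise_for_status()
        return [m["name"] for m in resp.json().get("models", [])]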
@@ -4,7 +4,6 @@ import os
 NODE_ID = os.getenv("NODE_ID", "noda2")
 NATS_URL = os.getenv("NATS_URL", "nats://dagi-nats:4222")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
-SWAPPER_URL = os.getenv("SWAPPER_URL", "http://swapper-service:8890")
 DEFAULT_LLM = os.getenv("NODE_DEFAULT_LLM", "qwen3:14b")
 DEFAULT_VISION = os.getenv("NODE_DEFAULT_VISION", "llava:13b")
 MAX_CONCURRENCY = int(os.getenv("NODE_WORKER_MAX_CONCURRENCY", "2"))
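The router-side flags named in the commit message (OLLAMA_URL, PREFER_NODE_WORKER) are not shown in this diff; by analogy with the node-worker config above, they would read roughly like this (the defaults here are assumptions):

import os

OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
# Feature flag: when true, the router prefers dispatching jobs to
# node-workers over calling the runtime itself.
PREFER_NODE_WORKER = os.getenv("PREFER_NODE_WORKER", "true").lower() == "true"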
services/node-worker/providers/ollama_vision.py (new file, +49)
@@ -0,0 +1,49 @@
+"""Ollama vision provider — direct Ollama API with images, no Swapper dependency."""
+import logging
+from typing import Any, Dict, List, Optional
+
+import httpx
+
+from config import OLLAMA_BASE_URL, DEFAULT_VISION
+
+logger = logging.getLogger("provider.ollama_vision")
+
+
+async def infer(
+    images: Optional[List[str]] = None,
+    prompt: str = "",
+    model: str = "",
+    system: str = "",
+    max_tokens: int = 1024,
+    temperature: float = 0.2,
+    timeout_s: float = 60.0,
+) -> Dict[str, Any]:
+    model = model or DEFAULT_VISION
+
+    payload: Dict[str, Any] = {
+        "model": model,
+        "prompt": prompt or "Describe this image.",
+        "stream": False,
+        "options": {"num_predict": max_tokens, "temperature": temperature},
+    }
+    if images:
+        clean = []
+        for img in images:
+            if "," in img and img.startswith("data:"):
+                clean.append(img.split(",", 1)[1])
+            else:
+                clean.append(img)
+        payload["images"] = clean
+    if system:
+        payload["system"] = system
+
+    async with httpx.AsyncClient(timeout=timeout_s) as c:
+        resp = await c.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload)
+        resp.raise_for_status()
+        data = resp.json()
+    return {
+        "text": data.get("response", ""),
+        "model": model,
+        "provider": "ollama_vision",
+        "eval_count": data.get("eval_count", 0),
+    }
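A minimal caller for the new provider, e.g. from a test. The data-URL prefix is stripped inside infer(), so either raw base64 or a full data URL works; the base64 payload below is a truncated placeholder:

import asyncio
from providers import ollama_vision

async def main():
    result = await ollama_vision.infer(
        images=["data:image/png;base64,iVBORw0KGgo..."],  # prefix stripped by infer()
        prompt="What is in this picture?",
    )
    print(result["text"], result["model"], result["eval_count"])

asyncio.run(main())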
@@ -9,7 +9,7 @@ from typing import Any, Dict
 import config
 from models import JobRequest, JobResponse, JobError
 from idempotency import IdempotencyStore
-from providers import ollama, swapper_vision
+from providers import ollama, ollama_vision
 import fabric_metrics as fm

 logger = logging.getLogger("node-worker")
@@ -27,9 +27,13 @@ async def start(nats_client):
     global _nats_client
     _nats_client = nats_client

+    nid = config.NODE_ID.lower()
     subjects = [
-        f"node.{config.NODE_ID.lower()}.llm.request",
-        f"node.{config.NODE_ID.lower()}.vision.request",
+        f"node.{nid}.llm.request",
+        f"node.{nid}.vision.request",
+        f"node.{nid}.stt.request",
+        f"node.{nid}.tts.request",
+        f"node.{nid}.image.request",
     ]
     for subj in subjects:
         await nats_client.subscribe(subj, cb=_handle_request)
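A hedged sketch of exercising one of these subjects over NATS request/reply with nats-py. The envelope fields below (job_id, trace_id, required_type, payload) are inferred from this diff; the authoritative schema is models.JobRequest:

import asyncio, json, uuid
import nats

async def main():
    nc = await nats.connect("nats://dagi-nats:4222")
    req = {
        "job_id": str(uuid.uuid4()),
        "trace_id": str(uuid.uuid4()),
        "required_type": "vision",
        "payload": {"prompt": "Describe this image.", "images": ["<base64>"]},
    }
    # Request/reply: the node-worker's _handle_request answers on the inbox.
    msg = await nc.request("node.noda2.vision.request", json.dumps(req).encode(), timeout=60)
    print(json.loads(msg.data))
    await nc.close()

asyncio.run(main())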
@@ -160,7 +164,7 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
         )
     elif job.required_type == "vision":
         result = await asyncio.wait_for(
-            swapper_vision.infer(
+            ollama_vision.infer(
                 images=payload.get("images"),
                 prompt=payload.get("prompt", ""),
                 model=model,
@@ -171,11 +175,20 @@
             ),
             timeout=timeout_s,
         )
+    elif job.required_type in ("stt", "tts", "image"):
+        return JobResponse(
+            job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
+            status="error",
+            error=JobError(
+                code="NOT_YET_IMPLEMENTED",
+                message=f"{job.required_type} adapter coming soon; use direct runtime API for now",
+            ),
+        )
     else:
         return JobResponse(
             job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
             status="error",
-            error=JobError(code="UNSUPPORTED_TYPE", message=f"{job.required_type} not implemented"),
+            error=JobError(code="UNSUPPORTED_TYPE", message=f"{job.required_type} not supported"),
         )

     logger.info(
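Until the stt/tts/image adapters land, callers of those subjects get a structured error instead of a timeout. Assuming JobResponse serializes field-for-field, an stt request would come back roughly as:

{
    "job_id": "…",
    "trace_id": "…",
    "node_id": "noda2",
    "status": "error",
    "error": {
        "code": "NOT_YET_IMPLEMENTED",
        "message": "stt adapter coming soon; use direct runtime API for now",
    },
}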