Files
microdao-daarion/infrastructure/deployment/swapper-service-node3.yaml
Apple 5290287058 feat: implement TTS, Document processing, and Memory Service /facts API
- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
2026-01-17 08:16:37 -08:00

146 lines
3.5 KiB
YAML

---
# Swapper Service Deployment for NODE3 (K8s)
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
apiVersion: apps/v1
kind: Deployment
metadata:
  name: swapper-service-node3
  namespace: daarion
  labels:
    app: swapper-service
    component: llm-manager
    node: node-3
spec:
  replicas: 1
  selector:
    matchLabels:
      app: swapper-service
      node: node-3
  template:
    metadata:
      labels:
        app: swapper-service
        component: llm-manager
        node: node-3
    spec:
      # Pin the pod to the node-3 workstation that hosts the RTX 3090.
      nodeSelector:
        kubernetes.io/hostname: node3-daarion
      containers:
        - name: swapper
          image: ghcr.io/daarion-dao/swapper-service:latest
          ports:
            - containerPort: 8890
              name: http
            - containerPort: 8891
              name: metrics
          env:
            - name: OLLAMA_HOST
              value: "http://ollama-service:11434"
            - name: SWAPPER_CONFIG_PATH
              value: "/etc/swapper/swapper_config.yaml"
            - name: NODE_ID
              value: "node-3-threadripper-rtx3090"
            - name: GPU_ENABLED
              value: "true"
          volumeMounts:
            - name: swapper-config
              mountPath: /etc/swapper
          # Single merged resources block. The original manifest declared
          # `resources:` twice (memory/cpu, then nvidia.com/gpu) — duplicate
          # keys are invalid YAML and last-wins in most parsers, which would
          # have silently dropped the memory/CPU requests and limits.
          # Note: GPU is an extended resource, so request must equal limit.
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
              nvidia.com/gpu: 1
            limits:
              memory: "4Gi"
              cpu: "2000m"
              nvidia.com/gpu: 1
          livenessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        - name: swapper-config
          configMap:
            name: swapper-service-config-node3
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: swapper-service-config-node3
  namespace: daarion
data:
  # Mounted by the swapper container at /etc/swapper/swapper_config.yaml.
  swapper_config.yaml: |
    # Swapper configuration for Node #3 (AI/ML workstation)
    # Threadripper PRO + RTX 3090 24GB
    swapper:
      mode: single-active
      max_concurrent_models: 1
      model_swap_timeout: 300
      gpu_enabled: true
      metal_acceleration: false  # NVIDIA GPU, not Apple Metal
      default_model: qwen3-8b
      models:
        # Primary LLM - Qwen3 8B (high priority)
        qwen3-8b:
          path: ollama:qwen3:8b
          type: llm
          size_gb: 4.87
          priority: high
          description: "Primary LLM for general tasks"
        # Vision model - Qwen3-VL 8B (high priority)
        qwen3-vl-8b:
          path: ollama:qwen3-vl:8b
          type: vision
          size_gb: 5.72
          priority: high
          description: "Vision model for image processing"
        # Large models for GPU-intensive tasks
        qwen2.5-7b-instruct:
          path: ollama:qwen2.5:7b-instruct
          type: llm
          size_gb: 4.36
          priority: high
          description: "Qwen2.5 7B Instruct"
      storage:
        models_dir: /app/models
        cache_dir: /app/cache
        swap_dir: /app/swap
      ollama:
        url: http://ollama-service:11434
        timeout: 300
---
apiVersion: v1
kind: Service
metadata:
  name: swapper-service-node3
  namespace: daarion
spec:
  # Cluster-internal service fronting the node-3 swapper pod.
  type: ClusterIP
  selector:
    app: swapper-service
    node: node-3
  ports:
    - name: http
      port: 8890
      targetPort: 8890
    - name: metrics
      port: 8891
      targetPort: 8891