- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
146 lines
3.5 KiB
YAML
146 lines
3.5 KiB
YAML
---
# Swapper Service Deployment for NODE3 (K8s)
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
apiVersion: apps/v1
kind: Deployment
metadata:
  name: swapper-service-node3
  namespace: daarion
  labels:
    app: swapper-service
    component: llm-manager
    node: node-3
spec:
  # Single replica: the pod is pinned to one host and requests its GPU.
  replicas: 1
  selector:
    # Must stay a subset of template.metadata.labels below.
    matchLabels:
      app: swapper-service
      node: node-3
  template:
    metadata:
      labels:
        app: swapper-service
        component: llm-manager
        node: node-3
    spec:
      # Schedule only onto the node whose hostname label is node3-daarion.
      nodeSelector:
        kubernetes.io/hostname: node3-daarion
      containers:
        - name: swapper
          # NOTE(review): ":latest" is a mutable tag; consider pinning a
          # version or digest so rollouts are reproducible.
          image: ghcr.io/daarion-dao/swapper-service:latest
          ports:
            - containerPort: 8890
              name: http
            - containerPort: 8891
              name: metrics
          env:
            - name: OLLAMA_HOST
              value: "http://ollama-service:11434"
            # Points at the file provided by the swapper-config volume
            # mounted at /etc/swapper.
            - name: SWAPPER_CONFIG_PATH
              value: "/etc/swapper/swapper_config.yaml"
            - name: NODE_ID
              value: "node-3-threadripper-rtx3090"
            # Env values must be strings, hence the quoted boolean.
            - name: GPU_ENABLED
              value: "true"
          volumeMounts:
            - name: swapper-config
              mountPath: /etc/swapper
          # A container may declare `resources` only once. The memory/cpu
          # settings and the GPU settings were previously two separate
          # `resources` keys (a duplicate mapping key), so they are merged
          # here into a single mapping.
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
              # GPU support for RTX 3090
              nvidia.com/gpu: 1
            limits:
              memory: "4Gi"
              cpu: "2000m"
              # For extended resources such as GPUs the request, when
              # given, must equal the limit.
              nvidia.com/gpu: 1
          # Restart the container if GET /health on port 8890 fails;
          # first check after 30s, then every 10s.
          livenessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 30
            periodSeconds: 10
          # Withhold Service traffic until GET /health on port 8890
          # succeeds; first check after 10s, then every 5s.
          readinessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        # Backed by the ConfigMap named swapper-service-config-node3.
        - name: swapper-config
          configMap:
            name: swapper-service-config-node3
---
# ConfigMap carrying the Swapper configuration file for Node #3.
# The Deployment swapper-service-node3 mounts it at /etc/swapper, so the
# key below is exposed as /etc/swapper/swapper_config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: swapper-service-config-node3
  namespace: daarion
# NOTE(review): the indentation of the embedded document was reconstructed.
# `models`, `storage` and `ollama` are assumed to be top-level siblings of
# `swapper` - confirm against the service's config loader.
data:
  swapper_config.yaml: |
    # Swapper Configuration for Node #3 (AI/ML Workstation)
    # Threadripper PRO + RTX 3090 24GB
    swapper:
      mode: single-active
      max_concurrent_models: 1
      model_swap_timeout: 300
      gpu_enabled: true
      metal_acceleration: false # NVIDIA GPU
      default_model: qwen3-8b

    models:
      # Primary LLM - Qwen3 8B (High Priority)
      qwen3-8b:
        path: ollama:qwen3:8b
        type: llm
        size_gb: 4.87
        priority: high
        description: "Primary LLM for general tasks"

      # Vision Model - Qwen3-VL 8B (High Priority)
      qwen3-vl-8b:
        path: ollama:qwen3-vl:8b
        type: vision
        size_gb: 5.72
        priority: high
        description: "Vision model for image processing"

      # Large models for GPU-intensive tasks
      qwen2.5-7b-instruct:
        path: ollama:qwen2.5:7b-instruct
        type: llm
        size_gb: 4.36
        priority: high
        description: "Qwen2.5 7B Instruct"

    storage:
      models_dir: /app/models
      cache_dir: /app/cache
      swap_dir: /app/swap

    ollama:
      url: http://ollama-service:11434
      timeout: 300
---
# Cluster-internal Service for the Node #3 Swapper pods.
apiVersion: v1
kind: Service
metadata:
  name: swapper-service-node3
  namespace: daarion
spec:
  # Routes to pods carrying both labels; these match the pod template
  # labels of the swapper-service-node3 Deployment.
  selector:
    app: swapper-service
    node: node-3
  ports:
    # Service ports map one-to-one onto the container ports.
    - name: http
      port: 8890
      targetPort: 8890
    - name: metrics
      port: 8891
      targetPort: 8891
  # ClusterIP: reachable only from inside the cluster.
  type: ClusterIP