feat: implement TTS, Document processing, and Memory Service /facts API
- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
This commit is contained in:
145
infrastructure/deployment/swapper-service-node3.yaml
Normal file
145
infrastructure/deployment/swapper-service-node3.yaml
Normal file
@@ -0,0 +1,145 @@
|
||||
---
|
||||
# Swapper Service Deployment for NODE3 (K8s)
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
apiVersion: apps/v1
kind: Deployment
metadata:
  name: swapper-service-node3
  namespace: daarion
  labels:
    app: swapper-service
    component: llm-manager
    node: node-3
spec:
  replicas: 1
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: swapper-service
      node: node-3
  template:
    metadata:
      labels:
        app: swapper-service
        component: llm-manager
        node: node-3
    spec:
      # Pin the pod to the node whose hostname label is node3-daarion.
      nodeSelector:
        kubernetes.io/hostname: node3-daarion
      containers:
        - name: swapper
          # NOTE(review): `latest` is a mutable tag; consider pinning a
          # version or digest so rollouts are reproducible.
          image: ghcr.io/daarion-dao/swapper-service:latest
          ports:
            - containerPort: 8890
              name: http
            - containerPort: 8891
              name: metrics
          env:
            - name: OLLAMA_HOST
              value: "http://ollama-service:11434"
            # Points at the file projected from the swapper-config volume.
            - name: SWAPPER_CONFIG_PATH
              value: "/etc/swapper/swapper_config.yaml"
            - name: NODE_ID
              value: "node-3-threadripper-rtx3090"
            # Quoted: env values must be strings, not YAML booleans.
            - name: GPU_ENABLED
              value: "true"
          volumeMounts:
            - name: swapper-config
              mountPath: /etc/swapper
          # CPU, memory and GPU (RTX 3090) live in ONE resources mapping.
          # They were previously split across two `resources:` keys on the
          # same container; duplicate keys are invalid YAML and most parsers
          # keep only the last one, which dropped the cpu/memory settings.
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
              nvidia.com/gpu: 1
            limits:
              memory: "4Gi"
              cpu: "2000m"
              nvidia.com/gpu: 1
          livenessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        # Backed by the ConfigMap named swapper-service-config-node3.
        - name: swapper-config
          configMap:
            name: swapper-service-config-node3
|
||||
---
|
||||
apiVersion: v1
kind: ConfigMap
metadata:
  name: swapper-service-config-node3
  namespace: daarion
# The single key below holds the swapper configuration as a literal block.
# NOTE(review): the nesting inside the block was reconstructed; it assumes
# `swapper`, `models`, `storage` and `ollama` are top-level keys - confirm
# against the service's config loader.
data:
  swapper_config.yaml: |
    # Swapper Configuration for Node #3 (AI/ML Workstation)
    # Threadripper PRO + RTX 3090 24GB
    swapper:
      mode: single-active
      max_concurrent_models: 1
      model_swap_timeout: 300
      gpu_enabled: true
      metal_acceleration: false  # NVIDIA GPU
      default_model: qwen3-8b

    models:
      # Primary LLM - Qwen3 8B (High Priority)
      qwen3-8b:
        path: ollama:qwen3:8b
        type: llm
        size_gb: 4.87
        priority: high
        description: "Primary LLM for general tasks"

      # Vision Model - Qwen3-VL 8B (High Priority)
      qwen3-vl-8b:
        path: ollama:qwen3-vl:8b
        type: vision
        size_gb: 5.72
        priority: high
        description: "Vision model for image processing"

      # Large models for GPU-intensive tasks
      qwen2.5-7b-instruct:
        path: ollama:qwen2.5:7b-instruct
        type: llm
        size_gb: 4.36
        priority: high
        description: "Qwen2.5 7B Instruct"

    storage:
      models_dir: /app/models
      cache_dir: /app/cache
      swap_dir: /app/swap

    ollama:
      url: http://ollama-service:11434
      timeout: 300
|
||||
---
|
||||
apiVersion: v1
kind: Service
metadata:
  name: swapper-service-node3
  namespace: daarion
spec:
  type: ClusterIP
  # Route to pods labelled app=swapper-service and node=node-3.
  selector:
    app: swapper-service
    node: node-3
  ports:
    - name: http
      port: 8890
      targetPort: 8890
    - name: metrics
      port: 8891
      targetPort: 8891
|
||||
Reference in New Issue
Block a user