feat: implement TTS, Document processing, and Memory Service /facts API

- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
This commit is contained in:
Apple
2026-01-17 08:16:37 -08:00
parent a9fcadc6e2
commit 5290287058
121 changed files with 17071 additions and 436 deletions

View File

@@ -0,0 +1,113 @@
---
# DAGI Router Deployment for NODE3 (K8s)
# Deployment: runs one dagi-router pod in the daarion namespace, pinned to the
# host node3-daarion. Router configuration is mounted from a ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dagi-router
  namespace: daarion
  labels:
    app: dagi-router
    component: router
    node: node-3
spec:
  replicas: 1
  selector:
    # These labels must remain a subset of the pod template labels below.
    matchLabels:
      app: dagi-router
      node: node-3
  template:
    metadata:
      labels:
        app: dagi-router
        component: router
        node: node-3
    spec:
      # Schedule only onto the node whose hostname label is node3-daarion.
      nodeSelector:
        kubernetes.io/hostname: node3-daarion
      volumes:
        # Backing ConfigMap for the /etc/router mount used by the container.
        - name: router-config
          configMap:
            name: dagi-router-config-node3
      containers:
        - name: router
          image: ghcr.io/daarion-dao/dagi-router:latest
          ports:
            - name: http
              containerPort: 9102
          env:
            - name: NATS_URL
              value: 'nats://nats-client.nats:4222'
            # Points at the file provided by the router-config volume mount.
            - name: ROUTER_CONFIG_PATH
              value: '/etc/router/router_config.yaml'
            - name: LOG_LEVEL
              value: 'info'
            - name: NODE_ID
              value: 'node-3-threadripper-rtx3090'
          volumeMounts:
            - name: router-config
              mountPath: /etc/router
          resources:
            requests:
              cpu: '100m'
              memory: '256Mi'
            limits:
              cpu: '500m'
              memory: '512Mi'
          # Both probes poll GET /health on the HTTP port; readiness starts
          # earlier and polls more often than liveness.
          readinessProbe:
            httpGet:
              path: /health
              port: 9102
            initialDelaySeconds: 10
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /health
              port: 9102
            initialDelaySeconds: 30
            periodSeconds: 10
---
# ConfigMap carrying router_config.yaml for the dagi-router Deployment, which
# mounts it at /etc/router and reads it via
# ROUTER_CONFIG_PATH=/etc/router/router_config.yaml.
# NOTE(review): swapper_service below targets swapper-service.daarion, while
# the swapper Service defined in this change set is named
# swapper-service-node3 — confirm a Service named swapper-service exists in
# the daarion namespace.
apiVersion: v1
kind: ConfigMap
metadata:
name: dagi-router-config-node3
namespace: daarion
data:
# Literal block scalar: everything below is the file content handed to the
# router. nats_url and node_id repeat the NATS_URL / NODE_ID env values set on
# the Deployment's container.
router_config.yaml: |
routing:
target_subject: "router.invoke.agent"
nats_url: "nats://nats-client.nats:4222"
services:
memory_service: "http://memory-service.daarion:8000"
swapper_service: "http://swapper-service.daarion:8890"
node_id: "node-3-threadripper-rtx3090"
---
# In-cluster (ClusterIP) Service that exposes the dagi-router pods on
# port 9102.
apiVersion: v1
kind: Service
metadata:
  name: dagi-router-node3
  namespace: daarion
spec:
  type: ClusterIP
  # Routes to pods labelled app=dagi-router and node=node-3.
  selector:
    app: dagi-router
    node: node-3
  ports:
    - name: http
      port: 9102
      targetPort: 9102
---
# NodePort Service that publishes the dagi-router pods outside the cluster:
# node port 30103 forwards to service/container port 9102.
apiVersion: v1
kind: Service
metadata:
  name: dagi-router-node3-external
  namespace: daarion
spec:
  type: NodePort
  # Routes to pods labelled app=dagi-router and node=node-3.
  selector:
    app: dagi-router
    node: node-3
  ports:
    - name: http
      nodePort: 30103
      port: 9102
      targetPort: 9102

View File

@@ -0,0 +1,145 @@
---
# Swapper Service Deployment for NODE3 (K8s)
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
# Deployment: runs one swapper-service pod in the daarion namespace, pinned to
# the host node3-daarion, with one NVIDIA GPU allocated to the container.
# Swapper configuration is mounted from a ConfigMap at /etc/swapper.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: swapper-service-node3
  namespace: daarion
  labels:
    app: swapper-service
    component: llm-manager
    node: node-3
spec:
  replicas: 1
  selector:
    # These labels must remain a subset of the pod template labels below.
    matchLabels:
      app: swapper-service
      node: node-3
  template:
    metadata:
      labels:
        app: swapper-service
        component: llm-manager
        node: node-3
    spec:
      # Schedule only onto the node whose hostname label is node3-daarion.
      nodeSelector:
        kubernetes.io/hostname: node3-daarion
      containers:
        - name: swapper
          image: ghcr.io/daarion-dao/swapper-service:latest
          ports:
            - containerPort: 8890
              name: http
            - containerPort: 8891
              name: metrics
          env:
            - name: OLLAMA_HOST
              value: "http://ollama-service:11434"
            # Points at the file provided by the swapper-config volume mount.
            - name: SWAPPER_CONFIG_PATH
              value: "/etc/swapper/swapper_config.yaml"
            - name: NODE_ID
              value: "node-3-threadripper-rtx3090"
            # Env values must be strings, hence the quoted "true".
            - name: GPU_ENABLED
              value: "true"
          volumeMounts:
            - name: swapper-config
              mountPath: /etc/swapper
          # CPU/memory and GPU settings are kept in ONE resources mapping.
          # Declaring `resources:` twice on the same container is a duplicate
          # key: parsers either reject it or keep only the last occurrence,
          # which would silently drop the CPU/memory requests and limits.
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
              # GPU support for RTX 3090
              nvidia.com/gpu: 1
            limits:
              memory: "4Gi"
              cpu: "2000m"
              # For extended resources such as GPUs, request must equal limit.
              nvidia.com/gpu: 1
          livenessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        - name: swapper-config
          configMap:
            name: swapper-service-config-node3
---
# ConfigMap carrying swapper_config.yaml for the swapper-service-node3
# Deployment, which mounts it at /etc/swapper and reads it via
# SWAPPER_CONFIG_PATH=/etc/swapper/swapper_config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
name: swapper-service-config-node3
namespace: daarion
data:
# Literal block scalar: everything below (including its own # comments) is
# the file content handed to the swapper. The ollama url repeats the
# OLLAMA_HOST env value set on the Deployment's container, and default_model
# names the first entry under models.
# NOTE(review): the nesting of the embedded keys is not recoverable from this
# view — verify indentation against the swapper's expected schema.
swapper_config.yaml: |
# Swapper Configuration for Node #3 (AI/ML Workstation)
# Threadripper PRO + RTX 3090 24GB
swapper:
mode: single-active
max_concurrent_models: 1
model_swap_timeout: 300
gpu_enabled: true
metal_acceleration: false # NVIDIA GPU
default_model: qwen3-8b
models:
# Primary LLM - Qwen3 8B (High Priority)
qwen3-8b:
path: ollama:qwen3:8b
type: llm
size_gb: 4.87
priority: high
description: "Primary LLM for general tasks"
# Vision Model - Qwen3-VL 8B (High Priority)
qwen3-vl-8b:
path: ollama:qwen3-vl:8b
type: vision
size_gb: 5.72
priority: high
description: "Vision model for image processing"
# Large models for GPU-intensive tasks
qwen2.5-7b-instruct:
path: ollama:qwen2.5:7b-instruct
type: llm
size_gb: 4.36
priority: high
description: "Qwen2.5 7B Instruct"
storage:
models_dir: /app/models
cache_dir: /app/cache
swap_dir: /app/swap
ollama:
url: http://ollama-service:11434
timeout: 300
---
# In-cluster (ClusterIP) Service that exposes the swapper-service pods:
# port 8890 for HTTP and port 8891 for metrics.
apiVersion: v1
kind: Service
metadata:
  name: swapper-service-node3
  namespace: daarion
spec:
  type: ClusterIP
  # Routes to pods labelled app=swapper-service and node=node-3.
  selector:
    app: swapper-service
    node: node-3
  ports:
    - name: http
      port: 8890
      targetPort: 8890
    - name: metrics
      port: 8891
      targetPort: 8891