---
# Swapper Service Deployment for NODE3 (K8s)
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
apiVersion: apps/v1
kind: Deployment
metadata:
  name: swapper-service-node3
  namespace: daarion
  labels:
    app: swapper-service
    component: llm-manager
    node: node-3
spec:
  replicas: 1
  selector:
    matchLabels:
      app: swapper-service
      node: node-3
  template:
    metadata:
      labels:
        app: swapper-service
        component: llm-manager
        node: node-3
    spec:
      # Pin the pod to the GPU workstation node.
      nodeSelector:
        kubernetes.io/hostname: node3-daarion
      containers:
        - name: swapper
          image: ghcr.io/daarion-dao/swapper-service:latest
          ports:
            - containerPort: 8890
              name: http
            - containerPort: 8891
              name: metrics
          env:
            - name: OLLAMA_HOST
              value: "http://ollama-service:11434"
            - name: SWAPPER_CONFIG_PATH
              value: "/etc/swapper/swapper_config.yaml"
            - name: NODE_ID
              value: "node-3-threadripper-rtx3090"
            - name: GPU_ENABLED
              value: "true"
          volumeMounts:
            - name: swapper-config
              mountPath: /etc/swapper
          # Single merged resources block. The original manifest declared
          # `resources:` twice (CPU/memory first, then GPU); duplicate mapping
          # keys are invalid YAML and most parsers silently keep only the last
          # one, which would drop the CPU/memory requests and limits entirely.
          # GPU request must equal GPU limit (Kubernetes extended-resource rule).
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
              nvidia.com/gpu: 1  # RTX 3090 via NVIDIA device plugin
            limits:
              memory: "4Gi"
              cpu: "2000m"
              nvidia.com/gpu: 1
          livenessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: 8890
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        - name: swapper-config
          configMap:
            name: swapper-service-config-node3
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: swapper-service-config-node3
  namespace: daarion
data:
  # Mounted at /etc/swapper/swapper_config.yaml (see SWAPPER_CONFIG_PATH above).
  swapper_config.yaml: |
    # Swapper Configuration for Node #3 (AI/ML Workstation)
    # Threadripper PRO + RTX 3090 24GB
    swapper:
      mode: single-active
      max_concurrent_models: 1
      model_swap_timeout: 300
      gpu_enabled: true
      metal_acceleration: false  # NVIDIA GPU, not Apple Metal
      default_model: qwen3-8b

    models:
      # Primary LLM - Qwen3 8B (High Priority)
      qwen3-8b:
        path: ollama:qwen3:8b
        type: llm
        size_gb: 4.87
        priority: high
        description: "Primary LLM for general tasks"

      # Vision Model - Qwen3-VL 8B (High Priority)
      qwen3-vl-8b:
        path: ollama:qwen3-vl:8b
        type: vision
        size_gb: 5.72
        priority: high
        description: "Vision model for image processing"

      # Large models for GPU-intensive tasks
      qwen2.5-7b-instruct:
        path: ollama:qwen2.5:7b-instruct
        type: llm
        size_gb: 4.36
        priority: high
        description: "Qwen2.5 7B Instruct"

    storage:
      models_dir: /app/models
      cache_dir: /app/cache
      swap_dir: /app/swap

    ollama:
      url: http://ollama-service:11434
      timeout: 300
---
apiVersion: v1
kind: Service
metadata:
  name: swapper-service-node3
  namespace: daarion
spec:
  selector:
    app: swapper-service
    node: node-3
  ports:
    - name: http
      port: 8890
      targetPort: 8890
    - name: metrics
      port: 8891
      targetPort: 8891
  type: ClusterIP