🏗️ Add DAARION Infrastructure Stack
- Terraform + Ansible + K3s + Vault + Consul + Observability - Decentralized network architecture (own datacenters) - Complete Ansible playbooks: - bootstrap.yml: OS setup, packages, SSH - hardening.yml: Security (UFW, fail2ban, auditd, Trivy) - k3s-install.yml: Lightweight Kubernetes cluster - Production inventory with NODE1, NODE3 - Group variables for all nodes - Security check cron script - Multi-DC ready with Consul support
This commit is contained in:
1
infrastructure/ansible/.vault_pass.example
Normal file
1
infrastructure/ansible/.vault_pass.example
Normal file
@@ -0,0 +1 @@
|
||||
# Create .vault_pass file with your vault password
|
||||
31
infrastructure/ansible/ansible.cfg
Normal file
31
infrastructure/ansible/ansible.cfg
Normal file
@@ -0,0 +1,31 @@
|
||||
# DAARION Network - Ansible Configuration

[defaults]
inventory = inventory/production.yml
remote_user = root
# NOTE(review): disabling host key checking trades MITM protection for
# bootstrap convenience — confirm this is acceptable for production runs.
host_key_checking = False
retry_files_enabled = False
gathering = smart
fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_facts
fact_caching_timeout = 86400

# Parallelism
forks = 20

# Output
stdout_callback = yaml
# BUGFIX: "callback_whitelist" was deprecated in ansible-core 2.11 and
# removed in 2.15; "callbacks_enabled" is the supported option name.
callbacks_enabled = profile_tasks

# Vault
vault_password_file = .vault_pass

[ssh_connection]
pipelining = True
control_path = /tmp/ansible-%%h-%%p-%%r
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no

[privilege_escalation]
become = True
become_method = sudo
become_user = root
become_ask_pass = False
|
||||
93
infrastructure/ansible/inventory/group_vars/all.yml
Normal file
93
infrastructure/ansible/inventory/group_vars/all.yml
Normal file
@@ -0,0 +1,93 @@
|
||||
# DAARION Network - Global Variables
# These variables apply to all hosts

# =============================================================================
# SECURITY
# =============================================================================
# Packages installed by the hardening playbook on every node.
security_packages:
  - fail2ban
  - ufw
  - auditd
  - rkhunter
  - unattended-upgrades
  - ca-certificates

# Firewall - allowed ports (in addition to SSH)
firewall_allowed_tcp_ports:
  - 6443 # K3s API
  - 10250 # Kubelet
  - 8200 # Vault
  - 8500 # Consul HTTP
  - 8600 # Consul DNS
  - 9090 # Prometheus
  - 3000 # Grafana
  - 5432 # PostgreSQL

# Outgoing traffic explicitly permitted (default-deny policy elsewhere).
firewall_allowed_outgoing:
  - { port: 53, proto: udp } # DNS
  - { port: 80, proto: tcp } # HTTP
  - { port: 443, proto: tcp } # HTTPS
  - { port: 123, proto: udp } # NTP

# Blocked networks (internal/private)
firewall_blocked_networks:
  - 10.0.0.0/8
  - 172.16.0.0/12

# =============================================================================
# DOCKER
# =============================================================================
# Users added to the "docker" group on each host.
docker_users:
  - "{{ ansible_user }}"

# Rendered into /etc/docker/daemon.json; log rotation keeps 3 x 100m files.
docker_daemon_options:
  storage-driver: "overlay2"
  log-driver: "json-file"
  log-opts:
    max-size: "100m"
    max-file: "3"

# =============================================================================
# K3S / KUBERNETES
# =============================================================================
k3s_version: "v1.29.0+k3s1"
# Built-in K3s components to disable (replaced by our own ingress/LB).
k3s_disable:
  - traefik
  - servicelb

# =============================================================================
# VAULT
# =============================================================================
vault_version: "1.15.4"
# NOTE(review): plain HTTP for Vault — presumably node-local/private traffic;
# confirm, or front with TLS.
vault_addr: "http://node1:8200"
vault_data_dir: "/opt/vault/data"

# =============================================================================
# CONSUL
# =============================================================================
consul_version: "1.17.1"
consul_data_dir: "/opt/consul/data"
consul_enable_connect: true

# =============================================================================
# OBSERVABILITY
# =============================================================================
prometheus_retention: "30d"
prometheus_storage_size: "50Gi"
loki_retention: "168h" # 7 days
tempo_retention: "168h" # 7 days

# =============================================================================
# POSTGRESQL
# =============================================================================
# Image pinned by digest for reproducible deploys.
postgres_image: "postgres@sha256:23e88eb049fd5d54894d70100df61d38a49ed97909263f79d4ff4c30a5d5fca2"
postgres_user: "daarion"
postgres_db: "daarion_main"

# =============================================================================
# PATHS
# =============================================================================
scripts_dir: "/opt/scripts"
config_dir: "/opt/config"
logs_dir: "/var/log/daarion"
backup_dir: "/opt/backups"
|
||||
65
infrastructure/ansible/inventory/production.yml
Normal file
65
infrastructure/ansible/inventory/production.yml
Normal file
@@ -0,0 +1,65 @@
|
||||
# DAARION Network - Production Inventory
# Version: 1.0.0
# Updated: 2026-01-10

all:
  vars:
    ansible_python_interpreter: /usr/bin/python3
    timezone: "UTC"

    # K3s configuration
    k3s_version: "v1.29.0+k3s1"
    k3s_token: "{{ vault_k3s_token }}"

    # Network
    daarion_network_cidr: "10.42.0.0/16"
    daarion_service_cidr: "10.43.0.0/16"

  children:
    # Master nodes - control plane
    masters:
      hosts:
        node1:
          ansible_host: 144.76.224.179
          ansible_user: root
          # NOTE(review): SSH password auth for root — presumably temporary
          # until key-based auth is rolled out; confirm.
          ansible_ssh_pass: "{{ vault_node1_password }}"
          node_role: master
          datacenter: hetzner-de
          location: "Nuremberg, Germany"

    # Worker nodes - compute
    workers:
      hosts:
        node3:
          ansible_host: 80.77.35.151
          ansible_port: 33147
          ansible_user: zevs
          # BUGFIX: canonical boolean ("yes" is a YAML 1.1 truthy; use true).
          ansible_become: true
          ansible_become_pass: "{{ vault_node3_password }}"
          node_role: worker
          datacenter: remote-dc
          location: "Remote Datacenter"
          gpu: true
          gpu_type: "rtx3090"
          gpu_memory: "24GB"

    # GPU nodes (subset of workers)
    gpu_nodes:
      hosts:
        node3:

    # Database nodes
    database_nodes:
      hosts:
        node1:
        node3:

    # Local development
    local_dev:
      hosts:
        node2:
          ansible_host: localhost
          ansible_connection: local
          node_role: development
          datacenter: local
          location: "MacBook Pro M4"
|
||||
2
infrastructure/ansible/kubeconfig/.gitignore
vendored
Normal file
2
infrastructure/ansible/kubeconfig/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*.yaml
|
||||
!.gitkeep
|
||||
0
infrastructure/ansible/kubeconfig/.gitkeep
Normal file
0
infrastructure/ansible/kubeconfig/.gitkeep
Normal file
143
infrastructure/ansible/playbooks/bootstrap.yml
Normal file
143
infrastructure/ansible/playbooks/bootstrap.yml
Normal file
@@ -0,0 +1,143 @@
|
||||
# DAARION Network - Bootstrap Playbook
# Initial setup for all nodes: packages, SSH, hostname, etc.
---
- name: Bootstrap all nodes
  hosts: all
  become: true

  vars:
    common_packages:
      - curl
      - wget
      - git
      - htop
      - vim
      - jq
      - unzip
      - ca-certificates
      - gnupg
      - lsb-release
      - net-tools
      - dnsutils
      - bc

  tasks:
    # =========================================================================
    # BASIC SETUP
    # =========================================================================
    - name: Set timezone
      timezone:
        name: "{{ timezone }}"

    - name: Set hostname
      hostname:
        name: "{{ inventory_hostname }}"

    # Give every node a static name->IP mapping for its peers.
    - name: Update /etc/hosts with all nodes
      lineinfile:
        path: /etc/hosts
        line: "{{ hostvars[item].ansible_host }} {{ item }}"
        state: present
      loop: "{{ groups['all'] }}"
      when:
        - hostvars[item].ansible_host is defined
        - hostvars[item].ansible_host != 'localhost'

    # =========================================================================
    # PACKAGES
    # =========================================================================
    - name: Update apt cache
      apt:
        update_cache: true
        cache_valid_time: 3600
      when: ansible_os_family == "Debian"

    - name: Upgrade all packages
      apt:
        upgrade: safe
      when: ansible_os_family == "Debian"

    - name: Install common packages
      apt:
        name: "{{ common_packages }}"
        state: present
      when: ansible_os_family == "Debian"

    # =========================================================================
    # USERS & SSH
    # =========================================================================
    - name: Create admin group
      group:
        name: daarion-admin
        state: present

    - name: Create directories
      file:
        path: "{{ item }}"
        state: directory
        mode: '0755'
      loop:
        - "{{ scripts_dir }}"
        - "{{ config_dir }}"
        - "{{ logs_dir }}"
        - "{{ backup_dir }}"

    # =========================================================================
    # SSH HARDENING
    # =========================================================================
    # Masters keep root login (they are accessed as root per inventory);
    # workers only allow key-based root login.
    - name: Disable root login via SSH (workers only)
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^#?PermitRootLogin'
        line: 'PermitRootLogin prohibit-password'
      notify: restart sshd
      when: "'workers' in group_names"

    - name: Set SSH MaxAuthTries
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^#?MaxAuthTries'
        line: 'MaxAuthTries 3'
      notify: restart sshd

    - name: Set SSH ClientAliveInterval
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^#?ClientAliveInterval'
        line: 'ClientAliveInterval 300'
      notify: restart sshd

    # =========================================================================
    # KERNEL PARAMETERS
    # =========================================================================
    # BUGFIX: the net.bridge.* sysctls below do not exist until br_netfilter
    # is loaded; without this the blanket ignore_errors silently skipped them.
    - name: Load br_netfilter module (required for bridge sysctls)
      modprobe:
        name: br_netfilter
        state: present
      ignore_errors: true

    - name: Set kernel parameters for containers
      sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        state: present
        reload: true
      loop:
        - { name: 'net.ipv4.ip_forward', value: '1' }
        - { name: 'net.bridge.bridge-nf-call-iptables', value: '1' }
        - { name: 'net.bridge.bridge-nf-call-ip6tables', value: '1' }
        - { name: 'fs.inotify.max_user_watches', value: '524288' }
        - { name: 'fs.inotify.max_user_instances', value: '512' }
      ignore_errors: true # Some params may not exist on all systems

    # =========================================================================
    # VERIFICATION
    # =========================================================================
    - name: Verify setup
      debug:
        msg: |
          Node: {{ inventory_hostname }}
          Host: {{ ansible_host }}
          Datacenter: {{ datacenter | default('unknown') }}
          Role: {{ node_role | default('unknown') }}
          GPU: {{ gpu | default(false) }}

  handlers:
    # NOTE(review): the unit is "ssh" on stock Debian/Ubuntu ("sshd" is an
    # alias on most releases) — confirm the alias exists on all target nodes.
    - name: restart sshd
      service:
        name: sshd
        state: restarted
|
||||
288
infrastructure/ansible/playbooks/hardening.yml
Normal file
288
infrastructure/ansible/playbooks/hardening.yml
Normal file
@@ -0,0 +1,288 @@
|
||||
# DAARION Network - Security Hardening Playbook
# Comprehensive security setup for all nodes
---
- name: Security Hardening
  hosts: all
  become: true

  vars:
    # SSH may run on a non-default port (e.g. node3) — keep UFW and
    # fail2ban in sync with the actual port.
    allowed_ssh_port: "{{ ansible_port | default(22) }}"

  tasks:
    # =========================================================================
    # SECURITY PACKAGES
    # =========================================================================
    - name: Install security packages
      apt:
        name: "{{ security_packages }}"
        state: present
      when: ansible_os_family == "Debian"

    - name: Install Trivy (vulnerability scanner)
      shell: |
        curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
      args:
        creates: /usr/local/bin/trivy

    # =========================================================================
    # UFW FIREWALL
    # NOTE: "reset" disables UFW, so the rules below are staged while the
    # firewall is down and only take effect at the final "Enable" task —
    # no mid-play lockout.
    # =========================================================================
    - name: UFW - Reset to defaults
      ufw:
        state: reset

    - name: UFW - Default deny incoming
      ufw:
        direction: incoming
        policy: deny

    - name: UFW - Default deny outgoing
      ufw:
        direction: outgoing
        policy: deny

    - name: UFW - Allow SSH
      ufw:
        rule: allow
        port: "{{ allowed_ssh_port }}"
        proto: tcp

    - name: UFW - Allow necessary TCP ports
      ufw:
        rule: allow
        port: "{{ item }}"
        proto: tcp
      loop: "{{ firewall_allowed_tcp_ports }}"
      when: firewall_allowed_tcp_ports is defined

    - name: UFW - Allow necessary outgoing
      ufw:
        rule: allow
        direction: out
        port: "{{ item.port }}"
        proto: "{{ item.proto }}"
      loop: "{{ firewall_allowed_outgoing }}"

    - name: UFW - Block internal networks
      ufw:
        rule: deny
        direction: out
        to_ip: "{{ item }}"
      loop: "{{ firewall_blocked_networks }}"
      when: firewall_blocked_networks is defined

    - name: UFW - Enable
      ufw:
        state: enabled

    # =========================================================================
    # FAIL2BAN
    # =========================================================================
    - name: Configure fail2ban
      copy:
        dest: /etc/fail2ban/jail.local
        content: |
          [DEFAULT]
          bantime = 3600
          findtime = 600
          maxretry = 3

          [sshd]
          enabled = true
          port = {{ allowed_ssh_port }}
          filter = sshd
          logpath = /var/log/auth.log
          maxretry = 3
          bantime = 86400
      notify: restart fail2ban

    - name: Enable fail2ban
      service:
        name: fail2ban
        enabled: true
        state: started

    # =========================================================================
    # AUDITD
    # =========================================================================
    - name: Configure auditd rules
      copy:
        dest: /etc/audit/rules.d/daarion.rules
        content: |
          # Monitor file changes in critical directories
          -w /etc/passwd -p wa -k passwd_changes
          -w /etc/shadow -p wa -k shadow_changes
          -w /etc/ssh/sshd_config -p wa -k sshd_config

          # Monitor Docker
          -w /var/lib/docker -p wa -k docker
          -w /etc/docker -p wa -k docker_config

          # Monitor cron
          -w /etc/crontab -p wa -k cron
          -w /etc/cron.d -p wa -k cron

          # Monitor tmp (malware indicator)
          -w /tmp -p x -k tmp_exec
          -w /var/tmp -p x -k var_tmp_exec
      notify: restart auditd

    - name: Enable auditd
      service:
        name: auditd
        enabled: true
        state: started

    # =========================================================================
    # KERNEL HARDENING
    # =========================================================================
    - name: Kernel security parameters
      sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        state: present
        reload: true
      loop:
        - { name: 'net.ipv4.conf.all.accept_redirects', value: '0' }
        - { name: 'net.ipv4.conf.default.accept_redirects', value: '0' }
        - { name: 'net.ipv4.conf.all.send_redirects', value: '0' }
        - { name: 'net.ipv4.conf.default.send_redirects', value: '0' }
        - { name: 'net.ipv4.tcp_syncookies', value: '1' }
        - { name: 'net.ipv4.icmp_echo_ignore_broadcasts', value: '1' }
        - { name: 'kernel.randomize_va_space', value: '2' }
        - { name: 'kernel.kptr_restrict', value: '2' }
        - { name: 'kernel.dmesg_restrict', value: '1' }

    # =========================================================================
    # SECURITY CHECK SCRIPT
    # =========================================================================
    # BUGFIX: the docker inspect Go-template format strings
    # ({{.Config.User}}, {{.Name}}) must be wrapped in {% raw %} blocks —
    # the copy module's "content" is rendered by Jinja2, which otherwise
    # fails to parse them as template expressions.
    - name: Deploy security check script
      copy:
        dest: "{{ scripts_dir }}/security-check.sh"
        mode: '0755'
        content: |
          #!/bin/bash
          # DAARION Security Check Script
          # Runs hourly via cron

          LOG="{{ logs_dir }}/security-$(date +%Y%m%d).log"
          ALERT_FILE="/tmp/security_alert"

          log() {
              echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG"
          }

          log "=== Security Check Started ==="

          # Check for suspicious processes
          SUSPICIOUS=$(ps aux | grep -E "(xmrig|kdevtmp|kinsing|perfctl|httpd.*tmp|mysql.*tmp)" | grep -v grep)
          if [ -n "$SUSPICIOUS" ]; then
              log "CRITICAL: Suspicious process detected!"
              log "$SUSPICIOUS"
              pkill -9 -f "xmrig|kdevtmp|kinsing|perfctl"
              touch "$ALERT_FILE"
          fi

          # Check for executables in /tmp
          TMP_EXEC=$(find /tmp /var/tmp /dev/shm -type f -executable 2>/dev/null)
          if [ -n "$TMP_EXEC" ]; then
              log "WARNING: Executable files in tmp directories!"
              log "$TMP_EXEC"
              rm -f $TMP_EXEC 2>/dev/null
          fi

          # Check CPU usage (potential mining)
          LOAD=$(cat /proc/loadavg | cut -d' ' -f1)
          CPU_COUNT=$(nproc)
          THRESHOLD=$(echo "$CPU_COUNT * 2" | bc)
          if (( $(echo "$LOAD > $THRESHOLD" | bc -l) )); then
              log "WARNING: High CPU load: $LOAD (threshold: $THRESHOLD)"
          fi

          # Check for unauthorized SSH keys
          for user_home in /root /home/*; do
              if [ -f "$user_home/.ssh/authorized_keys" ]; then
                  KEY_COUNT=$(wc -l < "$user_home/.ssh/authorized_keys")
                  log "INFO: $user_home has $KEY_COUNT SSH keys"
              fi
          done

          # Check failed SSH attempts
          FAILED_SSH=$(grep "Failed password" /var/log/auth.log 2>/dev/null | wc -l)
          log "INFO: Failed SSH attempts today: $FAILED_SSH"

          # Check Docker containers
          if command -v docker &> /dev/null; then
              CONTAINER_COUNT=$(docker ps -q | wc -l)
              log "INFO: Running Docker containers: $CONTAINER_COUNT"

              # Check for containers running as root
              docker ps -q | while read cid; do
                  USER=$(docker inspect --format '{% raw %}{{.Config.User}}{% endraw %}' $cid)
                  NAME=$(docker inspect --format '{% raw %}{{.Name}}{% endraw %}' $cid)
                  if [ -z "$USER" ] || [ "$USER" = "root" ] || [ "$USER" = "0" ]; then
                      log "WARNING: Container $NAME running as root"
                  fi
              done
          fi

          log "=== Security Check Completed ==="

    - name: Setup security cron
      cron:
        name: "Hourly security check"
        minute: "0"
        job: "{{ scripts_dir }}/security-check.sh"

    - name: Setup daily rkhunter scan
      cron:
        name: "Daily rkhunter scan"
        hour: "3"
        minute: "0"
        job: "rkhunter --update && rkhunter --check --skip-keypress > {{ logs_dir }}/rkhunter.log 2>&1"

    # =========================================================================
    # AUTO UPDATES
    # =========================================================================
    - name: Configure unattended-upgrades
      copy:
        dest: /etc/apt/apt.conf.d/50unattended-upgrades
        content: |
          Unattended-Upgrade::Allowed-Origins {
              "${distro_id}:${distro_codename}";
              "${distro_id}:${distro_codename}-security";
              "${distro_id}ESMApps:${distro_codename}-apps-security";
              "${distro_id}ESM:${distro_codename}-infra-security";
          };
          Unattended-Upgrade::AutoFixInterruptedDpkg "true";
          Unattended-Upgrade::Remove-Unused-Dependencies "true";
          Unattended-Upgrade::Automatic-Reboot "false";
      when: ansible_os_family == "Debian"

    # =========================================================================
    # VERIFICATION
    # =========================================================================
    - name: Verify security setup
      shell: |
        echo "=== Security Status ==="
        echo "UFW: $(ufw status | head -1)"
        echo "Fail2ban: $(systemctl is-active fail2ban)"
        echo "Auditd: $(systemctl is-active auditd)"
        echo "Trivy: $(trivy --version 2>/dev/null | head -1 || echo 'not installed')"
      register: security_status
      changed_when: false

    - name: Show security status
      debug:
        var: security_status.stdout_lines

  handlers:
    - name: restart fail2ban
      service:
        name: fail2ban
        state: restarted

    - name: restart auditd
      service:
        name: auditd
        state: restarted
|
||||
183
infrastructure/ansible/playbooks/k3s-install.yml
Normal file
183
infrastructure/ansible/playbooks/k3s-install.yml
Normal file
@@ -0,0 +1,183 @@
|
||||
# DAARION Network - K3s Installation Playbook
# Lightweight Kubernetes cluster setup
---
# =============================================================================
# INSTALL K3S SERVER (MASTERS)
# =============================================================================
- name: Install K3s Server on Masters
  hosts: masters
  become: true

  tasks:
    - name: Check if K3s is already installed
      stat:
        path: /etc/rancher/k3s/k3s.yaml
      register: k3s_installed

    - name: Download K3s installer
      get_url:
        url: https://get.k3s.io
        dest: /tmp/k3s-install.sh
        mode: '0755'
      when: not k3s_installed.stat.exists

    # NOTE(review): traefik/servicelb are hard-coded here while group_vars
    # defines k3s_disable — presumably they should stay in sync; confirm.
    - name: Install K3s server
      shell: |
        INSTALL_K3S_VERSION={{ k3s_version }} \
        sh /tmp/k3s-install.sh server \
          --disable traefik \
          --disable servicelb \
          --write-kubeconfig-mode 644 \
          --tls-san {{ ansible_host }} \
          --tls-san {{ inventory_hostname }} \
          --node-label "datacenter={{ datacenter }}" \
          --node-label "node-role={{ node_role }}" \
          --cluster-cidr {{ daarion_network_cidr | default('10.42.0.0/16') }} \
          --service-cidr {{ daarion_service_cidr | default('10.43.0.0/16') }}
      args:
        creates: /etc/rancher/k3s/k3s.yaml
      register: k3s_install

    - name: Wait for K3s to be ready
      wait_for:
        port: 6443
        delay: 10
        timeout: 300

    - name: Wait for node to be ready
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl wait --for=condition=Ready node/{{ inventory_hostname }} --timeout=300s
      register: node_ready
      retries: 10
      delay: 10
      until: node_ready.rc == 0

    - name: Get K3s token
      slurp:
        src: /var/lib/rancher/k3s/server/node-token
      register: k3s_token_file

    # Exposed as a fact so worker plays can read it via hostvars.
    - name: Save K3s token as fact
      set_fact:
        k3s_join_token: "{{ k3s_token_file.content | b64decode | trim }}"

    - name: Fetch kubeconfig
      fetch:
        src: /etc/rancher/k3s/k3s.yaml
        dest: "{{ playbook_dir }}/../kubeconfig/{{ inventory_hostname }}.yaml"
        flat: true

    - name: Update kubeconfig with external IP
      delegate_to: localhost
      become: false
      replace:
        path: "{{ playbook_dir }}/../kubeconfig/{{ inventory_hostname }}.yaml"
        # BUGFIX: escape the dots — an unescaped "127.0.0.1" is a regex
        # where "." matches any character.
        regexp: '127\.0\.0\.1'
        replace: "{{ ansible_host }}"

    - name: Show K3s status
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl get nodes -o wide
      register: k3s_status
      changed_when: false

    - name: Display K3s status
      debug:
        var: k3s_status.stdout_lines

# =============================================================================
# INSTALL K3S AGENT (WORKERS)
# =============================================================================
- name: Install K3s Agent on Workers
  hosts: workers
  become: true

  vars:
    k3s_master_host: "{{ hostvars[groups['masters'][0]].ansible_host }}"
    k3s_master_token: "{{ hostvars[groups['masters'][0]].k3s_join_token }}"

  tasks:
    - name: Check if K3s agent is already installed
      stat:
        path: /var/lib/rancher/k3s/agent
      register: k3s_agent_installed

    - name: Download K3s installer
      get_url:
        url: https://get.k3s.io
        dest: /tmp/k3s-install.sh
        mode: '0755'
      when: not k3s_agent_installed.stat.exists

    - name: Build node labels
      set_fact:
        node_labels: >-
          --node-label datacenter={{ datacenter }}
          --node-label node-role={{ node_role }}
          {% if gpu is defined and gpu %}
          --node-label gpu=true
          --node-label gpu-type={{ gpu_type | default('unknown') }}
          --node-label gpu-memory={{ gpu_memory | default('unknown') }}
          {% endif %}

    # NOTE(review): the join token is interpolated into the shell command
    # and will appear in verbose task output / process lists — consider
    # no_log or an environment: block if that matters here.
    - name: Install K3s agent
      shell: |
        INSTALL_K3S_VERSION={{ k3s_version }} \
        K3S_URL=https://{{ k3s_master_host }}:6443 \
        K3S_TOKEN={{ k3s_master_token }} \
        sh /tmp/k3s-install.sh agent \
          {{ node_labels }}
      args:
        creates: /var/lib/rancher/k3s/agent
      register: k3s_agent_install

    - name: Wait for agent to connect
      pause:
        seconds: 30
      when: k3s_agent_install.changed

# =============================================================================
# VERIFY CLUSTER
# =============================================================================
- name: Verify K3s Cluster
  hosts: masters
  become: true

  tasks:
    - name: Get cluster nodes
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl get nodes -o wide
      register: cluster_nodes
      changed_when: false

    - name: Display cluster nodes
      debug:
        var: cluster_nodes.stdout_lines

    - name: Get cluster info
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl cluster-info
      register: cluster_info
      changed_when: false

    - name: Display cluster info
      debug:
        var: cluster_info.stdout_lines

    - name: Create daarion namespace
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl create namespace daarion --dry-run=client -o yaml | kubectl apply -f -
      changed_when: false

    - name: Label GPU nodes
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl label nodes {{ item }} nvidia.com/gpu=true --overwrite
      loop: "{{ groups['gpu_nodes'] | default([]) }}"
      when: groups['gpu_nodes'] is defined
      ignore_errors: true
|
||||
Reference in New Issue
Block a user