🏗️ Add DAARION Infrastructure Stack

- Terraform + Ansible + K3s + Vault + Consul + Observability
- Decentralized network architecture (own datacenters)
- Complete Ansible playbooks:
  - bootstrap.yml: OS setup, packages, SSH
  - hardening.yml: Security (UFW, fail2ban, auditd, Trivy)
  - k3s-install.yml: Lightweight Kubernetes cluster
- Production inventory with NODE1, NODE3
- Group variables for all nodes
- Security check cron script
- Multi-DC ready with Consul support
This commit is contained in:
Apple
2026-01-10 05:31:51 -08:00
parent 02cfd90b6f
commit 12545a7c76
11 changed files with 1799 additions and 633 deletions

View File

@@ -0,0 +1 @@
# Create .vault_pass file with your vault password

View File

@@ -0,0 +1,31 @@
# DAARION Network - Ansible Configuration
#
# Project-wide defaults picked up by ansible / ansible-playbook when they are
# started from the directory that holds this file.

[defaults]
inventory = inventory/production.yml
remote_user = root
# NOTE(review): host key verification is switched off here and again through
# StrictHostKeyChecking=no in ssh_args below, so an impersonated host would be
# accepted without warning. Consider pre-seeding known_hosts and re-enabling it.
host_key_checking = False
retry_files_enabled = False

# Facts are gathered only when not already cached; the JSON cache entries
# expire after 86400 seconds (24 hours).
gathering = smart
fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_facts
fact_caching_timeout = 86400

# Parallelism
forks = 20

# Output
stdout_callback = yaml
# `callback_whitelist` is the deprecated spelling of this setting; current
# ansible-core releases only honour `callbacks_enabled`.
callbacks_enabled = profile_tasks

# Vault
vault_password_file = .vault_pass

[ssh_connection]
pipelining = True
# `%%` is the INI escape for a literal `%`, so ssh receives %h-%p-%r.
control_path = /tmp/ansible-%%h-%%p-%%r
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no

[privilege_escalation]
become = True
become_method = sudo
become_user = root
become_ask_pass = False

View File

@@ -0,0 +1,93 @@
# DAARION Network - Global Variables
# These variables apply to all hosts
---
# =============================================================================
# SECURITY
# =============================================================================
# Packages installed by the security hardening play.
security_packages:
  - fail2ban
  - ufw
  - auditd
  - rkhunter
  - unattended-upgrades
  - ca-certificates

# Firewall - allowed inbound TCP ports (in addition to SSH).
# NOTE(review): the hardening play opens these without a source restriction,
# so Vault, Consul, Prometheus, Grafana and PostgreSQL become reachable from
# any address. Confirm that is intended.
firewall_allowed_tcp_ports:
  - 6443   # K3s API
  - 10250  # Kubelet
  - 8200   # Vault
  - 8500   # Consul HTTP
  - 8600   # Consul DNS
  - 9090   # Prometheus
  - 3000   # Grafana
  - 5432   # PostgreSQL

# Outbound allow-list. The hardening play sets the default outgoing policy to
# deny, so every destination port the nodes must reach has to appear here.
firewall_allowed_outgoing:
  - { port: 53, proto: udp }     # DNS
  - { port: 80, proto: tcp }     # HTTP
  - { port: 443, proto: tcp }    # HTTPS
  - { port: 123, proto: udp }    # NTP
  # K3s inter-node traffic: without these the agents cannot register with the
  # server (they dial https://<master>:6443) once outgoing traffic is denied.
  - { port: 6443, proto: tcp }   # K3s API (agent -> server)
  - { port: 10250, proto: tcp }  # Kubelet
  - { port: 8472, proto: udp }   # Flannel VXLAN overlay
# NOTE(review): inbound 8472/udp is still not opened because the inbound list
# above is applied as TCP only - verify overlay traffic between the nodes.

# Blocked networks (internal/private) - outbound traffic to these is denied.
# NOTE(review): 10.0.0.0/8 contains the cluster and service CIDRs declared in
# the inventory (10.42.0.0/16 and 10.43.0.0/16); confirm this does not cut off
# host-to-pod traffic.
firewall_blocked_networks:
  - 10.0.0.0/8
  - 172.16.0.0/12

# =============================================================================
# DOCKER
# =============================================================================
docker_users:
  - "{{ ansible_user }}"

docker_daemon_options:
  storage-driver: "overlay2"
  log-driver: "json-file"
  log-opts:
    max-size: "100m"
    max-file: "3"

# =============================================================================
# K3S / KUBERNETES
# =============================================================================
k3s_version: "v1.29.0+k3s1"
k3s_disable:
  - traefik
  - servicelb

# =============================================================================
# VAULT
# =============================================================================
vault_version: "1.15.4"
# NOTE(review): plain http - traffic to Vault is not encrypted in transit.
vault_addr: "http://node1:8200"
vault_data_dir: "/opt/vault/data"

# =============================================================================
# CONSUL
# =============================================================================
consul_version: "1.17.1"
consul_data_dir: "/opt/consul/data"
consul_enable_connect: true

# =============================================================================
# OBSERVABILITY
# =============================================================================
prometheus_retention: "30d"
prometheus_storage_size: "50Gi"
loki_retention: "168h"   # 7 days
tempo_retention: "168h"  # 7 days

# =============================================================================
# POSTGRESQL
# =============================================================================
# Image is pinned by digest rather than by tag.
postgres_image: "postgres@sha256:23e88eb049fd5d54894d70100df61d38a49ed97909263f79d4ff4c30a5d5fca2"
postgres_user: "daarion"
postgres_db: "daarion_main"

# =============================================================================
# PATHS
# =============================================================================
scripts_dir: "/opt/scripts"
config_dir: "/opt/config"
logs_dir: "/var/log/daarion"
backup_dir: "/opt/backups"

View File

@@ -0,0 +1,65 @@
# DAARION Network - Production Inventory
# Version: 1.0.0
# Updated: 2026-01-10
---
all:
  vars:
    ansible_python_interpreter: /usr/bin/python3
    timezone: "UTC"

    # K3s release and shared secret (the secret itself lives in Ansible Vault)
    k3s_version: "v1.29.0+k3s1"
    k3s_token: "{{ vault_k3s_token }}"

    # Cluster (pod) and service address ranges
    daarion_network_cidr: "10.42.0.0/16"
    daarion_service_cidr: "10.43.0.0/16"

  children:
    # Control plane
    masters:
      hosts:
        node1:
          # Connection
          ansible_host: 144.76.224.179
          ansible_user: root
          ansible_ssh_pass: "{{ vault_node1_password }}"
          # Placement metadata
          node_role: master
          datacenter: hetzner-de
          location: "Nuremberg, Germany"

    # Compute
    workers:
      hosts:
        node3:
          # Connection (non-default SSH port, unprivileged user + sudo)
          ansible_host: 80.77.35.151
          ansible_port: 33147
          ansible_user: zevs
          ansible_become: true
          ansible_become_pass: "{{ vault_node3_password }}"
          # Placement metadata
          node_role: worker
          datacenter: remote-dc
          location: "Remote Datacenter"
          # Accelerator description, turned into node labels by the K3s play
          gpu: true
          gpu_type: "rtx3090"
          gpu_memory: "24GB"

    # Workers that carry a GPU
    gpu_nodes:
      hosts:
        node3: {}

    # Hosts that run databases
    database_nodes:
      hosts:
        node1: {}
        node3: {}

    # Developer workstation, reached without SSH
    local_dev:
      hosts:
        node2:
          ansible_host: localhost
          ansible_connection: local
          node_role: development
          datacenter: local
          location: "MacBook Pro M4"

View File

@@ -0,0 +1,2 @@
# Keep every *.yaml file in this directory out of version control; the
# .gitkeep placeholder is explicitly excluded from the ignore rules.
# NOTE(review): presumably this is the directory the K3s playbook fetches
# kubeconfig files into - confirm the path.
*.yaml
!.gitkeep

View File

@@ -0,0 +1,143 @@
# DAARION Network - Bootstrap Playbook
# Initial setup for all nodes: packages, SSH, hostname, etc.
---
# Baseline preparation for every inventory host: timezone, hostname, hosts
# file, package upgrade/installation, working directories, a few sshd
# settings and the kernel parameters containers rely on.
# NOTE(review): `hosts: all` also matches the `local_dev` workstation entry of
# the production inventory (ansible_connection: local) - confirm that machine
# is really meant to be bootstrapped.
- name: Bootstrap all nodes
  hosts: all
  become: true
  vars:
    common_packages:
      - curl
      - wget
      - git
      - htop
      - vim
      - jq
      - unzip
      - ca-certificates
      - gnupg
      - lsb-release
      - net-tools
      - dnsutils
      - bc
  tasks:
    # =========================================================================
    # BASIC SETUP
    # =========================================================================
    - name: Set timezone
      timezone:
        name: "{{ timezone }}"

    - name: Set hostname
      hostname:
        name: "{{ inventory_hostname }}"

    # One "<ip> <inventory name>" line per host; entries whose address is
    # missing or is the literal 'localhost' are skipped.
    - name: Update /etc/hosts with all nodes
      lineinfile:
        path: /etc/hosts
        line: "{{ hostvars[item].ansible_host }} {{ item }}"
        state: present
      loop: "{{ groups['all'] }}"
      when:
        - hostvars[item].ansible_host is defined
        - hostvars[item].ansible_host != 'localhost'

    # =========================================================================
    # PACKAGES
    # =========================================================================
    - name: Update apt cache
      apt:
        update_cache: true
        cache_valid_time: 3600
      when: ansible_os_family == "Debian"

    - name: Upgrade all packages
      apt:
        upgrade: safe
      when: ansible_os_family == "Debian"

    - name: Install common packages
      apt:
        name: "{{ common_packages }}"
        state: present
      when: ansible_os_family == "Debian"

    # =========================================================================
    # USERS & SSH
    # =========================================================================
    - name: Create admin group
      group:
        name: daarion-admin
        state: present

    - name: Create directories
      file:
        path: "{{ item }}"
        state: directory
        mode: '0755'
      loop:
        - "{{ scripts_dir }}"
        - "{{ config_dir }}"
        - "{{ logs_dir }}"
        - "{{ backup_dir }}"

    # =========================================================================
    # SSH HARDENING
    # =========================================================================
    # Every sshd_config edit is validated with `sshd -t` before it replaces
    # the live file, so a syntax error cannot lock the node out on restart.
    # `prohibit-password` keeps key-based root login possible; it only turns
    # off password authentication for root.
    - name: Restrict root SSH login to key-based authentication (workers only)
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^#?PermitRootLogin'
        line: 'PermitRootLogin prohibit-password'
        validate: /usr/sbin/sshd -t -f %s
      notify: restart sshd
      when: "'workers' in group_names"

    - name: Set SSH MaxAuthTries
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^#?MaxAuthTries'
        line: 'MaxAuthTries 3'
        validate: /usr/sbin/sshd -t -f %s
      notify: restart sshd

    - name: Set SSH ClientAliveInterval
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: '^#?ClientAliveInterval'
        line: 'ClientAliveInterval 300'
        validate: /usr/sbin/sshd -t -f %s
      notify: restart sshd

    # =========================================================================
    # KERNEL PARAMETERS
    # =========================================================================
    - name: Set kernel parameters for containers
      sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        state: present
        reload: true
      loop:
        - { name: 'net.ipv4.ip_forward', value: '1' }
        - { name: 'net.bridge.bridge-nf-call-iptables', value: '1' }
        - { name: 'net.bridge.bridge-nf-call-ip6tables', value: '1' }
        - { name: 'fs.inotify.max_user_watches', value: '524288' }
        - { name: 'fs.inotify.max_user_instances', value: '512' }
      # Deliberately best-effort: some params may not exist on all systems.
      ignore_errors: true

    # =========================================================================
    # VERIFICATION
    # =========================================================================
    - name: Verify setup
      debug:
        msg: |
          Node: {{ inventory_hostname }}
          Host: {{ ansible_host }}
          Datacenter: {{ datacenter | default('unknown') }}
          Role: {{ node_role | default('unknown') }}
          GPU: {{ gpu | default(false) }}

  handlers:
    # The OpenSSH unit is called `ssh` on Debian-family systems; the `sshd`
    # name is not guaranteed to exist there, so pick the name per OS family.
    - name: restart sshd
      service:
        name: "{{ 'ssh' if ansible_os_family == 'Debian' else 'sshd' }}"
        state: restarted

View File

@@ -0,0 +1,288 @@
# DAARION Network - Security Hardening Playbook
# Comprehensive security setup for all nodes
---
# Hardens every inventory host: security packages and Trivy, a default-deny
# UFW policy, fail2ban, auditd rules, kernel sysctls, an hourly security check
# script, a daily rkhunter scan and unattended upgrades.
# NOTE(review): `hosts: all` also matches the `local_dev` workstation entry of
# the production inventory - confirm that machine should be hardened too.
- name: Security Hardening
  hosts: all
  become: true
  vars:
    # SSH may listen on a non-default port (see ansible_port in the inventory).
    allowed_ssh_port: "{{ ansible_port | default(22) }}"
  tasks:
    # =========================================================================
    # SECURITY PACKAGES
    # =========================================================================
    - name: Install security packages
      apt:
        name: "{{ security_packages }}"
        state: present
      when: ansible_os_family == "Debian"

    # NOTE(review): this executes an unpinned script from the project's `main`
    # branch as root; consider pinning a release and verifying a checksum.
    # pipefail makes a failed download fail the task instead of letting `sh`
    # run on empty input and report success.
    - name: Install Trivy (vulnerability scanner)
      shell: |
        set -o pipefail
        curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
      args:
        creates: /usr/local/bin/trivy
        executable: /bin/bash

    # =========================================================================
    # UFW FIREWALL
    # =========================================================================
    # The rule set is rebuilt from scratch on every run.
    - name: UFW - Reset to defaults
      ufw:
        state: reset

    - name: UFW - Default deny incoming
      ufw:
        direction: incoming
        policy: deny

    # NOTE(review): with outgoing denied by default, only the ports listed in
    # firewall_allowed_outgoing can be reached; K3s agents dial the server on
    # 6443, so make sure that list covers cluster traffic.
    - name: UFW - Default deny outgoing
      ufw:
        direction: outgoing
        policy: deny

    - name: UFW - Allow SSH
      ufw:
        rule: allow
        port: "{{ allowed_ssh_port }}"
        proto: tcp

    # `default([])` is what makes an undefined list a no-op: a `when: ... is
    # defined` test is evaluated per item, after the loop expression has
    # already been templated, so it cannot guard the loop itself.
    - name: UFW - Allow necessary TCP ports
      ufw:
        rule: allow
        port: "{{ item }}"
        proto: tcp
      loop: "{{ firewall_allowed_tcp_ports | default([]) }}"

    - name: UFW - Allow necessary outgoing
      ufw:
        rule: allow
        direction: out
        port: "{{ item.port }}"
        proto: "{{ item.proto }}"
      loop: "{{ firewall_allowed_outgoing | default([]) }}"

    - name: UFW - Block internal networks
      ufw:
        rule: deny
        direction: out
        to_ip: "{{ item }}"
      loop: "{{ firewall_blocked_networks | default([]) }}"

    - name: UFW - Enable
      ufw:
        state: enabled

    # =========================================================================
    # FAIL2BAN
    # =========================================================================
    - name: Configure fail2ban
      copy:
        dest: /etc/fail2ban/jail.local
        content: |
          [DEFAULT]
          bantime = 3600
          findtime = 600
          maxretry = 3
          [sshd]
          enabled = true
          port = {{ allowed_ssh_port }}
          filter = sshd
          logpath = /var/log/auth.log
          maxretry = 3
          bantime = 86400
      notify: restart fail2ban

    - name: Enable fail2ban
      service:
        name: fail2ban
        enabled: true
        state: started

    # =========================================================================
    # AUDITD
    # =========================================================================
    - name: Configure auditd rules
      copy:
        dest: /etc/audit/rules.d/daarion.rules
        content: |
          # Monitor file changes in critical directories
          -w /etc/passwd -p wa -k passwd_changes
          -w /etc/shadow -p wa -k shadow_changes
          -w /etc/ssh/sshd_config -p wa -k sshd_config
          # Monitor Docker
          -w /var/lib/docker -p wa -k docker
          -w /etc/docker -p wa -k docker_config
          # Monitor cron
          -w /etc/crontab -p wa -k cron
          -w /etc/cron.d -p wa -k cron
          # Monitor tmp (malware indicator)
          -w /tmp -p x -k tmp_exec
          -w /var/tmp -p x -k var_tmp_exec
      notify: restart auditd

    - name: Enable auditd
      service:
        name: auditd
        enabled: true
        state: started

    # =========================================================================
    # KERNEL HARDENING
    # =========================================================================
    - name: Kernel security parameters
      sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        state: present
        reload: true
      loop:
        - { name: 'net.ipv4.conf.all.accept_redirects', value: '0' }
        - { name: 'net.ipv4.conf.default.accept_redirects', value: '0' }
        - { name: 'net.ipv4.conf.all.send_redirects', value: '0' }
        - { name: 'net.ipv4.conf.default.send_redirects', value: '0' }
        - { name: 'net.ipv4.tcp_syncookies', value: '1' }
        - { name: 'net.ipv4.icmp_echo_ignore_broadcasts', value: '1' }
        - { name: 'kernel.randomize_va_space', value: '2' }
        - { name: 'kernel.kptr_restrict', value: '2' }
        - { name: 'kernel.dmesg_restrict', value: '1' }

    # =========================================================================
    # SECURITY CHECK SCRIPT
    # =========================================================================
    # The script body is templated by Ansible before it is written. Only
    # {{ logs_dir }} is meant for Ansible; the Go-template placeholders passed
    # to `docker inspect` are wrapped in raw blocks so Jinja2 leaves them
    # alone (unescaped, `{{.Config.User}}` is a template syntax error and the
    # task never deploys the script).
    - name: Deploy security check script
      copy:
        dest: "{{ scripts_dir }}/security-check.sh"
        mode: '0755'
        content: |
          #!/bin/bash
          # DAARION Security Check Script
          # Runs hourly via cron
          LOG="{{ logs_dir }}/security-$(date +%Y%m%d).log"
          ALERT_FILE="/tmp/security_alert"
          log() {
            echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG"
          }
          log "=== Security Check Started ==="
          # Check for suspicious processes
          SUSPICIOUS=$(ps aux | grep -E "(xmrig|kdevtmp|kinsing|perfctl|httpd.*tmp|mysql.*tmp)" | grep -v grep)
          if [ -n "$SUSPICIOUS" ]; then
            log "CRITICAL: Suspicious process detected!"
            log "$SUSPICIOUS"
            pkill -9 -f "xmrig|kdevtmp|kinsing|perfctl"
            touch "$ALERT_FILE"
          fi
          # Check for executables in /tmp
          TMP_EXEC=$(find /tmp /var/tmp /dev/shm -type f -executable 2>/dev/null)
          if [ -n "$TMP_EXEC" ]; then
            log "WARNING: Executable files in tmp directories!"
            log "$TMP_EXEC"
            # Remove one path per line so names containing spaces or glob
            # characters are deleted as-is instead of being word-split.
            printf '%s\n' "$TMP_EXEC" | while IFS= read -r tmp_file; do
              rm -f -- "$tmp_file" 2>/dev/null
            done
          fi
          # Check CPU usage (potential mining)
          LOAD=$(cat /proc/loadavg | cut -d' ' -f1)
          CPU_COUNT=$(nproc)
          THRESHOLD=$(echo "$CPU_COUNT * 2" | bc)
          if (( $(echo "$LOAD > $THRESHOLD" | bc -l) )); then
            log "WARNING: High CPU load: $LOAD (threshold: $THRESHOLD)"
          fi
          # Check for unauthorized SSH keys
          for user_home in /root /home/*; do
            if [ -f "$user_home/.ssh/authorized_keys" ]; then
              KEY_COUNT=$(wc -l < "$user_home/.ssh/authorized_keys")
              log "INFO: $user_home has $KEY_COUNT SSH keys"
            fi
          done
          # Check failed SSH attempts (counts the whole current auth.log)
          FAILED_SSH=$(grep "Failed password" /var/log/auth.log 2>/dev/null | wc -l)
          log "INFO: Failed SSH attempts in current auth.log: $FAILED_SSH"
          # Check Docker containers
          if command -v docker &> /dev/null; then
            CONTAINER_COUNT=$(docker ps -q | wc -l)
            log "INFO: Running Docker containers: $CONTAINER_COUNT"
            # Check for containers running as root
            docker ps -q | while read -r cid; do
              CONTAINER_USER=$(docker inspect --format '{% raw %}{{.Config.User}}{% endraw %}' "$cid")
              CONTAINER_NAME=$(docker inspect --format '{% raw %}{{.Name}}{% endraw %}' "$cid")
              if [ -z "$CONTAINER_USER" ] || [ "$CONTAINER_USER" = "root" ] || [ "$CONTAINER_USER" = "0" ]; then
                log "WARNING: Container $CONTAINER_NAME running as root"
              fi
            done
          fi
          log "=== Security Check Completed ==="

    - name: Setup security cron
      cron:
        name: "Hourly security check"
        minute: "0"
        job: "{{ scripts_dir }}/security-check.sh"

    - name: Setup daily rkhunter scan
      cron:
        name: "Daily rkhunter scan"
        hour: "3"
        minute: "0"
        job: "rkhunter --update && rkhunter --check --skip-keypress > {{ logs_dir }}/rkhunter.log 2>&1"

    # =========================================================================
    # AUTO UPDATES
    # =========================================================================
    # The ${...} placeholders are expanded by unattended-upgrades itself, not
    # by Ansible.
    - name: Configure unattended-upgrades
      copy:
        dest: /etc/apt/apt.conf.d/50unattended-upgrades
        content: |
          Unattended-Upgrade::Allowed-Origins {
            "${distro_id}:${distro_codename}";
            "${distro_id}:${distro_codename}-security";
            "${distro_id}ESMApps:${distro_codename}-apps-security";
            "${distro_id}ESM:${distro_codename}-infra-security";
          };
          Unattended-Upgrade::AutoFixInterruptedDpkg "true";
          Unattended-Upgrade::Remove-Unused-Dependencies "true";
          Unattended-Upgrade::Automatic-Reboot "false";
      when: ansible_os_family == "Debian"

    # =========================================================================
    # VERIFICATION
    # =========================================================================
    # The Trivy fallback is grouped in a subshell before `head`; otherwise the
    # pipeline's status is that of `head` and 'not installed' is never shown.
    - name: Verify security setup
      shell: |
        echo "=== Security Status ==="
        echo "UFW: $(ufw status | head -1)"
        echo "Fail2ban: $(systemctl is-active fail2ban)"
        echo "Auditd: $(systemctl is-active auditd)"
        echo "Trivy: $( (trivy --version 2>/dev/null || echo 'not installed') | head -1)"
      register: security_status
      changed_when: false

    - name: Show security status
      debug:
        var: security_status.stdout_lines

  handlers:
    - name: restart fail2ban
      service:
        name: fail2ban
        state: restarted

    - name: restart auditd
      service:
        name: auditd
        state: restarted

View File

@@ -0,0 +1,183 @@
# DAARION Network - K3s Installation Playbook
# Lightweight Kubernetes cluster setup
---
# =============================================================================
# INSTALL K3S SERVER (MASTERS)
# =============================================================================
# Installs the K3s server on every master, waits for the API and the node to
# become ready, exposes the join token as the host fact `k3s_join_token` for
# the agent play, and fetches a kubeconfig rewritten to the node's public
# address.
- name: Install K3s Server on Masters
  hosts: masters
  become: true
  tasks:
    - name: Check if K3s is already installed
      stat:
        path: /etc/rancher/k3s/k3s.yaml
      register: k3s_installed

    - name: Download K3s installer
      get_url:
        url: https://get.k3s.io
        dest: /tmp/k3s-install.sh
        mode: '0755'
      when: not k3s_installed.stat.exists

    # Components to disable come from `k3s_disable` (group vars); the default
    # reproduces the previously hard-coded traefik/servicelb pair.
    # NOTE(review): --write-kubeconfig-mode 644 leaves the admin kubeconfig
    # readable by every local user on the master; every kubectl call in this
    # playbook runs with become, so a stricter mode may be sufficient.
    - name: Install K3s server
      shell: |
        INSTALL_K3S_VERSION={{ k3s_version }} \
        sh /tmp/k3s-install.sh server \
          {{ k3s_disable | default(['traefik', 'servicelb']) | map('regex_replace', '^', '--disable ') | join(' ') }} \
          --write-kubeconfig-mode 644 \
          --tls-san {{ ansible_host }} \
          --tls-san {{ inventory_hostname }} \
          --node-label "datacenter={{ datacenter }}" \
          --node-label "node-role={{ node_role }}" \
          --cluster-cidr {{ daarion_network_cidr | default('10.42.0.0/16') }} \
          --service-cidr {{ daarion_service_cidr | default('10.43.0.0/16') }}
      args:
        creates: /etc/rancher/k3s/k3s.yaml
      register: k3s_install

    - name: Wait for K3s to be ready
      wait_for:
        port: 6443
        delay: 10
        timeout: 300

    # Read-only probe, so it never reports a change.
    - name: Wait for node to be ready
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl wait --for=condition=Ready node/{{ inventory_hostname }} --timeout=300s
      register: node_ready
      retries: 10
      delay: 10
      until: node_ready.rc == 0
      changed_when: false

    # The join token is a cluster secret: keep it out of task output and logs.
    - name: Get K3s token
      slurp:
        src: /var/lib/rancher/k3s/server/node-token
      register: k3s_token_file
      no_log: true

    - name: Save K3s token as fact
      set_fact:
        k3s_join_token: "{{ k3s_token_file.content | b64decode | trim }}"
      no_log: true

    - name: Fetch kubeconfig
      fetch:
        src: /etc/rancher/k3s/k3s.yaml
        dest: "{{ playbook_dir }}/../kubeconfig/{{ inventory_hostname }}.yaml"
        flat: true

    # Dots are escaped so only the literal loopback address is rewritten.
    - name: Update kubeconfig with external IP
      delegate_to: localhost
      become: false
      replace:
        path: "{{ playbook_dir }}/../kubeconfig/{{ inventory_hostname }}.yaml"
        regexp: '127\.0\.0\.1'
        replace: "{{ ansible_host }}"

    - name: Show K3s status
      shell: |
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        kubectl get nodes -o wide
      register: k3s_status
      changed_when: false

    - name: Display K3s status
      debug:
        var: k3s_status.stdout_lines
# =============================================================================
# INSTALL K3S AGENT (WORKERS)
# =============================================================================
# Joins every worker to the cluster as a K3s agent, using the address and the
# `k3s_join_token` fact of the first host in the `masters` group.
- name: Install K3s Agent on Workers
  hosts: workers
  become: true
  vars:
    k3s_master_host: "{{ hostvars[groups['masters'][0]].ansible_host }}"
    k3s_master_token: "{{ hostvars[groups['masters'][0]].k3s_join_token }}"
  tasks:
    # The token fact only exists when the server play ran earlier in the same
    # invocation; fail with a clear message instead of an undefined-variable
    # error in the middle of the install.
    - name: Ensure the master join token is available
      assert:
        that:
          - hostvars[groups['masters'][0]].k3s_join_token is defined
        fail_msg: >-
          No K3s join token was collected from the first master; run this
          playbook without limiting it to the workers so the server play
          executes first.
        quiet: true

    - name: Check if K3s agent is already installed
      stat:
        path: /var/lib/rancher/k3s/agent
      register: k3s_agent_installed

    - name: Download K3s installer
      get_url:
        url: https://get.k3s.io
        dest: /tmp/k3s-install.sh
        mode: '0755'
      when: not k3s_agent_installed.stat.exists

    # Folded into a single line of --node-label flags; the GPU labels are
    # added only for hosts whose `gpu` variable is defined and truthy.
    - name: Build node labels
      set_fact:
        node_labels: >-
          --node-label datacenter={{ datacenter }}
          --node-label node-role={{ node_role }}
          {% if gpu is defined and gpu %}
          --node-label gpu=true
          --node-label gpu-type={{ gpu_type | default('unknown') }}
          --node-label gpu-memory={{ gpu_memory | default('unknown') }}
          {% endif %}

    # Installer settings, including the join token, are handed over through
    # the task environment so the secret is not part of the command line that
    # shows up in task output.
    - name: Install K3s agent
      shell: sh /tmp/k3s-install.sh agent {{ node_labels }}
      args:
        creates: /var/lib/rancher/k3s/agent
      environment:
        INSTALL_K3S_VERSION: "{{ k3s_version }}"
        K3S_URL: "https://{{ k3s_master_host }}:6443"
        K3S_TOKEN: "{{ k3s_master_token }}"
      register: k3s_agent_install

    # Fixed grace period after a fresh install only.
    - name: Wait for agent to connect
      pause:
        seconds: 30
      when: k3s_agent_install.changed
# =============================================================================
# VERIFY CLUSTER
# =============================================================================
# Post-install checks run on the masters: print the node list and cluster
# info, make sure the `daarion` namespace exists and label the GPU nodes.
- name: Verify K3s Cluster
  hosts: masters
  become: true
  # Every kubectl call below talks to the local K3s API with the admin
  # kubeconfig written by the installer.
  environment:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  tasks:
    - name: Get cluster nodes
      shell: kubectl get nodes -o wide
      register: cluster_nodes
      changed_when: false

    - name: Display cluster nodes
      debug:
        var: cluster_nodes.stdout_lines

    - name: Get cluster info
      shell: kubectl cluster-info
      register: cluster_info
      changed_when: false

    - name: Display cluster info
      debug:
        var: cluster_info.stdout_lines

    # Client-side dry run piped into apply: safe to repeat when the namespace
    # already exists.
    - name: Create daarion namespace
      shell: kubectl create namespace daarion --dry-run=client -o yaml | kubectl apply -f -
      changed_when: false

    # Best-effort: a labelling failure does not fail the play.
    - name: Label GPU nodes
      shell: kubectl label nodes {{ item }} nvidia.com/gpu=true --overwrite
      loop: "{{ groups['gpu_nodes'] | default([]) }}"
      when: groups['gpu_nodes'] is defined
      ignore_errors: true