Files
microdao-daarion/tests/test_infra_smoke.py
Apple bca81dc719 feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
2025-11-30 13:52:01 -08:00

337 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Infrastructure Smoke Tests
Basic API tests for post-deployment verification.
Run as part of the deploy pipeline or manually.
Usage:
pytest tests/test_infra_smoke.py -v
pytest tests/test_infra_smoke.py -v --base-url http://localhost:7001
"""
import os
import pytest
import requests
from datetime import datetime, timezone, timedelta
from typing import Optional
# Configuration
# Default service address: taken from the CITY_SERVICE_URL environment
# variable, falling back to the in-cluster hostname when it is not set.
BASE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
# Passed as the ``timeout`` argument of every GET/POST issued by the API client.
TIMEOUT = 10
# Node IDs
# Identifiers of the two nodes these smoke tests query.
NODE1_ID = "node-1-hetzner-gex44"
NODE2_ID = "node-2-macbook-m4max"
def pytest_addoption(parser):
    """Register the ``--base-url`` command line option.

    The option stores a string and defaults to the module-level ``BASE_URL``.

    NOTE(review): pytest documents this hook as being discovered in plugins
    and conftest.py files - confirm this module is actually loaded that way,
    otherwise the option is never registered.
    """
    option_settings = {
        "action": "store",
        "default": BASE_URL,
        "help": "Base URL of city-service API",
    }
    parser.addoption("--base-url", **option_settings)
@pytest.fixture
def base_url(request):
    """Return the base URL of the city-service API under test.

    The value of the ``--base-url`` command line option wins when it is set
    to a non-empty value; otherwise the module-level ``BASE_URL`` is used.
    """
    # Supplying a default keeps the fixture usable when the option was never
    # registered: without it ``getoption`` raises ValueError for an unknown
    # option name and every test depending on this fixture errors out.
    cli_value = request.config.getoption("--base-url", default=None)
    return cli_value or BASE_URL
@pytest.fixture
def api_client(base_url):
    """Yield a small HTTP client bound to ``base_url``.

    The client exposes ``get(path)`` and ``post(path, json)``; both prefix
    ``path`` with the base URL (trailing slash stripped) and apply ``TIMEOUT``
    to the request. The underlying ``requests.Session`` is closed once the
    test that requested the fixture has finished.
    """

    class Client:
        """Thin wrapper that prefixes request paths with the service base URL."""

        def __init__(self, base_url: str, session: requests.Session):
            self.base_url = base_url.rstrip("/")
            self.session = session

        def get(self, path: str) -> requests.Response:
            """Send a GET request to ``path`` relative to the base URL."""
            return self.session.get(f"{self.base_url}{path}", timeout=TIMEOUT)

        def post(self, path: str, json: dict) -> requests.Response:
            """Send a POST request with a JSON body to ``path``."""
            return self.session.post(
                f"{self.base_url}{path}", json=json, timeout=TIMEOUT
            )

    # The timeout is applied per request above; assigning a ``timeout``
    # attribute on the session has no effect in requests, so it is not set.
    # The context manager releases pooled connections after each test.
    with requests.Session() as session:
        yield Client(base_url, session)
# ==============================================================================
# Health Checks
# ==============================================================================
class TestHealthChecks:
    """Smoke checks for the basic service endpoints."""

    def test_healthz_endpoint(self, api_client):
        """/healthz answers 200 and reports a status of "ok"."""
        resp = api_client.get("/healthz")
        assert resp.status_code == 200, f"Health check failed: {resp.text}"

        payload = resp.json()
        reported_status = payload.get("status")
        assert reported_status == "ok", f"Unhealthy status: {payload}"

    def test_public_nodes_endpoint(self, api_client):
        """/public/nodes answers 200 with 'items' and 'total' keys."""
        resp = api_client.get("/public/nodes")
        assert resp.status_code == 200, f"Nodes endpoint failed: {resp.text}"

        payload = resp.json()
        for required_key in ("items", "total"):
            assert required_key in payload, f"Response missing '{required_key}' key"
# ==============================================================================
# Node Metrics Tests
# ==============================================================================
class TestNodeMetrics:
    """Checks of the per-node current metrics endpoint."""

    @pytest.mark.parametrize("node_id", [NODE1_ID, NODE2_ID])
    def test_node_metrics_endpoint(self, api_client, node_id):
        """The metrics endpoint answers 200 and carries the required fields."""
        response = api_client.get(f"/internal/node/{node_id}/metrics/current")
        assert response.status_code == 200, (
            f"Node metrics failed for {node_id}: {response.text}"
        )

        data = response.json()
        # Check required fields
        for field in ("node_id", "agent_count_router", "agent_count_system"):
            assert field in data, f"Missing {field}"

    def test_node1_has_agents(self, api_client):
        """NODE1 reports at least one agent in the router.

        Skipped when the metrics endpoint does not answer 200.
        """
        response = api_client.get(f"/internal/node/{NODE1_ID}/metrics/current")
        if response.status_code != 200:
            pytest.skip(f"NODE1 metrics not available: {response.status_code}")

        data = response.json()
        # ``or 0`` also covers a JSON null, which would otherwise make the
        # comparison below raise TypeError instead of failing the assertion.
        agent_count = data.get("agent_count_router") or 0
        assert agent_count >= 1, (
            f"NODE1 has {agent_count} agents in router, expected >= 1"
        )

    def test_node2_has_agents(self, api_client):
        """NODE2 reports at least one agent in the system.

        Skipped when the metrics endpoint does not answer 200.
        """
        response = api_client.get(f"/internal/node/{NODE2_ID}/metrics/current")
        if response.status_code != 200:
            pytest.skip(f"NODE2 metrics not available: {response.status_code}")

        data = response.json()
        # ``or 0`` also covers a JSON null (see test_node1_has_agents).
        agent_count = data.get("agent_count_system") or 0
        assert agent_count >= 1, (
            f"NODE2 has {agent_count} agents in system, expected >= 1"
        )
# ==============================================================================
# Node Agents Tests
# ==============================================================================
class TestNodeAgents:
    """Checks of the node agents endpoint (Guardian/Steward)."""

    @staticmethod
    def _agents_or_skip(api_client, node_id, label):
        """Return the decoded agents payload, skipping the test on a non-200 reply."""
        resp = api_client.get(f"/internal/node/{node_id}/agents")
        if resp.status_code != 200:
            pytest.skip(f"{label} agents not available: {resp.status_code}")
        return resp.json()

    @pytest.mark.parametrize("node_id", [NODE1_ID, NODE2_ID])
    def test_node_agents_endpoint(self, api_client, node_id):
        """The agents endpoint answers 200 and carries the expected keys."""
        resp = api_client.get(f"/internal/node/{node_id}/agents")
        assert resp.status_code == 200, (
            f"Node agents failed for {node_id}: {resp.text}"
        )

        payload = resp.json()
        expectations = (
            ("node_id", "Missing node_id"),
            ("total", "Missing total"),
            ("agents", "Missing agents list"),
        )
        for key, complaint in expectations:
            assert key in payload, complaint

    def test_node1_has_guardian(self, api_client):
        """NODE1 exposes a Node Guardian that has an id."""
        payload = self._agents_or_skip(api_client, NODE1_ID, "NODE1")
        node_guardian = payload.get("guardian")
        assert node_guardian is not None, "NODE1 missing Node Guardian"
        assert node_guardian.get("id"), "Guardian has no ID"

    def test_node1_has_steward(self, api_client):
        """NODE1 exposes a Node Steward that has an id."""
        payload = self._agents_or_skip(api_client, NODE1_ID, "NODE1")
        node_steward = payload.get("steward")
        assert node_steward is not None, "NODE1 missing Node Steward"
        assert node_steward.get("id"), "Steward has no ID"

    def test_node2_has_guardian(self, api_client):
        """NODE2 exposes a Node Guardian."""
        payload = self._agents_or_skip(api_client, NODE2_ID, "NODE2")
        node_guardian = payload.get("guardian")
        assert node_guardian is not None, "NODE2 missing Node Guardian"
# ==============================================================================
# DAGI Router Tests
# ==============================================================================
class TestDAGIRouter:
    """Checks of the per-node DAGI Router agents endpoint."""

    @pytest.mark.parametrize("node_id", [NODE1_ID, NODE2_ID])
    def test_dagi_router_agents_endpoint(self, api_client, node_id):
        """The router agents endpoint answers 200 with the expected keys.

        A 404 reply skips the test instead of failing it.
        """
        resp = api_client.get(f"/internal/node/{node_id}/dagi-router/agents")
        status = resp.status_code
        # A 404 is treated as "router not configured" rather than a failure.
        if status == 404:
            pytest.skip(f"DAGI Router not configured for {node_id}")
        assert status == 200, f"DAGI Router failed for {node_id}: {resp.text}"

        payload = resp.json()
        expectations = (
            ("node_id", "Missing node_id"),
            ("summary", "Missing summary"),
            ("agents", "Missing agents list"),
        )
        for key, complaint in expectations:
            assert key in payload, complaint

    def test_node1_router_has_agents(self, api_client):
        """NODE1's router summary reports at least one agent.

        Skipped when the endpoint does not answer 200 or reports zero agents.
        """
        resp = api_client.get(f"/internal/node/{NODE1_ID}/dagi-router/agents")
        if resp.status_code != 200:
            pytest.skip(f"NODE1 DAGI Router not available: {resp.status_code}")

        router_total = resp.json().get("summary", {}).get("router_total", 0)
        # An empty router is reported as a skip, not a failure.
        if router_total == 0:
            pytest.skip("NODE1 DAGI Router has 0 agents (may not be configured)")
        assert router_total >= 1, (
            f"DAGI Router has {router_total} agents, expected >= 1"
        )
# ==============================================================================
# Core Agents Tests
# ==============================================================================
class TestCoreAgents:
    """Checks of the agent prompt endpoints for core agents."""

    def test_prompts_status_endpoint(self, api_client):
        """The batch prompts-status endpoint answers 200 with a dict 'status'."""
        requested_ids = ["agent-daarwizz", "agent-devtools", "agent-soul"]
        resp = api_client.post(
            "/internal/agents/prompts/status", {"agent_ids": requested_ids}
        )
        assert resp.status_code == 200, f"Prompts status failed: {resp.text}"

        payload = resp.json()
        assert "status" in payload, "Missing status in response"
        assert isinstance(payload["status"], dict), "Status should be a dict"

    def test_daarwizz_runtime_prompt(self, api_client):
        """DAARWIZZ has a core runtime prompt under one of its known slugs.

        Skipped when no slug answers 200 with prompts configured.
        """
        # Both slugs are tried in order; the first one with prompts decides.
        for candidate in ("agent-daarwizz", "daarwizz"):
            resp = api_client.get(f"/internal/agents/{candidate}/prompts/runtime")
            if resp.status_code != 200:
                continue
            payload = resp.json()
            if not payload.get("has_prompts"):
                continue
            core_prompt = payload.get("prompts", {}).get("core")
            assert core_prompt, "DAARWIZZ missing core prompt"
            return
        pytest.skip("DAARWIZZ agent not found or no prompts configured")

    def test_runtime_system_prompt_endpoint(self, api_client):
        """The system-prompt endpoint returns an agent id and a non-trivial prompt.

        A 404 reply skips the test.
        """
        resp = api_client.get("/internal/agents/agent-daarwizz/system-prompt")
        status = resp.status_code
        if status == 404:
            pytest.skip("DAARWIZZ agent not found")
        assert status == 200, f"System prompt failed: {resp.text}"

        payload = resp.json()
        assert "agent_id" in payload, "Missing agent_id"
        assert "system_prompt" in payload, "Missing system_prompt"
        prompt_length = len(payload.get("system_prompt", ""))
        assert prompt_length > 10, "System prompt too short"
# ==============================================================================
# Integration Tests
# ==============================================================================
class TestIntegration:
    """End-to-end integration tests"""

    def test_node_to_agents_flow(self, api_client):
        """Follow the chain node -> agents -> runtime prompts for NODE1.

        Skipped when the node agents endpoint does not answer 200 or lists no
        agents.
        """
        # Get node
        response = api_client.get(f"/internal/node/{NODE1_ID}/agents")
        if response.status_code != 200:
            pytest.skip(f"NODE1 not available: {response.status_code}")

        agents = response.json().get("agents", [])
        if not agents:
            pytest.skip("No agents found for NODE1")

        # Get first agent's prompts
        agent = agents[0]
        agent_id = agent.get("id")
        # Without this guard a missing id would be formatted into the URL as
        # the literal string "None" and the test would probe a bogus agent.
        assert agent_id, f"First agent of NODE1 has no id: {agent}"

        response = api_client.get(f"/internal/agents/{agent_id}/prompts/runtime")
        # Should return successfully even if no prompts
        assert response.status_code == 200, (
            f"Agent prompts failed for {agent_id}: {response.text}"
        )

    def test_public_nodes_have_metrics(self, api_client):
        """The first public node carries a non-empty metrics object.

        Skipped when there are no nodes or the first node has no metrics
        object, so an unverified run is reported rather than counted as a pass.
        """
        response = api_client.get("/public/nodes")
        assert response.status_code == 200, (
            f"Nodes endpoint failed: {response.text}"
        )

        items = response.json().get("items", [])
        if not items:
            pytest.skip("No nodes in system")

        # Check first node has metrics
        metrics = items[0].get("metrics")
        # A missing or null metrics object cannot be inspected; the membership
        # checks below would raise TypeError on None.
        if metrics is None:
            pytest.skip("First node has no metrics object")
        assert "cpu_cores" in metrics or "ram_total" in metrics, (
            "Metrics object empty"
        )
# ==============================================================================
# Run as script
# ==============================================================================
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file as a script
    # (e.g. from a deploy pipeline) fails when any smoke test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))