feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
This commit is contained in:
280
tests/test_dagi_router_api.py
Normal file
280
tests/test_dagi_router_api.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""
DAGI Router API Tests

Tests for the endpoints:
- GET /city/internal/node/{node_id}/dagi-router/agents
- GET /city/internal/node/{node_id}/metrics/current
- POST /city/internal/node/{node_id}/dagi-audit/run
- GET /city/internal/node/{node_id}/dagi-audit
- POST /city/internal/node/{node_id}/dagi-router/phantom/sync
- POST /city/internal/node/{node_id}/dagi-router/stale/mark
"""
|
||||
|
||||
import os
from typing import Any, Dict

import httpx
import pytest
|
||||
|
||||
# Test configuration
# Base URL of the city-service under test. Defaults to the local instance;
# set the CITY_SERVICE_URL environment variable to target another deployment.
CITY_SERVICE_URL = os.environ.get("CITY_SERVICE_URL", "http://localhost:7001")
# Node identifiers exercised by the tests in this module.
NODE1_ID = "node-1-hetzner-gex44"
NODE2_ID = "node-2-macbook-m4max"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Fixtures
|
||||
# ============================================================================
|
||||
|
||||
@pytest.fixture
def client():
    """Yield an HTTP client bound to the city-service base URL.

    The client is created inside a ``with`` block so that it is closed
    once the requesting test has finished, instead of being left open.
    """
    with httpx.Client(base_url=CITY_SERVICE_URL, timeout=30.0) as http_client:
        yield http_client
|
||||
|
||||
|
||||
@pytest.fixture
def node_ids():
    """Provide the identifiers of the nodes the tests run against."""
    known_nodes = [NODE1_ID]
    known_nodes.append(NODE2_ID)
    return known_nodes
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# DAGI Router Agents Tests
|
||||
# ============================================================================
|
||||
|
||||
class TestDAGIRouterAgents:
    """Tests for GET /city/internal/node/{node_id}/dagi-router/agents."""

    def test_get_agents_returns_valid_response(self, client):
        """The endpoint returns the expected response structure."""
        response = client.get(f"/city/internal/node/{NODE1_ID}/dagi-router/agents")

        assert response.status_code == 200
        data = response.json()

        # Top-level structure
        assert "node_id" in data
        assert "summary" in data
        assert "agents" in data

        # Summary keys
        summary = data["summary"]
        assert "active" in summary
        assert "phantom" in summary
        assert "stale" in summary
        assert "router_total" in summary
        assert "system_total" in summary

        # Types
        assert isinstance(summary["active"], int)
        assert isinstance(summary["phantom"], int)
        assert isinstance(data["agents"], list)

    def test_get_agents_for_unknown_node(self, client):
        """An unknown node yields an empty response rather than an error."""
        response = client.get("/city/internal/node/unknown-node-id/dagi-router/agents")

        # Expected: 200 with an empty list, not 404
        assert response.status_code == 200
        data = response.json()

        assert data["agents"] == []
        assert data["summary"]["active"] == 0

    def test_agents_have_required_fields(self, client):
        """Every returned agent carries the required fields."""
        response = client.get(f"/city/internal/node/{NODE1_ID}/dagi-router/agents")

        assert response.status_code == 200
        data = response.json()

        valid_statuses = ("active", "phantom", "stale", "error")

        # An empty list is acceptable. Otherwise each agent is validated --
        # not only the first one -- so a malformed later entry cannot slip by.
        for agent in data["agents"]:
            # Required fields
            for key in ("id", "name", "status"):
                assert key in agent, f"agent is missing required field: {key}"

            # Status must be valid
            assert agent["status"] in valid_statuses
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Node Metrics Tests
|
||||
# ============================================================================
|
||||
|
||||
class TestNodeMetrics:
    """Tests for GET /city/internal/node/{node_id}/metrics/current."""

    def test_get_metrics_returns_valid_response(self, client):
        """The endpoint answers with the expected set of keys."""
        metrics_url = f"/city/internal/node/{NODE1_ID}/metrics/current"
        resp = client.get(metrics_url)
        assert resp.status_code == 200

        payload = resp.json()

        # Required fields
        assert "node_id" in payload
        assert payload["node_id"] == NODE1_ID

        # Metric fields
        expected_keys = (
            "cpu_cores",
            "cpu_usage",
            "gpu_model",
            "gpu_memory_total",
            "gpu_memory_used",
            "ram_total",
            "ram_used",
            "disk_total",
            "disk_used",
            "agent_count_router",
            "agent_count_system",
        )
        for key in expected_keys:
            assert key in payload

    def test_get_metrics_for_unknown_node(self, client):
        """An unknown node still gets a minimal response."""
        resp = client.get("/city/internal/node/unknown-node-id/metrics/current")

        # Expected: 200 with a minimal payload
        assert resp.status_code == 200

        payload = resp.json()
        assert payload["node_id"] == "unknown-node-id"

    def test_metrics_have_numeric_values(self, client):
        """Metric values are numbers."""
        resp = client.get(f"/city/internal/node/{NODE1_ID}/metrics/current")
        assert resp.status_code == 200

        data = resp.json()

        # All numeric fields should be numbers
        numeric_fields = (
            "cpu_cores",
            "cpu_usage",
            "gpu_memory_total",
            "gpu_memory_used",
            "ram_total",
            "ram_used",
            "disk_total",
            "disk_used",
            "agent_count_router",
            "agent_count_system",
        )
        for field in numeric_fields:
            value = data[field]
            assert isinstance(value, (int, float)), f"{field} should be numeric"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# DAGI Audit Tests
|
||||
# ============================================================================
|
||||
|
||||
class TestDAGIAudit:
    """Tests for the DAGI audit endpoints of a node."""

    def test_run_audit_returns_valid_response(self, client):
        """POSTing an audit run returns the expected structure."""
        run_url = f"/city/internal/node/{NODE1_ID}/dagi-audit/run"
        resp = client.post(run_url)
        assert resp.status_code == 200

        body = resp.json()

        assert "status" in body
        assert body["status"] == "completed"
        assert "summary" in body
        assert "message" in body

        # Summary fields
        audit_summary = body["summary"]
        for key in (
            "router_total",
            "db_total",
            "active_count",
            "phantom_count",
            "stale_count",
        ):
            assert key in audit_summary

    def test_get_audit_summary(self, client):
        """GETting the audit summary returns data (or an empty body)."""
        resp = client.get(f"/city/internal/node/{NODE1_ID}/dagi-audit")

        # Either 200 with data or 200 with null
        assert resp.status_code == 200

        body = resp.json()
        if not body:
            # Nothing recorded yet: no further structure to verify.
            return

        for key in ("node_id", "timestamp", "active_count"):
            assert key in body
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Phantom/Stale Sync Tests
|
||||
# ============================================================================
|
||||
|
||||
class TestPhantomStaleSync:
    """Tests for the phantom/stale sync endpoints."""

    def test_phantom_sync_empty_list(self, client):
        """Syncing an empty list of phantom agents does not fail."""
        sync_url = f"/city/internal/node/{NODE1_ID}/dagi-router/phantom/sync"
        request_body = {"agent_ids": []}

        resp = client.post(sync_url, json=request_body)
        assert resp.status_code == 200

        result = resp.json()
        assert result["status"] == "completed"
        assert result["created_count"] == 0

    def test_stale_mark_empty_list(self, client):
        """Marking an empty list of agents as stale does not fail."""
        mark_url = f"/city/internal/node/{NODE1_ID}/dagi-router/stale/mark"
        request_body = {"agent_ids": []}

        resp = client.post(mark_url, json=request_body)
        assert resp.status_code == 200

        result = resp.json()
        assert result["status"] == "completed"
        assert result["marked_count"] == 0
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Integration Tests
|
||||
# ============================================================================
|
||||
|
||||
class TestIntegration:
    """Integration tests spanning several endpoints."""

    def test_full_audit_flow(self, client):
        """Full cycle: run audit, then fetch agents, then fetch metrics."""
        # 1. Run audit
        audit_response = client.post(f"/city/internal/node/{NODE1_ID}/dagi-audit/run")
        assert audit_response.status_code == 200
        audit_data = audit_response.json()
        assert audit_data["status"] == "completed"

        # 2. Get agents
        agents_response = client.get(f"/city/internal/node/{NODE1_ID}/dagi-router/agents")
        assert agents_response.status_code == 200
        agents_data = agents_response.json()

        # 3. Get metrics
        metrics_response = client.get(f"/city/internal/node/{NODE1_ID}/metrics/current")
        assert metrics_response.status_code == 200

        # 4. Verify consistency
        # Each count is checked on its own: asserting only that the sum is
        # non-negative would let a negative count be masked by the others.
        # NOTE(review): strict equality between the audit summary and the
        # agents summary is not asserted here -- confirm the key mapping
        # between the two responses before tightening this check.
        agents_summary = agents_data["summary"]
        for bucket in ("active", "phantom", "stale"):
            assert agents_summary[bucket] >= 0, f"{bucket} count must not be negative"

    def test_both_nodes_accessible(self, client, node_ids):
        """Both nodes are reachable through the API."""
        for node_id in node_ids:
            response = client.get(f"/city/internal/node/{node_id}/metrics/current")
            assert response.status_code == 200

            data = response.json()
            assert data["node_id"] == node_id
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Run tests
|
||||
# ============================================================================
|
||||
|
||||
if __name__ == "__main__":
    # Propagate pytest's exit code so a failing run is visible to the caller
    # (shell, CI) instead of the script always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
|
||||
|
||||
Reference in New Issue
Block a user