# microdao-daarion/tests/test_infra_smoke.py

"""
Infrastructure Smoke Tests

Basic API tests for post-deploy verification.
Run as part of the deploy pipeline or manually.

Usage:
    pytest tests/test_infra_smoke.py -v
    pytest tests/test_infra_smoke.py -v --base-url http://localhost:7001
"""

import os
import pytest
import requests
from datetime import datetime, timezone, timedelta
from typing import Optional

# Runtime configuration: the target service URL can be overridden through the
# CITY_SERVICE_URL environment variable; TIMEOUT is passed to every request.
TIMEOUT = 10
BASE_URL = os.environ.get("CITY_SERVICE_URL", "http://daarion-city-service:7001")

# Identifiers of the two nodes the smoke tests query
NODE1_ID, NODE2_ID = "node-1-hetzner-gex44", "node-2-macbook-m4max"


def pytest_addoption(parser):
    """Register the ``--base-url`` command line option on ``parser``.

    The option stores a string and defaults to the module-level ``BASE_URL``.
    """
    # NOTE(review): pytest documents this hook as being picked up from
    # conftest.py files and plugins — confirm it is actually discovered here.
    option_settings = {
        "action": "store",
        "default": BASE_URL,
        "help": "Base URL of city-service API",
    }
    parser.addoption("--base-url", **option_settings)


@pytest.fixture
def base_url(request):
    """Return the base URL of the service under test.

    The ``--base-url`` command line value wins when it is present and
    non-empty; otherwise the module-level ``BASE_URL`` is used.
    """
    # ``default=None`` keeps the lookup from raising ValueError when the
    # ``--base-url`` option was never registered (pytest only honours
    # ``pytest_addoption`` hooks from conftest.py files and plugins).
    cli_value = request.config.getoption("--base-url", default=None)
    return cli_value or BASE_URL


@pytest.fixture
def api_client(base_url):
    """Yield a small HTTP client bound to ``base_url``.

    The client exposes ``get(path)`` and ``post(path, json)``; both prepend
    the base URL (with any trailing slash stripped) and apply ``TIMEOUT`` to
    the request. The underlying ``requests.Session`` is closed once the test
    that requested the fixture has finished.
    """

    class Client:
        """Thin wrapper that resolves relative paths against a base URL."""

        def __init__(self, base_url: str, session: requests.Session):
            self.base_url = base_url.rstrip("/")
            self.session = session

        def get(self, path: str) -> requests.Response:
            """Send a GET request to ``path`` relative to the base URL."""
            return self.session.get(f"{self.base_url}{path}", timeout=TIMEOUT)

        def post(self, path: str, json: dict) -> requests.Response:
            """Send a POST request with a JSON body to ``path``."""
            return self.session.post(f"{self.base_url}{path}", json=json, timeout=TIMEOUT)

    # requests ignores a ``timeout`` attribute set on the Session object, so
    # the timeout is passed explicitly on every call above instead.
    # The context manager closes the session (and its pooled connections)
    # during fixture teardown, i.e. once control returns after the ``yield``.
    with requests.Session() as session:
        yield Client(base_url, session)


# ==============================================================================
# Health Checks
# ==============================================================================

class TestHealthChecks:
    """Liveness checks for the service and its public node listing"""

    def test_healthz_endpoint(self, api_client):
        """GET /healthz must answer 200 with status 'ok' in the JSON body"""
        health = api_client.get("/healthz")
        assert health.status_code == 200, f"Health check failed: {health.text}"

        body = health.json()
        reported_status = body.get("status")
        assert reported_status == "ok", f"Unhealthy status: {body}"

    def test_public_nodes_endpoint(self, api_client):
        """GET /public/nodes must answer 200 with 'items' and 'total' keys"""
        listing = api_client.get("/public/nodes")
        assert listing.status_code == 200, f"Nodes endpoint failed: {listing.text}"

        body = listing.json()
        for required_key in ("items", "total"):
            assert required_key in body, f"Response missing '{required_key}' key"


# ==============================================================================
# Node Metrics Tests
# ==============================================================================

class TestNodeMetrics:
    """Checks against the per-node current-metrics endpoint"""

    @pytest.mark.parametrize("node_id", [NODE1_ID, NODE2_ID])
    def test_node_metrics_endpoint(self, api_client, node_id):
        """Current metrics for each node must answer 200 with the required fields"""
        metrics_response = api_client.get(f"/internal/node/{node_id}/metrics/current")
        assert metrics_response.status_code == 200, (
            f"Node metrics failed for {node_id}: {metrics_response.text}"
        )

        payload = metrics_response.json()

        # Fields every metrics payload is required to carry
        for field in ("node_id", "agent_count_router", "agent_count_system"):
            assert field in payload, f"Missing {field}"

    def test_node1_has_agents(self, api_client):
        """NODE1 must report >= 1 router agent; skipped when metrics are unavailable"""
        metrics_response = api_client.get(f"/internal/node/{NODE1_ID}/metrics/current")
        status = metrics_response.status_code
        if status != 200:
            pytest.skip(f"NODE1 metrics not available: {status}")

        # A missing counter is treated as zero and therefore fails the check
        router_agents = metrics_response.json().get("agent_count_router", 0)
        assert router_agents >= 1, f"NODE1 has {router_agents} agents in router, expected >= 1"

    def test_node2_has_agents(self, api_client):
        """NODE2 must report >= 1 system agent; skipped when metrics are unavailable"""
        metrics_response = api_client.get(f"/internal/node/{NODE2_ID}/metrics/current")
        status = metrics_response.status_code
        if status != 200:
            pytest.skip(f"NODE2 metrics not available: {status}")

        # A missing counter is treated as zero and therefore fails the check
        system_agents = metrics_response.json().get("agent_count_system", 0)
        assert system_agents >= 1, f"NODE2 has {system_agents} agents in system, expected >= 1"


# ==============================================================================
# Node Agents Tests
# ==============================================================================

class TestNodeAgents:
    """Checks for the per-node agents endpoint (Guardian/Steward)"""

    @pytest.mark.parametrize("node_id", [NODE1_ID, NODE2_ID])
    def test_node_agents_endpoint(self, api_client, node_id):
        """The agents endpoint must answer 200 with node_id, total and agents"""
        agents_response = api_client.get(f"/internal/node/{node_id}/agents")
        assert agents_response.status_code == 200, (
            f"Node agents failed for {node_id}: {agents_response.text}"
        )

        payload = agents_response.json()
        expectations = (
            ("node_id", "Missing node_id"),
            ("total", "Missing total"),
            ("agents", "Missing agents list"),
        )
        for key, failure in expectations:
            assert key in payload, failure

    def test_node1_has_guardian(self, api_client):
        """NODE1 must expose a guardian with an id; skipped when unavailable"""
        agents_response = api_client.get(f"/internal/node/{NODE1_ID}/agents")
        status = agents_response.status_code
        if status != 200:
            pytest.skip(f"NODE1 agents not available: {status}")

        guardian = agents_response.json().get("guardian")
        assert guardian is not None, "NODE1 missing Node Guardian"
        assert guardian.get("id"), "Guardian has no ID"

    def test_node1_has_steward(self, api_client):
        """NODE1 must expose a steward with an id; skipped when unavailable"""
        agents_response = api_client.get(f"/internal/node/{NODE1_ID}/agents")
        status = agents_response.status_code
        if status != 200:
            pytest.skip(f"NODE1 agents not available: {status}")

        steward = agents_response.json().get("steward")
        assert steward is not None, "NODE1 missing Node Steward"
        assert steward.get("id"), "Steward has no ID"

    def test_node2_has_guardian(self, api_client):
        """NODE2 must expose a guardian; skipped when unavailable"""
        agents_response = api_client.get(f"/internal/node/{NODE2_ID}/agents")
        status = agents_response.status_code
        if status != 200:
            pytest.skip(f"NODE2 agents not available: {status}")

        # Only presence is checked here; the guardian's id is not inspected
        guardian = agents_response.json().get("guardian")
        assert guardian is not None, "NODE2 missing Node Guardian"


# ==============================================================================
# DAGI Router Tests
# ==============================================================================

class TestDAGIRouter:
    """Checks for the per-node DAGI Router agents endpoint"""

    @pytest.mark.parametrize("node_id", [NODE1_ID, NODE2_ID])
    def test_dagi_router_agents_endpoint(self, api_client, node_id):
        """The router agents endpoint must answer 200 with the expected keys"""
        router_response = api_client.get(f"/internal/node/{node_id}/dagi-router/agents")
        status = router_response.status_code

        # A 404 is tolerated: the router may simply not be set up for this node
        if status == 404:
            pytest.skip(f"DAGI Router not configured for {node_id}")

        assert status == 200, f"DAGI Router failed for {node_id}: {router_response.text}"

        payload = router_response.json()
        expectations = (
            ("node_id", "Missing node_id"),
            ("summary", "Missing summary"),
            ("agents", "Missing agents list"),
        )
        for key, failure in expectations:
            assert key in payload, failure

    def test_node1_router_has_agents(self, api_client):
        """NODE1's router summary must count >= 1 agent; skipped when unavailable or empty"""
        router_response = api_client.get(f"/internal/node/{NODE1_ID}/dagi-router/agents")
        status = router_response.status_code
        if status != 200:
            pytest.skip(f"NODE1 DAGI Router not available: {status}")

        router_total = router_response.json().get("summary", {}).get("router_total", 0)

        # An empty router is reported as a skip rather than a failure,
        # since the router may not be configured
        if router_total == 0:
            pytest.skip("NODE1 DAGI Router has 0 agents (may not be configured)")

        assert router_total >= 1, f"DAGI Router has {router_total} agents, expected >= 1"


# ==============================================================================
# Core Agents Tests
# ==============================================================================

class TestCoreAgents:
    """Checks for the core agents' prompt endpoints"""

    def test_prompts_status_endpoint(self, api_client):
        """The batch prompts-status endpoint must answer 200 with a dict under 'status'"""
        request_body = {"agent_ids": ["agent-daarwizz", "agent-devtools", "agent-soul"]}
        status_response = api_client.post("/internal/agents/prompts/status", request_body)

        assert status_response.status_code == 200, f"Prompts status failed: {status_response.text}"

        payload = status_response.json()
        assert "status" in payload, "Missing status in response"
        assert isinstance(payload["status"], dict), "Status should be a dict"

    def test_daarwizz_runtime_prompt(self, api_client):
        """DAARWIZZ must have a core runtime prompt; skipped when none is configured"""
        # The agent may be registered under either of these slugs
        for candidate in ("agent-daarwizz", "daarwizz"):
            prompt_response = api_client.get(f"/internal/agents/{candidate}/prompts/runtime")
            if prompt_response.status_code != 200:
                continue

            payload = prompt_response.json()
            if not payload.get("has_prompts"):
                continue

            # First slug that reports prompts decides the outcome
            assert payload.get("prompts", {}).get("core"), "DAARWIZZ missing core prompt"
            return

        pytest.skip("DAARWIZZ agent not found or no prompts configured")

    def test_runtime_system_prompt_endpoint(self, api_client):
        """The system-prompt endpoint must return agent_id and a non-trivial prompt"""
        prompt_response = api_client.get("/internal/agents/agent-daarwizz/system-prompt")
        status = prompt_response.status_code

        if status == 404:
            pytest.skip("DAARWIZZ agent not found")

        assert status == 200, f"System prompt failed: {prompt_response.text}"

        payload = prompt_response.json()
        assert "agent_id" in payload, "Missing agent_id"
        assert "system_prompt" in payload, "Missing system_prompt"

        prompt_length = len(payload.get("system_prompt", ""))
        assert prompt_length > 10, "System prompt too short"


# ==============================================================================
# Integration Tests
# ==============================================================================

class TestIntegration:
    """End-to-end integration tests"""

    def test_node_to_agents_flow(self, api_client):
        """Test full flow: node → agents → prompts.

        Fetches NODE1's agents, then requests the runtime prompts of the first
        agent. Skipped when NODE1 is unavailable or lists no agents.
        """
        # Get node
        response = api_client.get(f"/internal/node/{NODE1_ID}/agents")

        if response.status_code != 200:
            pytest.skip(f"NODE1 not available: {response.status_code}")

        agents = response.json().get("agents", [])

        if not agents:
            pytest.skip("No agents found for NODE1")

        # Get first agent's prompts
        agent = agents[0]
        agent_id = agent.get("id")

        # Without an id the follow-up URL would be built from the literal
        # "None", so fail here with a message that names the real problem.
        assert agent_id, f"First NODE1 agent has no id: {agent}"

        response = api_client.get(f"/internal/agents/{agent_id}/prompts/runtime")

        # Should return successfully even if no prompts
        assert response.status_code == 200, f"Agent prompts failed for {agent_id}: {response.text}"

    def test_public_nodes_have_metrics(self, api_client):
        """Test that metrics on the first public node, when present, are populated.

        Skipped when the node list is empty. A node without a "metrics" key
        is tolerated; only a present-but-empty metrics object fails.
        """
        response = api_client.get("/public/nodes")

        assert response.status_code == 200, f"Nodes endpoint failed: {response.text}"
        data = response.json()

        items = data.get("items", [])
        if not items:
            pytest.skip("No nodes in system")

        # Check first node has metrics
        node = items[0]

        # The metrics object is optional here; validate it only when present
        if "metrics" in node:
            metrics = node["metrics"]
            assert "cpu_cores" in metrics or "ram_total" in metrics, "Metrics object empty"


# ==============================================================================
# Run as script
# ==============================================================================

if __name__ == "__main__":
    # Propagate pytest's exit status so that a failing run is visible to the
    # caller (e.g. a deploy pipeline) when this file is executed as a script.
    raise SystemExit(pytest.main([__file__, "-v"]))