feat(P0/P1/P2): Add E2E agent prober, version pinning, prometheus fixes

This commit is contained in:
Apple
2026-01-28 07:06:07 -08:00
parent 9dcc3563f6
commit a3923cd96f
5 changed files with 303 additions and 0 deletions

View File

@@ -3106,3 +3106,46 @@ async def health():
"agents_count": len(AGENT_REGISTRY),
"timestamp": datetime.utcnow().isoformat(),
}
def _agent_ping_error_text(exc: Exception) -> str:
    """Return a description of *exc*, at most 100 characters and never blank."""
    # str(exc) is empty for exceptions raised without a message; fall back to
    # the exception type name so the reported error is still informative.
    return (str(exc) or type(exc).__name__)[:100]


async def _agent_ping_check(client, base_url: str) -> tuple:
    """GET ``{base_url}/health`` and report whether it answered HTTP 200.

    Args:
        client: The ``httpx.AsyncClient`` used to issue the request.
        base_url: Base URL of the service to probe (no trailing ``/health``).

    Returns:
        An ``(ok, error)`` tuple. ``ok`` is True only for an HTTP 200 reply.
        ``error`` is None unless the request raised, in which case it is the
        short description produced by ``_agent_ping_error_text``.
    """
    try:
        resp = await client.get(f"{base_url}/health")
    except Exception as exc:  # probe boundary: report the failure, never raise
        return False, _agent_ping_error_text(exc)
    return resp.status_code == 200, None


@router.post("/debug/agent_ping")
async def debug_agent_ping(request: dict = None):
    """
    E2E probe endpoint - checks the services the agent pipeline depends on.
    Used by agent-e2e-prober for monitoring.

    Issues ``GET /health`` against the router (``ROUTER_URL``) and the memory
    service (``MEMORY_SERVICE_URL``). Each check is isolated, so a failure of
    one service never hides the status of the other.

    Args:
        request: Optional request body; accepted but not used.

    Returns:
        A dict with:
          - "success": True only if BOTH services answered HTTP 200 and no
            error occurred.
          - "latency_seconds": total time spent probing, rounded to 3 places.
          - "checks": per-service booleans ("router", "memory_service").
          - "timestamp": naive UTC time in ISO-8601 format.
          - "error": present only when a request raised; the first error
            message, truncated to 100 characters.
        This handler does not raise; failures are reported in the payload.
    """
    import time

    # perf_counter is monotonic, so the latency cannot be skewed by wall-clock
    # adjustments happening while the probe is running.
    start = time.perf_counter()

    targets = (
        ("router", os.getenv("ROUTER_URL", "http://router:8000")),
        ("memory_service", os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")),
    )
    # Pessimistic defaults: a check that never ran is reported as failed.
    checks = {name: False for name, _ in targets}
    error = None

    try:
        # One client is shared by both checks instead of building two.
        async with httpx.AsyncClient(timeout=10.0) as client:
            for name, base_url in targets:
                ok, check_error = await _agent_ping_check(client, base_url)
                checks[name] = ok
                if error is None:
                    error = check_error
    except Exception as exc:
        # Last-resort guard (e.g. client setup/teardown failure) so the probe
        # endpoint always answers with a payload instead of an HTTP 500.
        if error is None:
            error = _agent_ping_error_text(exc)

    result = {
        "success": error is None and all(checks.values()),
        "latency_seconds": round(time.perf_counter() - start, 3),
        "checks": checks,
        "timestamp": datetime.utcnow().isoformat(),
    }
    if error is not None:
        result["error"] = error
    return result