feat(P0/P1/P2): Add E2E agent prober, version pinning, prometheus fixes
This commit is contained in:
@@ -3106,3 +3106,46 @@ async def health():
|
||||
"agents_count": len(AGENT_REGISTRY),
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
}
|
||||
|
||||
|
||||
@router.post("/debug/agent_ping")
async def debug_agent_ping(request: dict = None):
    """
    E2E probe endpoint - checks the services the agent pipeline depends on.

    Used by agent-e2e-prober for monitoring. Sends ``GET /health`` to the
    router (``ROUTER_URL``, default ``http://router:8000``) and to the memory
    service (``MEMORY_SERVICE_URL``, default ``http://memory-service:8000``)
    and reports each result separately.

    Args:
        request: Optional payload. It is accepted for interface
            compatibility but is not read by this function.

    Returns:
        A dict with:
          - "success": True only if BOTH services answered with HTTP 200.
          - "latency_seconds": total probe duration, rounded to 3 decimals.
          - "checks": per-service booleans ("router", "memory_service").
          - "timestamp": ``datetime.utcnow().isoformat()`` string.
          - "error": message of the first check that raised, truncated to
            100 characters (present only if at least one check raised).
          - "errors": per-service truncated messages (same condition).

    Exceptions raised while probing a service are caught and reported in
    the response instead of being propagated.
    """
    import time

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start = time.perf_counter()

    targets = (
        ("router", os.getenv("ROUTER_URL", "http://router:8000")),
        (
            "memory_service",
            os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000"),
        ),
    )

    checks = {}
    errors = {}
    for name, base_url in targets:
        # Each service is probed inside its own try block so that one failing
        # dependency does not hide the state of the other one.
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.get(f"{base_url}/health")
            checks[name] = resp.status_code == 200
        except Exception as e:
            checks[name] = False
            errors[name] = str(e)[:100]

    result = {
        "success": all(checks.values()),
        "latency_seconds": round(time.perf_counter() - start, 3),
        "checks": checks,
        "timestamp": datetime.utcnow().isoformat(),
    }
    if errors:
        # "error" keeps the original single-message field (first failure, in
        # probe order); "errors" adds the per-service detail.
        result["error"] = next(iter(errors.values()))
        result["errors"] = errors
    return result
|
||||
|
||||
Reference in New Issue
Block a user