feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
This commit is contained in:
362
apps/web/src/hooks/useDAGIAudit.ts
Normal file
362
apps/web/src/hooks/useDAGIAudit.ts
Normal file
@@ -0,0 +1,362 @@
|
||||
/**
 * Hooks for the DAGI Agent Audit and Router Agents.
 * Fetches data about the state of agents in the context of the DAGI Router.
 */
|
||||
|
||||
import useSWR from 'swr';
|
||||
|
||||
// Types
|
||||
/**
 * Summary counters of a single DAGI audit run for one node.
 *
 * This is the payload type used for the `.../dagi-audit` endpoint and for each
 * entry of the audit history.
 */
export interface DAGIAuditSummary {
  /** Identifier of the audited node. */
  node_id: string;
  /** Time of the audit; string format is defined by the backend (presumably ISO 8601 — confirm). */
  timestamp: string;
  /** Total reported for the router side of the audit. */
  router_total: number;
  /** Total reported for the database side of the audit. */
  db_total: number;
  /** Number of agents classified as active. */
  active_count: number;
  /** Number of agents classified as phantom. */
  phantom_count: number;
  /** Number of agents classified as stale. */
  stale_count: number;
  /** Optional marker of what triggered the audit; values are backend-defined. */
  triggered_by?: string;
}
|
||||
|
||||
/**
 * An agent listed under `active_agents` in a full DAGI audit.
 *
 * Carries both router-side (`router_*`) and database-side (`db_*`) identity
 * fields for the same agent.
 */
export interface DAGIActiveAgent {
  /** Agent identifier on the router side. */
  router_id: string;
  /** Agent name on the router side. */
  router_name: string;
  /** Agent identifier on the database side. */
  db_id: string;
  /** Agent name on the database side. */
  db_name: string;
  /** Optional external identifier stored with the database record. */
  db_external_id?: string;
  /** Optional agent kind; allowed values are backend-defined. */
  kind?: string;
  /** Agent status string; allowed values are backend-defined. */
  status: string;
}
|
||||
|
||||
/**
 * An agent listed under `phantom_agents` in a full DAGI audit.
 *
 * Only router-side identity fields are present (no `db_*` fields).
 */
export interface DAGIPhantomAgent {
  /** Agent identifier on the router side. */
  router_id: string;
  /** Agent name on the router side. */
  router_name: string;
  /** Optional free-text description. */
  description?: string;
  /** Backend-provided explanation of why the agent was classified as phantom. */
  reason: string;
}
|
||||
|
||||
/**
 * An agent listed under `stale_agents` in a full DAGI audit.
 *
 * Only database-side identity fields are present (no `router_*` fields).
 */
export interface DAGIStaleAgent {
  /** Agent identifier on the database side. */
  db_id: string;
  /** Agent name on the database side. */
  db_name: string;
  /** Optional external identifier stored with the database record. */
  db_external_id?: string;
  /** Optional agent kind; allowed values are backend-defined. */
  kind?: string;
  /** Backend-provided explanation of why the agent was classified as stale. */
  reason: string;
}
|
||||
|
||||
/**
 * Full DAGI audit result: the summary counters plus the per-category agent
 * lists. Payload type used for the `.../dagi-audit/full` endpoint.
 */
export interface DAGIAuditFull {
  /** Aggregate counters for this audit. */
  summary: DAGIAuditSummary;
  /** Agents classified as active. */
  active_agents: DAGIActiveAgent[];
  /** Agents classified as phantom. */
  phantom_agents: DAGIPhantomAgent[];
  /** Agents classified as stale. */
  stale_agents: DAGIStaleAgent[];
  /** Optional raw report payload; its shape is not described here, so narrow before use. */
  report_data?: unknown;
}
|
||||
|
||||
/**
 * List of past audit summaries for one node.
 * Payload type used for the `.../dagi-audit/history` endpoint.
 */
export interface DAGIAuditHistory {
  /** Identifier of the node the history belongs to. */
  node_id: string;
  /** Audit summaries; ordering is decided by the backend. */
  history: DAGIAuditSummary[];
}
|
||||
|
||||
// Router Agents Types (for Table)
|
||||
/**
 * One row of the DAGI Router agents table.
 */
export interface DAGIRouterAgent {
  /** Agent identifier. */
  id: string;
  /** Agent display name. */
  name: string;
  /** Optional role label. */
  role?: string;
  /** Audit classification of the agent. */
  status: 'active' | 'phantom' | 'stale' | 'error';
  /** Optional identifier of the node the agent is associated with. */
  node_id?: string;
  /** Model names associated with the agent. */
  models: string[];
  /** Optional GPU description string. */
  gpu?: string;
  /** Optional CPU description string. */
  cpu?: string;
  /** Optional last-seen time; string format is backend-defined. */
  last_seen_at?: string;
  /** Whether the agent has a cabinet. */
  has_cabinet: boolean;
  /** Optional cabinet slug (presumably set when `has_cabinet` is true — confirm). */
  cabinet_slug?: string;
  /** Optional free-text description. */
  description?: string;
  /** Whether system prompts exist in the DB for this agent. */
  has_prompts?: boolean;
}
|
||||
|
||||
/**
 * Aggregate counters shown alongside the DAGI Router agents table.
 */
export interface DAGIRouterAgentsSummary {
  /** Number of agents with status `active`. */
  active: number;
  /** Number of agents with status `phantom`. */
  phantom: number;
  /** Number of agents with status `stale`. */
  stale: number;
  /** Total reported for the router side. */
  router_total: number;
  /** Total reported for the system side (presumably the database — confirm). */
  system_total: number;
}
|
||||
|
||||
/**
 * Payload type used for the `.../dagi-router/agents` endpoint: the table rows
 * plus their aggregate counters.
 */
export interface DAGIRouterAgentsResponse {
  /** Identifier of the node the agents belong to. */
  node_id: string;
  /** Optional time of the last audit; string format is backend-defined. */
  last_audit_at?: string;
  /** Aggregate counters for `agents`. */
  summary: DAGIRouterAgentsSummary;
  /** Table rows. */
  agents: DAGIRouterAgent[];
}
|
||||
|
||||
// Node Metrics Types
|
||||
/**
 * Current metrics snapshot of a node.
 * Payload type used for the `.../metrics/current` endpoint.
 *
 * NOTE(review): units of the numeric fields (bytes vs MB, percent vs ratio)
 * are not visible in this file — confirm against city-service before formatting.
 */
export interface NodeMetrics {
  /** Identifier of the node. */
  node_id: string;
  /** Optional display name of the node. */
  node_name?: string;
  /** Optional host name. */
  hostname?: string;
  /** Optional node status string; values are backend-defined. */
  status?: string;
  /** Optional environment label; values are backend-defined. */
  environment?: string;
  /** Optional CPU model string. */
  cpu_model?: string;
  /** Number of CPU cores. */
  cpu_cores: number;
  /** CPU usage value. */
  cpu_usage: number;
  /** Optional GPU model string. */
  gpu_model?: string;
  /** Total GPU memory. */
  gpu_memory_total: number;
  /** Used GPU memory. */
  gpu_memory_used: number;
  /** Total RAM. */
  ram_total: number;
  /** Used RAM. */
  ram_used: number;
  /** Total disk space. */
  disk_total: number;
  /** Used disk space. */
  disk_used: number;
  /** Agent count reported for the router side. */
  agent_count_router: number;
  /** Agent count reported for the system side. */
  agent_count_system: number;
  /** Optional time of the last heartbeat; string format is backend-defined. */
  last_heartbeat?: string;
}
|
||||
|
||||
// API URL
|
||||
/**
 * Base URL prepended to every city-service request in this module.
 * Falls back to an empty string when `NEXT_PUBLIC_CITY_SERVICE_URL` is unset,
 * which turns the request URLs into root-relative paths.
 */
const CITY_SERVICE_URL = process.env.NEXT_PUBLIC_CITY_SERVICE_URL ?? '';
|
||||
|
||||
/**
 * Shared SWR fetcher: GETs `url` and resolves with the parsed JSON body.
 *
 * An HTTP 404 is treated as "no data" and resolves to `null` instead of
 * rejecting; any other non-OK status rejects.
 *
 * @param url - URL to request.
 * @returns The parsed JSON body, or `null` on HTTP 404.
 * @throws Error with message `Failed to fetch: <status>` for other non-OK responses.
 */
const fetcher = async (url: string) => {
  const res = await fetch(url);

  if (res.ok) {
    return res.json();
  }

  // 404 means the resource does not exist; surface it as an empty result.
  if (res.status === 404) {
    return null;
  }

  throw new Error(`Failed to fetch: ${res.status}`);
};
|
||||
|
||||
/**
 * Fetch the latest DAGI audit summary for a node.
 *
 * No request is made while `nodeId` is falsy (the SWR key is `null`).
 * `summary` is `null` when the endpoint answers 404 and `undefined` while
 * nothing has been loaded yet.
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @returns `summary`, SWR's `isLoading` / `error`, and `refresh` (SWR's `mutate`).
 */
export function useDAGIAuditSummary(nodeId: string | undefined) {
  const key = nodeId
    ? `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-audit`
    : null;

  const { data, error, isLoading, mutate } = useSWR<DAGIAuditSummary | null>(key, fetcher, {
    refreshInterval: 60000, // refresh once a minute
    revalidateOnFocus: false,
  });

  return {
    summary: data,
    isLoading,
    error,
    refresh: mutate,
  };
}
|
||||
|
||||
/**
 * Fetch the full DAGI audit (summary plus per-category agent lists) for a node.
 *
 * No request is made while `nodeId` is falsy (the SWR key is `null`).
 * `audit` is `null` when the endpoint answers 404 and `undefined` while
 * nothing has been loaded yet.
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @returns `audit`, SWR's `isLoading` / `error`, and `refresh` (SWR's `mutate`).
 */
export function useDAGIAuditFull(nodeId: string | undefined) {
  const key = nodeId
    ? `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-audit/full`
    : null;

  const { data, error, isLoading, mutate } = useSWR<DAGIAuditFull | null>(key, fetcher, {
    refreshInterval: 60000, // refresh once a minute
    revalidateOnFocus: false,
  });

  return {
    audit: data,
    isLoading,
    error,
    refresh: mutate,
  };
}
|
||||
|
||||
/**
 * Fetch the DAGI Router agents of a node for the agents table.
 *
 * No request is made while `nodeId` is falsy (the SWR key is `null`).
 * `agents` and `summary` always have usable values: an empty list and
 * all-zero counters are returned while nothing has been loaded.
 *
 * NOTE(review): the shared fetcher resolves to `null` on HTTP 404, so `data`
 * can be `null` at runtime even though it is typed as the response object.
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @returns The raw `data`, derived `agents` / `summary` / `lastAuditAt`,
 *          SWR's `isLoading` / `error`, and `refresh` (SWR's `mutate`).
 */
export function useDAGIRouterAgents(nodeId: string | undefined) {
  const key = nodeId
    ? `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-router/agents`
    : null;

  const { data, error, isLoading, mutate } = useSWR<DAGIRouterAgentsResponse>(key, fetcher, {
    refreshInterval: 30000, // refresh every 30 seconds
    revalidateOnFocus: true,
  });

  return {
    data,
    agents: data?.agents ?? [],
    summary: data?.summary ?? { active: 0, phantom: 0, stale: 0, router_total: 0, system_total: 0 },
    lastAuditAt: data?.last_audit_at,
    isLoading,
    error,
    refresh: mutate,
  };
}
|
||||
|
||||
/**
 * Fetch the history of DAGI audits for a node.
 *
 * No request is made while `nodeId` is falsy (the SWR key is `null`).
 * Uses SWR's default options (no explicit polling interval is configured).
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @param limit - Value sent as the `limit` query parameter (default 10).
 * @returns `history` (empty list while nothing is loaded) and SWR's `isLoading` / `error`.
 */
export function useDAGIAuditHistory(nodeId: string | undefined, limit = 10) {
  const key = nodeId
    ? `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-audit/history?limit=${limit}`
    : null;

  const { data, error, isLoading } = useSWR<DAGIAuditHistory>(key, fetcher);

  return {
    history: data?.history ?? [],
    isLoading,
    error,
  };
}
|
||||
|
||||
/**
 * Fetch the current metrics of a node.
 *
 * No request is made while `nodeId` is falsy (the SWR key is `null`).
 *
 * NOTE(review): the shared fetcher resolves to `null` on HTTP 404, so `metrics`
 * can be `null` at runtime even though it is typed as `NodeMetrics | undefined`.
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @returns `metrics`, SWR's `isLoading` / `error`, and `refresh` (SWR's `mutate`).
 */
export function useNodeMetrics(nodeId: string | undefined) {
  const key = nodeId
    ? `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/metrics/current`
    : null;

  const { data, error, isLoading, mutate } = useSWR<NodeMetrics>(key, fetcher, {
    refreshInterval: 30000, // refresh every 30 seconds
    revalidateOnFocus: true,
  });

  return {
    metrics: data,
    isLoading,
    error,
    refresh: mutate,
  };
}
|
||||
|
||||
/**
 * Trigger a DAGI audit run for a node (POST `.../dagi-audit/run`).
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @returns The parsed JSON response body.
 * @throws Error when the response is not OK. The message is the response's
 *         `detail` when it is a non-empty string, the JSON-serialised `detail`
 *         when it is structured (e.g. a list of validation errors),
 *         `'Unknown error'` when the body is not JSON, and
 *         `'Failed to run audit'` otherwise.
 */
export async function runDAGIAudit(nodeId: string): Promise<{
  status: string;
  report_id: string;
  summary: {
    router_total: number;
    db_total: number;
    active_count: number;
    phantom_count: number;
    stale_count: number;
  };
  message: string;
}> {
  const res = await fetch(
    `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-audit/run`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
    }
  );

  if (!res.ok) {
    const payload: unknown = await res.json().catch(() => ({ detail: 'Unknown error' }));
    // The body may legally be any JSON value (including null), so narrow before reading `detail`.
    const detail =
      typeof payload === 'object' && payload !== null
        ? (payload as { detail?: unknown }).detail
        : undefined;

    let message = 'Failed to run audit';
    if (typeof detail === 'string' && detail) {
      message = detail;
    } else if (detail && typeof detail === 'object') {
      // Structured details would otherwise be rendered as "[object Object]".
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  return res.json();
}
|
||||
|
||||
/**
 * Sync phantom agents, i.e. create them in the database
 * (POST `.../dagi-router/phantom/sync`).
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @param agentIds - Agent identifiers sent as `agent_ids` in the JSON body.
 * @returns The parsed JSON response body.
 * @throws Error when the response is not OK. The message is the response's
 *         `detail` when it is a non-empty string, the JSON-serialised `detail`
 *         when it is structured (e.g. a list of validation errors),
 *         `'Unknown error'` when the body is not JSON, and
 *         `'Failed to sync phantom agents'` otherwise.
 */
export async function syncPhantomAgents(
  nodeId: string,
  agentIds: string[]
): Promise<{
  status: string;
  created_count: number;
  created_agents: Array<{ id: string; name: string; external_id: string }>;
}> {
  const res = await fetch(
    `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-router/phantom/sync`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ agent_ids: agentIds }),
    }
  );

  if (!res.ok) {
    const payload: unknown = await res.json().catch(() => ({ detail: 'Unknown error' }));
    // The body may legally be any JSON value (including null), so narrow before reading `detail`.
    const detail =
      typeof payload === 'object' && payload !== null
        ? (payload as { detail?: unknown }).detail
        : undefined;

    let message = 'Failed to sync phantom agents';
    if (typeof detail === 'string' && detail) {
      message = detail;
    } else if (detail && typeof detail === 'object') {
      // Structured details would otherwise be rendered as "[object Object]".
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  return res.json();
}
|
||||
|
||||
/**
 * Mark agents as stale (POST `.../dagi-router/stale/mark`).
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @param agentIds - Agent identifiers sent as `agent_ids` in the JSON body.
 * @returns The parsed JSON response body.
 * @throws Error when the response is not OK. The message is the response's
 *         `detail` when it is a non-empty string, the JSON-serialised `detail`
 *         when it is structured (e.g. a list of validation errors),
 *         `'Unknown error'` when the body is not JSON, and
 *         `'Failed to mark stale agents'` otherwise.
 */
export async function markStaleAgents(
  nodeId: string,
  agentIds: string[]
): Promise<{
  status: string;
  marked_count: number;
}> {
  const res = await fetch(
    `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/dagi-router/stale/mark`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ agent_ids: agentIds }),
    }
  );

  if (!res.ok) {
    const payload: unknown = await res.json().catch(() => ({ detail: 'Unknown error' }));
    // The body may legally be any JSON value (including null), so narrow before reading `detail`.
    const detail =
      typeof payload === 'object' && payload !== null
        ? (payload as { detail?: unknown }).detail
        : undefined;

    let message = 'Failed to mark stale agents';
    if (typeof detail === 'string' && detail) {
      message = detail;
    } else if (detail && typeof detail === 'object') {
      // Structured details would otherwise be rendered as "[object Object]".
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  return res.json();
}
|
||||
|
||||
// =============================================================================
|
||||
// Node Agents API
|
||||
// =============================================================================
|
||||
|
||||
/**
 * An agent attached to a node, as listed by the node agents endpoint.
 */
export interface NodeAgent {
  /** Agent identifier. */
  id: string;
  /** Agent display name. */
  name: string;
  /** Optional URL slug. */
  slug?: string;
  /** Optional agent kind; values are backend-defined. */
  kind?: string;
  /** Optional role label. */
  role?: string;
  /** Agent status string; values are backend-defined. */
  status: string;
  /** Optional DAGI-specific status string; values are backend-defined. */
  dagi_status?: string;
  /** Optional last-seen time; string format is backend-defined. */
  last_seen_at?: string;
  /** Whether this agent is the node's Guardian. */
  is_guardian: boolean;
  /** Whether this agent is the node's Steward. */
  is_steward: boolean;
}
|
||||
|
||||
/**
 * Payload type used for the `.../agents` endpoint of a node.
 */
export interface NodeAgentsResponse {
  /** Identifier of the node. */
  node_id: string;
  /** Total number of agents reported by the backend. */
  total: number;
  /** The node's Guardian agent, when one is present. */
  guardian?: NodeAgent;
  /** The node's Steward agent, when one is present. */
  steward?: NodeAgent;
  /** Agents of the node (whether Guardian/Steward are repeated here is backend-defined). */
  agents: NodeAgent[];
}
|
||||
|
||||
/**
 * Fetch the agents of a node (Guardian, Steward, runtime agents).
 *
 * No request is made while `nodeId` is falsy (the SWR key is `null`).
 * `agents` and `total` always have usable values: an empty list and `0`
 * are returned while nothing has been loaded.
 *
 * NOTE(review): the shared fetcher resolves to `null` on HTTP 404, so `data`
 * can be `null` at runtime even though it is typed as the response object.
 *
 * @param nodeId - Node identifier; percent-encoded before being placed in the URL path.
 * @returns The raw `data`, derived `guardian` / `steward` / `agents` / `total`,
 *          SWR's `isLoading` / `error`, and `refresh` (SWR's `mutate`).
 */
export function useNodeAgents(nodeId: string | undefined) {
  const key = nodeId
    ? `${CITY_SERVICE_URL}/city/internal/node/${encodeURIComponent(nodeId)}/agents`
    : null;

  const { data, error, isLoading, mutate } = useSWR<NodeAgentsResponse>(key, fetcher, {
    refreshInterval: 60000, // refresh once a minute
    revalidateOnFocus: false,
  });

  return {
    data,
    guardian: data?.guardian,
    steward: data?.steward,
    agents: data?.agents ?? [],
    total: data?.total ?? 0,
    isLoading,
    error,
    refresh: mutate,
  };
}
|
||||
Reference in New Issue
Block a user