P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract
NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed
Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request
Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection
Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)
Made-with: Cursor
This commit is contained in:
186
ops/fabric_preflight.sh
Executable file
186
ops/fabric_preflight.sh
Executable file
@@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env bash
# Fabric Preflight — verify all nodes before changes/deploys.
# Saves snapshot, compares with previous, fails hard on critical issues.
#
# Usage:
#   bash ops/fabric_preflight.sh [NCS_URL] [ROUTER_URL]
#   bash ops/fabric_preflight.sh            # defaults: 127.0.0.1:8099, 127.0.0.1:9102
#
# NOTE: the old usage line advertised a second NCS URL argument, but the
# script only ever read $1 (NCS) and $2 (router) — docs fixed to match.
set -euo pipefail

# Positional args: $1 = NCS base URL, $2 = Router base URL.
NODA_NCS="${1:-http://127.0.0.1:8099}"
ROUTER_URL="${2:-http://127.0.0.1:9102}"

# ANSI colors for check output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Result helpers; fail() also bumps the global error counter that gates
# the final exit status.
pass() { echo -e " ${GREEN}PASS${NC} $1"; }
warn() { echo -e " ${YELLOW}WARN${NC} $1"; }
fail() { echo -e " ${RED}FAIL${NC} $1"; ERRORS=$((ERRORS+1)); }
info() { echo -e " ${CYAN}INFO${NC} $1"; }

ERRORS=0
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SNAPSHOT_DIR="${SCRIPT_DIR}/preflight_snapshots"
mkdir -p "$SNAPSHOT_DIR"
|
||||
|
||||
# ── NCS check ─────────────────────────────────────────────────────────────────

# check_ncs LABEL URL — probe one NCS instance and report on its served
# pool, swapper state, capability flags, load and vision models.
# Side effects: sets NCS_RAW / NCS_NODE_ID globals for save_and_diff.
check_ncs() {
    local label="$1" url="$2"
    echo "── $label ($url) ──"
    local raw
    raw=$(curl -sf "$url/capabilities" 2>/dev/null) || { fail "NCS unreachable at $url"; return; }

    local node_id served installed swapper_status
    node_id=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('node_id','?'))" 2>/dev/null)
    served=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('served_count',0))" 2>/dev/null)
    installed=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('installed_count',0))" 2>/dev/null)
    swapper_status=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('runtimes',{}).get('swapper',{}).get('status','?'))" 2>/dev/null)

    # An empty served pool is a hard failure; a non-numeric value also
    # lands in the fail branch because the test itself errors out.
    [ "$served" -gt 0 ] 2>/dev/null && pass "node=$node_id served=$served installed=$installed" \
        || fail "node=$node_id served=$served (empty pool!)"

    [ "$swapper_status" = "disabled" ] && pass "swapper=disabled" || warn "swapper=$swapper_status"

    local caps
    caps=$(echo "$raw" | python3 -c "
import json,sys
c=json.load(sys.stdin).get('capabilities',{})
parts=[f'{k}={v}' for k,v in c.items() if k!='providers']
print(' '.join(parts) if parts else '(none — P3.5 not deployed?)')
" 2>/dev/null)
    [ "$caps" = "(none — P3.5 not deployed?)" ] && warn "capabilities: $caps" || pass "capabilities: $caps"

    local mem_p inflight
    mem_p=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('node_load',{}).get('mem_pressure','?'))" 2>/dev/null)
    # Fixed: the original one-liner carried a dead "if False" branch that
    # re-opened /dev/stdin; a plain .get('inflight_jobs', 0) is all that
    # was ever evaluated.
    inflight=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('node_load',{}).get('inflight_jobs',0))" 2>/dev/null || echo "?")
    [ "$mem_p" = "high" ] && warn "mem_pressure=$mem_p inflight=$inflight" \
        || pass "mem_pressure=$mem_p inflight=$inflight"

    local vision_count
    vision_count=$(echo "$raw" | python3 -c "import json,sys;print(sum(1 for m in json.load(sys.stdin).get('served_models',[]) if m.get('type')=='vision'))" 2>/dev/null)
    [ "$vision_count" -gt 0 ] && pass "vision models: $vision_count" || warn "no vision models served"

    # Exported for save_and_diff.
    NCS_RAW="$raw"
    NCS_NODE_ID="$node_id"
}
|
||||
|
||||
# ── Router check ──────────────────────────────────────────────────────────────

# check_router LABEL URL — verify router health, global model pool size
# and the per-node capability view.
# Side effects: sets the ROUTER_MODELS global for save_and_diff.
check_router() {
    local label="$1" url="$2"
    echo "── $label ($url) ──"

    local health
    if ! health=$(curl -sf "$url/health" 2>/dev/null); then
        fail "Router unreachable at $url"
        return
    fi

    local status
    status=$(echo "$health" | python3 -c "import json,sys;print(json.load(sys.stdin).get('status','?'))" 2>/dev/null)
    if [ "$status" = "ok" ]; then
        pass "health=$status"
    else
        fail "health=$status"
    fi

    local models_total
    models_total=$(curl -sf "$url/v1/models" 2>/dev/null | python3 -c "import json,sys;print(json.load(sys.stdin).get('total',0))" 2>/dev/null) || models_total=0
    if [ "$models_total" -gt 0 ]; then
        pass "global pool: $models_total models"
    else
        fail "global pool empty"
    fi

    local caps_nodes
    caps_nodes=$(curl -sf "$url/v1/capabilities" 2>/dev/null | python3 -c "
import json,sys
d=json.load(sys.stdin)
nodes=list(d.get('capabilities_by_node',{}).keys())
print(f'{len(nodes)} node(s): {\" \".join(nodes)}' if nodes else '(none)')
" 2>/dev/null)
    if [ "$caps_nodes" = "(none)" ]; then
        warn "capabilities_by_node: $caps_nodes"
    else
        pass "capabilities_by_node: $caps_nodes"
    fi

    ROUTER_MODELS=$(curl -sf "$url/v1/models" 2>/dev/null || echo '{}')
}
|
||||
|
||||
# ── Snapshot + diff ───────────────────────────────────────────────────────────

# Defaults so save_and_diff still works when a check bailed out early.
NCS_RAW="{}"
NCS_NODE_ID="unknown"
ROUTER_MODELS="{}"

# save_and_diff — persist a compact snapshot JSON and print the delta vs
# the previous snapshot for the same node.
save_and_diff() {
    local ts
    ts=$(date +%Y-%m-%d_%H%M%S)
    # Lowercase the node id for the filename so it matches the lowercase
    # glob prefix used below (and the naming in ops/fabric_snapshot.py).
    # The original saved "NODA2_*" but searched "noda2_*", so the diff
    # never found a previous snapshot for uppercase node ids.
    local node_lc
    node_lc=$(printf '%s' "$NCS_NODE_ID" | tr '[:upper:]' '[:lower:]')
    local snap_file="${SNAPSHOT_DIR}/${node_lc}_${ts}.json"

    # Pass the payloads through the environment instead of splicing them
    # into the python source: the old interpolation stripped every single
    # quote from the JSON and broke on payloads containing ''' sequences.
    NCS_RAW="$NCS_RAW" ROUTER_MODELS="$ROUTER_MODELS" ERRORS="$ERRORS" \
    NODE_ID="$NCS_NODE_ID" SNAP_FILE="$snap_file" SNAP_DIR="$SNAPSHOT_DIR" \
    python3 - <<'PYEOF' 2>/dev/null || echo "(snapshot diff failed)"
import json, glob, os
from datetime import datetime, timezone

def _load_env_json(name):
    try:
        return json.loads(os.environ.get(name, '{}'))
    except ValueError:
        return {}

ncs = _load_env_json('NCS_RAW')
router = _load_env_json('ROUTER_MODELS')
errors = int(os.environ.get('ERRORS', '0') or '0')
node_id = os.environ.get('NODE_ID', 'unknown')
snap_file = os.environ['SNAP_FILE']
snap_dir = os.environ['SNAP_DIR']

snapshot = {
    'timestamp': datetime.now(timezone.utc).isoformat(),
    'node_id': ncs.get('node_id', node_id),
    'errors': errors,
    'passed': errors == 0,
    'served_count': ncs.get('served_count', 0),
    'installed_count': ncs.get('installed_count', 0),
    'capabilities': {k: v for k, v in ncs.get('capabilities', {}).items() if k != 'providers'},
    'providers': ncs.get('capabilities', {}).get('providers', {}),
    'node_load': ncs.get('node_load', {}),
    'router_models_total': router.get('total', 0),
    'capabilities_by_node': router.get('capabilities_by_node', {}),
}

with open(snap_file, 'w') as f:
    json.dump(snapshot, f, indent=2, ensure_ascii=False)

# Find previous snapshot for diff (filenames sort by timestamp).
prefix = node_id.lower() + '_'
snaps = sorted(glob.glob(os.path.join(snap_dir, prefix + '*.json')), reverse=True)
prev = None
if len(snaps) >= 2:
    with open(snaps[1]) as f:
        prev = json.load(f)

if prev:
    diffs = []
    for key in ('served_count', 'installed_count', 'router_models_total'):
        ov, nv = prev.get(key, '?'), snapshot.get(key, '?')
        if ov != nv:
            diffs.append(f' {key}: {ov} → {nv}')
    old_caps = prev.get('capabilities', {})
    new_caps = snapshot.get('capabilities', {})
    for k in sorted(set(old_caps) | set(new_caps)):
        ov, nv = old_caps.get(k, '?'), new_caps.get(k, '?')
        if ov != nv:
            diffs.append(f' caps.{k}: {ov} → {nv}')
    if diffs:
        print('Changes vs previous:')
        for d in diffs:
            print(d)
    else:
        print('(no changes vs previous snapshot)')
else:
    print('(first snapshot for this node)')
PYEOF

    info "Snapshot: $snap_file"
}
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────

# Run every check, persist the snapshot, then gate on the error counter.
printf '%s\n' \
    "╔══════════════════════════════════════╗" \
    "║ Fabric Preflight Check ║" \
    "╚══════════════════════════════════════╝" \
    ""

check_ncs "NCS" "$NODA_NCS"
echo ""
check_router "Router" "$ROUTER_URL"
echo ""
save_and_diff
echo ""

if [ "$ERRORS" -eq 0 ]; then
    echo -e "${GREEN}Preflight PASSED — changes allowed${NC}"
else
    echo -e "${RED}Preflight FAILED: $ERRORS error(s)${NC}"
    echo -e "${RED}BLOCKED: no changes allowed until all errors resolved${NC}"
    exit 1
fi
|
||||
289
ops/fabric_snapshot.py
Executable file
289
ops/fabric_snapshot.py
Executable file
@@ -0,0 +1,289 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fabric Snapshot — collect full runtime truth from a node.
|
||||
|
||||
Queries every endpoint (NCS, node-worker, router, Ollama, docker)
|
||||
and saves a single JSON artifact for preflight/postflight comparison.
|
||||
|
||||
Usage:
|
||||
python3 ops/fabric_snapshot.py [--node-id NODA2] [--ncs URL] [--router URL] ...
|
||||
python3 ops/fabric_snapshot.py --ssh root@144.76.224.179 # remote node
|
||||
"""
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
SCRIPT_DIR = Path(__file__).parent
|
||||
SNAPSHOT_DIR = SCRIPT_DIR / "preflight_snapshots"
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
print("httpx not installed; pip install httpx", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
async def _get(url: str, timeout: float = 5.0) -> Dict[str, Any]:
    """GET *url*; fold success, HTTP errors and exceptions into one dict shape.

    Returns ``{"status": "ok", "data": ...}`` on HTTP 200, an
    ``http_<code>`` status for other responses, and an ``error:<Type>``
    status (with a truncated message) when the request raises.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(url)
            if resp.status_code != 200:
                return {"status": f"http_{resp.status_code}", "data": None}
            # .json() stays inside the try: a malformed body is reported
            # as an error status, same as a network failure.
            return {"status": "ok", "data": resp.json()}
    except Exception as exc:
        return {"status": f"error:{type(exc).__name__}", "data": None, "error": str(exc)[:200]}
|
||||
|
||||
|
||||
def _run_cmd(cmd: List[str], timeout: int = 10) -> Dict[str, Any]:
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
|
||||
return {"status": "ok" if result.returncode == 0 else f"exit_{result.returncode}",
|
||||
"stdout": result.stdout[:10000], "stderr": result.stderr[:2000]}
|
||||
except FileNotFoundError:
|
||||
return {"status": "not_found"}
|
||||
except subprocess.TimeoutExpired:
|
||||
return {"status": "timeout"}
|
||||
except Exception as e:
|
||||
return {"status": f"error:{e}"}
|
||||
|
||||
|
||||
def _ssh_cmd(ssh_target: str, remote_cmd: str, timeout: int = 15) -> Dict[str, Any]:
    """Run *remote_cmd* on *ssh_target* via ssh; same result shape as _run_cmd.

    Uses a short connect timeout and accepts new host keys so a fresh
    node doesn't stall the snapshot.
    """
    ssh_invocation = [
        "ssh",
        "-o", "StrictHostKeyChecking=accept-new",
        "-o", "ConnectTimeout=5",
        ssh_target,
        remote_cmd,
    ]
    return _run_cmd(ssh_invocation, timeout=timeout)
|
||||
|
||||
|
||||
async def collect_snapshot(
    node_id: str,
    ncs_url: str,
    worker_url: str,
    router_url: str,
    ollama_url: str,
    ssh_target: str = "",
) -> Dict[str, Any]:
    """Collect one node's full runtime snapshot.

    Probes NCS, node-worker, router and Ollama over HTTP, lists docker
    containers (locally, or over ssh when *ssh_target* is set), and
    returns a single JSON-serializable dict with a ``summary`` section
    and an ``errors`` list of unreachable/unhealthy components.
    """
    snap: Dict[str, Any] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "node_id": node_id,
        "collector": "fabric_snapshot.py",
        "errors": [],
    }

    # All HTTP probes are independent — fetch them concurrently. The
    # original awaited them one at a time, so total wall time was the
    # *sum* of eleven endpoint latencies instead of the max.
    (
        ncs_caps,
        ncs_caps_only,
        ncs_installed,
        worker_caps,
        worker_health,
        worker_metrics,
        router_health,
        router_models,
        router_caps,
        ollama_tags,
        ollama_ps,
    ) = await asyncio.gather(
        _get(f"{ncs_url}/capabilities"),
        _get(f"{ncs_url}/capabilities/caps"),
        _get(f"{ncs_url}/capabilities/installed"),
        _get(f"{worker_url}/caps"),
        _get(f"{worker_url}/healthz"),
        _get(f"{worker_url}/metrics"),
        _get(f"{router_url}/health"),
        _get(f"{router_url}/v1/models"),
        _get(f"{router_url}/v1/capabilities"),
        _get(f"{ollama_url}/api/tags"),
        _get(f"{ollama_url}/api/ps"),
    )

    # --- NCS ---
    snap["ncs"] = {
        "url": ncs_url,
        "capabilities_full": ncs_caps.get("data"),
        "capabilities_flags": ncs_caps_only.get("data"),
        "installed": ncs_installed.get("data"),
    }
    if ncs_caps["status"] != "ok":
        snap["errors"].append(f"NCS /capabilities: {ncs_caps['status']}")

    # Hoist the headline counters/fields to the top level for easy diffs.
    ncs_data = ncs_caps.get("data") or {}
    snap["served_models"] = ncs_data.get("served_models", [])
    snap["served_count"] = ncs_data.get("served_count", 0)
    snap["installed_artifacts"] = (ncs_installed.get("data") or {}).get("installed_artifacts", [])
    snap["installed_count"] = (ncs_installed.get("data") or {}).get("installed_count", 0)
    snap["capabilities"] = ncs_data.get("capabilities", {})
    snap["node_load"] = ncs_data.get("node_load", {})
    snap["runtimes"] = ncs_data.get("runtimes", {})

    # --- Node Worker ---
    snap["worker"] = {
        "url": worker_url,
        "caps": worker_caps.get("data"),
        "health": worker_health.get("data"),
        "metrics": worker_metrics.get("data"),
    }
    if worker_health["status"] != "ok":
        snap["errors"].append(f"Worker /healthz: {worker_health['status']}")

    # --- Router ---
    snap["router"] = {
        "url": router_url,
        "health": router_health.get("data"),
        "models": router_models.get("data"),
        "capabilities": router_caps.get("data"),
    }
    if router_health["status"] != "ok":
        snap["errors"].append(f"Router /health: {router_health['status']}")

    # --- Ollama ---
    snap["ollama"] = {
        "url": ollama_url,
        "tags": ollama_tags.get("data"),
        "ps": ollama_ps.get("data"),
    }
    if ollama_tags["status"] != "ok":
        snap["errors"].append(f"Ollama /api/tags: {ollama_tags['status']}")

    # --- Docker ---
    if ssh_target:
        docker_ps = _ssh_cmd(ssh_target, "docker ps --format '{{.Names}}\\t{{.Status}}\\t{{.Ports}}'")
    else:
        docker_ps = _run_cmd(["docker", "ps", "--format", "{{.Names}}\t{{.Status}}\t{{.Ports}}"])
    containers = []
    if docker_ps["status"] == "ok":
        for line in docker_ps.get("stdout", "").strip().split("\n"):
            parts = line.split("\t")
            if len(parts) >= 2:
                containers.append({
                    "name": parts[0],
                    "status": parts[1],
                    "ports": parts[2] if len(parts) > 2 else "",
                })
    snap["docker"] = {"containers": containers, "container_count": len(containers)}

    # --- Summary ---
    snap["summary"] = {
        "ncs_ok": ncs_caps["status"] == "ok",
        "worker_ok": worker_health["status"] == "ok",
        "router_ok": router_health["status"] == "ok",
        "ollama_ok": ollama_tags["status"] == "ok",
        "served_count": snap["served_count"],
        "installed_count": snap["installed_count"],
        "capabilities": {k: v for k, v in snap["capabilities"].items() if k != "providers"},
        "container_count": len(containers),
        "error_count": len(snap["errors"]),
        # docker failures are informational only — "passed" tracks the
        # HTTP components, matching the original behavior.
        "passed": len(snap["errors"]) == 0,
    }

    return snap
|
||||
|
||||
|
||||
def save_snapshot(snap: Dict[str, Any], out_dir: Path = SNAPSHOT_DIR) -> Path:
    """Persist *snap* as pretty-printed JSON; return the written path.

    Files are named ``<node_id lowercased>_<local timestamp>.json`` so
    they sort chronologically per node.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    node = snap.get("node_id", "unknown").lower()
    target = out_dir / f"{node}_{stamp}.json"
    with target.open("w") as fh:
        # default=str keeps non-JSON-native leftovers from crashing the dump.
        json.dump(snap, fh, indent=2, ensure_ascii=False, default=str)
    return target
|
||||
|
||||
|
||||
def find_previous_snapshot(node_id: str, out_dir: Path = SNAPSHOT_DIR) -> Optional[Dict]:
    """Return the second-newest saved snapshot for *node_id*, or None.

    The newest file is assumed to be the snapshot just written, so the
    one before it is the comparison baseline.
    """
    candidates = sorted(
        out_dir.glob(f"{node_id.lower()}_*.json"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    if len(candidates) < 2:
        return None
    with open(candidates[1]) as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def print_diff(current: Dict, previous: Optional[Dict]):
    """Print counter/capability/container deltas between two snapshots."""
    if not previous:
        print(" (no previous snapshot to compare)")
        return

    def counter_value(snap: Dict, key: str):
        # Prefer the summary section, fall back to the top-level key.
        return snap.get("summary", {}).get(key, snap.get(key, "?"))

    changes = []
    for counter in ("served_count", "installed_count"):
        before = counter_value(previous, counter)
        after = counter_value(current, counter)
        if before != after:
            changes.append(f" {counter}: {before} → {after}")

    prev_caps = previous.get("summary", {}).get("capabilities", previous.get("capabilities", {}))
    curr_caps = current.get("summary", {}).get("capabilities", current.get("capabilities", {}))
    for flag in sorted(set(prev_caps) | set(curr_caps)):
        if flag == "providers":
            continue
        before, after = prev_caps.get(flag, "?"), curr_caps.get(flag, "?")
        if before != after:
            changes.append(f" caps.{flag}: {before} → {after}")

    before_ct = previous.get("docker", {}).get("container_count", "?")
    after_ct = current.get("docker", {}).get("container_count", "?")
    if before_ct != after_ct:
        changes.append(f" containers: {before_ct} → {after_ct}")

    if changes:
        print(" Changes vs previous snapshot:")
        for entry in changes:
            print(entry)
    else:
        print(" (no changes vs previous snapshot)")
|
||||
|
||||
|
||||
def print_summary(snap: Dict):
    """Print the human-readable summary block for one snapshot."""
    summary = snap.get("summary", {})
    for line in (
        f" node_id: {snap.get('node_id')}",
        f" served: {summary.get('served_count')}",
        f" installed: {summary.get('installed_count')}",
        f" containers: {summary.get('container_count')}",
    ):
        print(line)
    flags = summary.get("capabilities", {})
    rendered = " ".join(f"{name}={'Y' if enabled else 'N'}" for name, enabled in flags.items())
    print(f" capabilities: {rendered}")
    print(f" errors: {summary.get('error_count')}")
    for err in snap.get("errors") or []:
        print(f" ✗ {err}")
    verdict = "\033[32mPASSED\033[0m" if summary.get("passed", False) else "\033[31mFAILED\033[0m"
    print(f" result: {verdict}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: collect, optionally save, summarize, diff, gate.

    Exits non-zero when the snapshot's summary did not pass, so the
    script doubles as a CI/preflight gate.
    """
    parser = argparse.ArgumentParser(description="Fabric Snapshot Collector")
    parser.add_argument("--node-id", default="NODA2")
    parser.add_argument("--ncs", default="http://127.0.0.1:8099")
    parser.add_argument("--worker", default="http://127.0.0.1:8109")
    parser.add_argument("--router", default="http://127.0.0.1:9102")
    parser.add_argument("--ollama", default="http://127.0.0.1:11434")
    parser.add_argument("--ssh", default="", help="SSH target for remote docker ps (e.g. root@1.2.3.4)")
    parser.add_argument("--out-dir", default=str(SNAPSHOT_DIR))
    parser.add_argument("--json-only", action="store_true", help="Print JSON to stdout, no save")
    args = parser.parse_args()

    snap = asyncio.run(
        collect_snapshot(
            node_id=args.node_id,
            ncs_url=args.ncs,
            worker_url=args.worker,
            router_url=args.router,
            ollama_url=args.ollama,
            ssh_target=args.ssh,
        )
    )

    if args.json_only:
        print(json.dumps(snap, indent=2, ensure_ascii=False, default=str))
        return

    out_dir = Path(args.out_dir)
    saved_path = save_snapshot(snap, out_dir)

    print("╔══════════════════════════════════════╗")
    print(f"║ Fabric Snapshot: {args.node_id:<18s} ║")
    print("╚══════════════════════════════════════╝")
    print()
    print_summary(snap)
    print()
    print_diff(snap, find_previous_snapshot(args.node_id, out_dir))
    print()
    print(f"Saved: {saved_path}")

    if not snap.get("summary", {}).get("passed"):
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
0
ops/preflight_snapshots/.gitkeep
Normal file
0
ops/preflight_snapshots/.gitkeep
Normal file
Reference in New Issue
Block a user