Sync NODE1 runtime config for Sofiia monitor + Clan canary fixes
This commit is contained in:
35
ops/canary_gateway_delivery_priority.sh
Normal file
35
ops/canary_gateway_delivery_priority.sh
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
# Canary: checks that the gateway HTTP API source still tests its delivery
# branches in priority order: `if file_base64:` first, then
# `elif image_base64:`, then the text-only `else:` fallback.
#
# Exits non-zero with a "[FAIL] ..." message if the target file is missing,
# an anchor cannot be found, or the anchors appear out of order.
set -euo pipefail

# Single source of truth for the inspected file; handed to Python via argv so
# the path is not duplicated inside the heredoc.
TARGET="/opt/microdao-daarion/gateway-bot/http_api.py"

if [[ ! -f "$TARGET" ]]; then
  echo "[FAIL] target file not found: $TARGET" >&2
  exit 1
fi

python3 - "$TARGET" <<'PY'
import re
import sys
from pathlib import Path

text = Path(sys.argv[1]).read_text(encoding='utf-8')

# One regex per delivery branch; each must match somewhere in the file.
anchors = {
    'file_base64': r'\n\s*if file_base64:\n',
    'image_base64': r'\n\s*elif image_base64:\n',
    'text_fallback': r'\n\s*else:\n\s*# Send text response only\n',
}

# Character offset of the first match of every anchor.
pos = {}
for k, pat in anchors.items():
    m = re.search(pat, text)
    if not m:
        raise SystemExit(f"[FAIL] anchor not found: {k}")
    pos[k] = m.start()

# Each anchor must start strictly before the next one in this list.
expected = ['file_base64', 'image_base64', 'text_fallback']
for a, b in zip(expected, expected[1:]):
    if not (pos[a] < pos[b]):
        raise SystemExit(f"[FAIL] priority order broken: {a} should be before {b}")

print('[OK] gateway delivery priority order is correct')
for k in expected:
    print(f' - {k}: {pos[k]}')
PY

echo "[OK] gateway delivery priority canary passed"
|
||||
26
ops/canary_router_contract.sh
Normal file
26
ops/canary_router_contract.sh
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
# Canary: checks that the router is healthy and that the JSON returned by
# POST /v1/agents/devtools/infer contains every key of the expected contract.
#
# Environment:
#   ROUTER_URL  base URL of the router (default: http://127.0.0.1:9102)
#
# Exits non-zero if the health check or the infer call fails (curl -f), if the
# body is not a JSON object, or if any required key is missing.
set -euo pipefail

ROUTER_URL="${ROUTER_URL:-http://127.0.0.1:9102}"
ROUTER_URL="${ROUTER_URL%/}"  # tolerate a trailing slash in the override

curl -fsS "$ROUTER_URL/health" >/dev/null

echo "[INFO] Calling /v1/agents/devtools/infer for contract check"

# The response is written to a temp file instead of an environment variable:
# the contract includes base64 payload fields, and a large body passed through
# the environment can exceed the kernel's per-string exec limit.
resp_file="$(mktemp)"
trap 'rm -f "$resp_file"' EXIT

curl -fsS -X POST "$ROUTER_URL/v1/agents/devtools/infer" \
  -H "Content-Type: application/json" \
  -d '{"prompt":"Reply with: ok","max_tokens":32,"temperature":0.1}' \
  -o "$resp_file"

python3 - "$resp_file" <<'PY'
import json
import sys
from pathlib import Path

obj = json.loads(Path(sys.argv[1]).read_text(encoding='utf-8'))

# Guard first: the missing-keys message below calls obj.keys().
if not isinstance(obj, dict):
    raise SystemExit(f"Expected a JSON object, got {type(obj).__name__}")

required = [
    'response', 'model', 'backend', 'tokens_used',
    'image_base64', 'file_base64', 'file_name', 'file_mime',
]
missing = [k for k in required if k not in obj]
if missing:
    raise SystemExit(f"Missing keys: {missing}; got keys={sorted(obj.keys())}")
print('[OK] Router infer contract keys present')
PY

echo "[OK] router contract canary passed"
|
||||
129
ops/monitor_notify_sofiia.sh
Normal file
129
ops/monitor_notify_sofiia.sh
Normal file
@@ -0,0 +1,129 @@
|
||||
#!/usr/bin/env bash
# Sends the latest canary status to the router's `sofiia` agent for a short
# analysis and, when a Telegram chat id and bot token are configured, forwards
# the agent's answer to that chat.
#
# Usage: monitor_notify_sofiia.sh [status_json]
#
# Environment (all optional):
#   ROUTER_URL                       router base URL (default http://127.0.0.1:9102)
#   SOFIIA_REPORTS_ENABLED           anything other than "true" disables the script
#   SOFIIA_REPORT_MODE               fail_only | always (default fail_only)
#   SOFIIA_REPORT_TIMEOUT            router request timeout in seconds (default 180)
#   SOFIIA_REPORT_CHAT_ID / SOFIIA_REPORT_USER_ID / SOFIIA_REPORT_USERNAME
#                                    metadata sent with the infer request
#   SOFIIA_REPORT_TELEGRAM_CHAT_ID   Telegram chat to deliver the report to
#   SOFIIA_TELEGRAM_BOT_TOKEN        bot token (falls back to TELEGRAM_BOT_TOKEN)
#
# Exit status: 0 when disabled, skipped, status file missing, or report sent
# (Telegram delivery is best-effort and only warns); non-zero when the router
# call fails or returns something that is not a JSON object.
set -euo pipefail

ROOT="/opt/microdao-daarion"
STATUS_JSON="${1:-$ROOT/ops/status/canary_all.latest.json}"
ROUTER_URL="${ROUTER_URL:-http://127.0.0.1:9102}"
REPORT_ENABLED="${SOFIIA_REPORTS_ENABLED:-true}"
REPORT_MODE="${SOFIIA_REPORT_MODE:-fail_only}" # fail_only | always
REPORT_TIMEOUT="${SOFIIA_REPORT_TIMEOUT:-180}"
REPORT_CHAT_ID="${SOFIIA_REPORT_CHAT_ID:-ops-monitor-sofiia}"
REPORT_USER_ID="${SOFIIA_REPORT_USER_ID:-ops-monitor-agent}"
REPORT_USERNAME="${SOFIIA_REPORT_USERNAME:-monitor-agent}"
REPORT_TELEGRAM_CHAT_ID="${SOFIIA_REPORT_TELEGRAM_CHAT_ID:-}"
SOFIIA_BOT_TOKEN="${SOFIIA_TELEGRAM_BOT_TOKEN:-${TELEGRAM_BOT_TOKEN:-}}"

if [[ "${REPORT_ENABLED,,}" != "true" ]]; then
  echo "[INFO] sofiia notify disabled"
  exit 0
fi

if [[ ! -f "$STATUS_JSON" ]]; then
  echo "[WARN] status json not found: $STATUS_JSON"
  exit 0
fi

# The bot token is handed over through the environment, not argv, so it does
# not show up in process listings.
SOFIIA_BOT_TOKEN="$SOFIIA_BOT_TOKEN" python3 - \
  "$STATUS_JSON" "$ROUTER_URL" "$REPORT_MODE" "$REPORT_TIMEOUT" \
  "$REPORT_CHAT_ID" "$REPORT_USER_ID" "$REPORT_USERNAME" \
  "$REPORT_TELEGRAM_CHAT_ID" <<'PY'
import json
import os
import sys
from pathlib import Path
from urllib import request as urlreq
from urllib.error import URLError, HTTPError

status_json = Path(sys.argv[1])
router_url = sys.argv[2].rstrip('/')
report_mode = sys.argv[3]
timeout_s = int(sys.argv[4])
chat_id = sys.argv[5]
user_id = sys.argv[6]
username = sys.argv[7]
tg_chat_id = sys.argv[8].strip()
tg_token = os.environ.get('SOFIIA_BOT_TOKEN', '').strip()

payload = json.loads(status_json.read_text(encoding='utf-8'))
status = str(payload.get('status', 'unknown')).lower()

if report_mode == 'fail_only' and status == 'ok':
    print('[INFO] sofiia notify skipped: status=ok and mode=fail_only')
    sys.exit(0)

# Attach the last 40 lines of the canary log, when the status file names one.
log_path = payload.get('log_path')
log_tail = ''
if log_path:
    p = Path(log_path)
    if p.exists():
        lines = p.read_text(encoding='utf-8', errors='ignore').splitlines()
        log_tail = '\n'.join(lines[-40:])

# The heredoc delimiter is quoted, so Python sees these literals verbatim:
# a single backslash is what produces real newlines in the prompt.
prompt = (
    'System monitoring report from NODE1 operator pipeline. '
    'Analyze briefly and return 3 sections: status, risks, actions.\n\n'
    f"status={payload.get('status')}\n"
    f"exit_code={payload.get('exit_code')}\n"
    f"started_at={payload.get('started_at')}\n"
    f"ended_at={payload.get('ended_at')}\n"
    f"log_path={payload.get('log_path')}\n\n"
    'log_tail:\n'
    f"{log_tail[:6000]}"
)

body = {
    'prompt': prompt,
    'max_tokens': 400,
    'temperature': 0.1,
    'metadata': {
        'source': 'ops-monitor-canary',
        'force_concise': True,
        'user_id': user_id,
        'chat_id': chat_id,
        'username': username,
        'session_id': f'{chat_id}:sofiia:monitor',
        'report_mode': report_mode,
    }
}

req = urlreq.Request(
    url=f"{router_url}/v1/agents/sofiia/infer",
    data=json.dumps(body).encode('utf-8'),
    headers={'Content-Type': 'application/json'},
    method='POST',
)

# Only the router call is guarded here; any failure ends the script with a
# one-line [FAIL] message instead of a traceback.
try:
    with urlreq.urlopen(req, timeout=timeout_s) as resp:
        raw = resp.read().decode('utf-8', errors='ignore')
except HTTPError as e:
    msg = e.read().decode('utf-8', errors='ignore')[:300]
    raise SystemExit(f"[FAIL] sofiia report HTTPError {e.code}: {msg}")
except URLError as e:
    raise SystemExit(f"[FAIL] sofiia report URLError: {e}")
except OSError as e:
    # e.g. a socket timeout raised while reading the response body
    raise SystemExit(f"[FAIL] sofiia report I/O error: {e}")

try:
    data = json.loads(raw)
except json.JSONDecodeError as e:
    raise SystemExit(f"[FAIL] sofiia report returned invalid JSON: {e}")
if not isinstance(data, dict):
    raise SystemExit(f"[FAIL] sofiia report returned {type(data).__name__}, expected a JSON object")

text = (data.get('response') or '').strip()
short = text[:200]
print(f"[OK] sofiia report sent: backend={data.get('backend')} model={data.get('model')} preview={short!r}")

if tg_chat_id and tg_token and text:
    msg = (
        "[NODE1 Monitor]\n"
        f"status={payload.get('status')} exit_code={payload.get('exit_code')}\n\n"
        f"{text[:3500]}"
    )
    tg_req = urlreq.Request(
        url=f"https://api.telegram.org/bot{tg_token}/sendMessage",
        data=json.dumps({"chat_id": tg_chat_id, "text": msg}).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
        method='POST',
    )
    # Telegram delivery is deliberately best-effort: failures only warn.
    try:
        with urlreq.urlopen(tg_req, timeout=20) as tg_resp:
            tg_data = json.loads(tg_resp.read().decode('utf-8', errors='ignore'))
        if tg_data.get('ok'):
            print(f"[OK] telegram report delivered: chat_id={tg_chat_id}")
        else:
            print(f"[WARN] telegram send not ok: {tg_data}")
    except Exception as tg_e:
        print(f"[WARN] telegram send failed: {tg_e}")
else:
    print('[INFO] telegram delivery skipped (missing SOFIIA_REPORT_TELEGRAM_CHAT_ID or token or empty text)')
PY
|
||||
Reference in New Issue
Block a user