Files
microdao-daarion/ops/monitor_notify_sofiia.sh

148 lines
5.5 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Sends a canary status summary to the "sofiia" agent through the router and,
# when a Telegram chat id and bot token are configured, forwards the answer
# to that chat.
#
# Usage: monitor_notify_sofiia.sh [STATUS_JSON]
#
# Environment (all optional, defaults shown in the assignments below):
#   ROUTER_URL, SOFIIA_REPORTS_ENABLED, SOFIIA_REPORT_MODE,
#   SOFIIA_REPORT_TIMEOUT, SOFIIA_REPORT_MAX_TOKENS, SOFIIA_REPORT_CHAT_ID,
#   SOFIIA_REPORT_USER_ID, SOFIIA_REPORT_USERNAME,
#   SOFIIA_REPORT_TELEGRAM_CHAT_ID, SOFIIA_TELEGRAM_BOT_TOKEN,
#   TELEGRAM_BOT_TOKEN (fallback for the bot token).
set -euo pipefail

# --- Inputs ------------------------------------------------------------------
STATUS_JSON="${1:-/opt/microdao-daarion/ops/status/canary_all.latest.json}"
ROOT="/opt/microdao-daarion"
ROUTER_URL="${ROUTER_URL:-http://127.0.0.1:9102}"

# --- Report behaviour --------------------------------------------------------
REPORT_ENABLED="${SOFIIA_REPORTS_ENABLED:-true}"
REPORT_MODE="${SOFIIA_REPORT_MODE:-fail_only}" # fail_only | always
REPORT_TIMEOUT="${SOFIIA_REPORT_TIMEOUT:-180}"
REPORT_MAX_TOKENS="${SOFIIA_REPORT_MAX_TOKENS:-900}"

# --- Identity attached to the router request ---------------------------------
REPORT_CHAT_ID="${SOFIIA_REPORT_CHAT_ID:-ops-monitor-sofiia}"
REPORT_USER_ID="${SOFIIA_REPORT_USER_ID:-ops-monitor-agent}"
REPORT_USERNAME="${SOFIIA_REPORT_USERNAME:-monitor-agent}"

# --- Telegram delivery (skipped later when either value is empty) ------------
REPORT_TELEGRAM_CHAT_ID="${SOFIIA_REPORT_TELEGRAM_CHAT_ID:-}"
SOFIIA_BOT_TOKEN="${SOFIIA_TELEGRAM_BOT_TOKEN:-${TELEGRAM_BOT_TOKEN:-}}"

# Anything other than a case-insensitive "true" switches reporting off.
case "${REPORT_ENABLED,,}" in
  true) ;;
  *)
    echo "[INFO] sofiia notify disabled"
    exit 0
    ;;
esac

# A missing status file is treated as "nothing to report", not as a failure.
[[ -f "$STATUS_JSON" ]] || {
  echo "[WARN] status json not found: $STATUS_JSON"
  exit 0
}
# Build and deliver the report with the embedded Python program below.
# The Telegram bot token is handed to the child through its environment
# instead of argv, so it does not appear in the process list (ps / cmdline).
# All other settings are passed positionally; argv[2] (ROOT) is accepted for
# positional stability but is not used by the program.
SOFIIA_BOT_TOKEN="$SOFIIA_BOT_TOKEN" python3 - \
  "$STATUS_JSON" "$ROOT" "$ROUTER_URL" "$REPORT_MODE" "$REPORT_TIMEOUT" \
  "$REPORT_MAX_TOKENS" "$REPORT_CHAT_ID" "$REPORT_USER_ID" "$REPORT_USERNAME" \
  "$REPORT_TELEGRAM_CHAT_ID" <<'PY'
import json
import os
import sys
from pathlib import Path
from urllib import request as urlreq
from urllib.error import URLError, HTTPError

# Maximum characters per Telegram message part produced by this script
# (kept below the 4096-character text limit documented for sendMessage).
TELEGRAM_CHUNK_LIMIT = 3500
# Seconds to wait for each Telegram API call.
TELEGRAM_TIMEOUT_S = 20
# How much of the canary log is attached to the prompt.
LOG_TAIL_LINES = 40
LOG_TAIL_CHARS = 6000


def parse_int(raw, name):
    """Return int(raw) or exit with a readable message naming the setting."""
    try:
        return int(raw)
    except ValueError:
        raise SystemExit(f"[FAIL] {name} must be an integer, got {raw!r}")


def load_status(path):
    """Read the status JSON file and return it as a dict.

    Exits with a [FAIL] message when the file cannot be read, is not valid
    JSON/UTF-8, or does not contain a JSON object.
    """
    try:
        payload = json.loads(path.read_text(encoding='utf-8'))
    except OSError as e:
        raise SystemExit(f"[FAIL] cannot read status json {path}: {e}")
    except ValueError as e:  # JSONDecodeError and UnicodeDecodeError
        raise SystemExit(f"[FAIL] invalid status json {path}: {e}")
    if not isinstance(payload, dict):
        raise SystemExit(f"[FAIL] status json {path} is not a JSON object")
    return payload


def read_log_tail(log_path, max_lines=LOG_TAIL_LINES):
    """Return the last max_lines lines of log_path, or '' when unavailable.

    The tail is auxiliary context, so an unreadable log only produces a
    warning on stderr instead of aborting the report.
    """
    if not log_path:
        return ''
    p = Path(str(log_path))
    if not p.is_file():
        return ''
    try:
        lines = p.read_text(encoding='utf-8', errors='ignore').splitlines()
    except OSError as e:
        print(f"[WARN] cannot read log {p}: {e}", file=sys.stderr)
        return ''
    return '\n'.join(lines[-max_lines:])


def build_prompt(payload, log_tail):
    """Compose the prompt text sent to the agent.

    Uses real newlines as separators (the heredoc is quoted, so a doubled
    backslash would reach the model as a literal backslash followed by 'n').
    Only the first LOG_TAIL_CHARS characters of log_tail are included.
    """
    return (
        'System monitoring report from NODE1 operator pipeline. '
        'Analyze briefly and return 3 sections: status, risks, actions.\n\n'
        f"status={payload.get('status')}\n"
        f"exit_code={payload.get('exit_code')}\n"
        f"started_at={payload.get('started_at')}\n"
        f"ended_at={payload.get('ended_at')}\n"
        f"log_path={payload.get('log_path')}\n\n"
        'log_tail:\n'
        f"{log_tail[:LOG_TAIL_CHARS]}"
    )


def post_json(url, body, timeout, label):
    """POST body as JSON to url and return the decoded JSON object.

    label prefixes every failure message so router and Telegram errors can be
    told apart. Any transport, HTTP or decoding failure ends the program via
    SystemExit (message on stderr, non-zero exit status).
    """
    req = urlreq.Request(
        url=url,
        data=json.dumps(body).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
        method='POST',
    )
    try:
        with urlreq.urlopen(req, timeout=timeout) as resp:
            raw = resp.read().decode('utf-8', errors='ignore')
        data = json.loads(raw)
    except HTTPError as e:
        detail = e.read().decode('utf-8', errors='ignore')[:300]
        raise SystemExit(f"[FAIL] {label} HTTPError {e.code}: {detail}")
    except URLError as e:
        raise SystemExit(f"[FAIL] {label} URLError: {e}")
    except OSError as e:
        # Read timeouts and connection resets are not wrapped in URLError.
        raise SystemExit(f"[FAIL] {label} I/O error: {e}")
    except ValueError as e:
        raise SystemExit(f"[FAIL] {label} returned invalid JSON: {e}")
    if not isinstance(data, dict):
        raise SystemExit(f"[FAIL] {label} returned unexpected JSON: {data!r}")
    return data


def chunk_text(value, limit=TELEGRAM_CHUNK_LIMIT):
    """Split value into pieces of at most limit characters.

    Prefers to cut at the last newline inside the window, as long as that
    newline lies in the second half of the window; otherwise cuts hard at
    limit. Returns [value] for an empty string.
    """
    limit = max(1, limit)  # a non-positive limit would never make progress
    chunks = []
    remaining = value
    while remaining:
        if len(remaining) <= limit:
            chunks.append(remaining)
            break
        split_at = remaining.rfind('\n', 0, limit)
        if split_at < max(1, limit // 2):
            split_at = limit
        chunks.append(remaining[:split_at].rstrip())
        remaining = remaining[split_at:].lstrip()
    return chunks or [value]


def deliver_to_telegram(token, chat_id, header, text):
    """Send text to the Telegram chat in numbered parts; return parts sent.

    header is prepended to the first part only. Exits with a [FAIL] message
    on the first part that cannot be delivered.
    """
    # Reserve room for the "(i/N)\n" prefix added to every part.
    parts = chunk_text(text, TELEGRAM_CHUNK_LIMIT - len("(99/99)\n"))
    total = len(parts)
    delivered = 0
    for idx, part in enumerate(parts, start=1):
        prefix = f"({idx}/{total})\n" if total > 1 else ""
        msg = f"{header}{prefix}{part}" if idx == 1 else f"{prefix}{part}"
        tg_data = post_json(
            f"https://api.telegram.org/bot{token}/sendMessage",
            {"chat_id": chat_id, "text": msg},
            TELEGRAM_TIMEOUT_S,
            'telegram delivery',
        )
        if not tg_data.get('ok'):
            raise SystemExit(f"[FAIL] telegram send not ok: {tg_data}")
        delivered += 1
    return delivered


def main():
    """Read the status file, ask the agent for a report, forward it."""
    status_json = Path(sys.argv[1])
    # sys.argv[2] carries ROOT from the shell wrapper; nothing here uses it.
    router_url = sys.argv[3].rstrip('/')
    report_mode = sys.argv[4]
    timeout_s = parse_int(sys.argv[5], 'SOFIIA_REPORT_TIMEOUT')
    max_tokens = parse_int(sys.argv[6], 'SOFIIA_REPORT_MAX_TOKENS')
    chat_id = sys.argv[7]
    user_id = sys.argv[8]
    username = sys.argv[9]
    tg_chat_id = sys.argv[10].strip()
    tg_token = os.environ.get('SOFIIA_BOT_TOKEN', '').strip()

    payload = load_status(status_json)
    status = str(payload.get('status', 'unknown')).lower()
    if report_mode == 'fail_only' and status == 'ok':
        print('[INFO] sofiia notify skipped: status=ok and mode=fail_only')
        return

    log_tail = read_log_tail(payload.get('log_path'))
    body = {
        'prompt': build_prompt(payload, log_tail),
        'max_tokens': max_tokens,
        'temperature': 0.1,
        'metadata': {
            'source': 'ops-monitor-canary',
            'force_concise': True,
            'user_id': user_id,
            'chat_id': chat_id,
            'username': username,
            'session_id': f'{chat_id}:sofiia:monitor',
            'report_mode': report_mode,
        }
    }

    data = post_json(
        f"{router_url}/v1/agents/sofiia/infer", body, timeout_s, 'sofiia report'
    )
    text = (data.get('response') or '').strip()
    short = text[:200]
    print(f"[OK] sofiia report sent: backend={data.get('backend')} model={data.get('model')} preview={short!r}")

    if tg_chat_id and tg_token and text:
        header = (
            "[NODE1 Monitor]\n"
            f"status={payload.get('status')} exit_code={payload.get('exit_code')}\n\n"
        )
        delivered = deliver_to_telegram(tg_token, tg_chat_id, header, text)
        print(f"[OK] telegram report delivered: chat_id={tg_chat_id} parts={delivered}")
    else:
        print('[INFO] telegram delivery skipped (missing SOFIIA_REPORT_TELEGRAM_CHAT_ID or token or empty text)')


main()
PY