#!/usr/bin/env bash
#
# Phase-6 smoke test for anti-silent tuning.
#
# Flow: seed synthetic gateway events in Postgres, publish one matching event
# to JetStream, wait for the learner to emit an anti_silent_tuning lesson, then
# POST Telegram-style webhooks to the gateway with ANTI_SILENT_TUNING_ENABLED
# switched on and off and check the reported template.
#
# Host requirements visible in this script: docker (with compose), curl, python3.
# Every setting is overridable through the environment (see the defaults below).

set -euo pipefail

# Run from the repository root (the parent of this script's directory) so the
# relative compose file path resolves.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# --- Configuration (override any value through the environment) -------------
PG_CONTAINER="${PG_CONTAINER:-dagi-postgres}"
GW_CONTAINER="${GW_CONTAINER:-dagi-gateway-node1}"
LEARNER_CONTAINER="${LEARNER_CONTAINER:-dagi-experience-learner-node1}"
PG_USER="${PG_USER:-daarion}"
PG_DB="${PG_DB:-daarion_memory}"
GW_URL="${GW_URL:-http://127.0.0.1:9300/agromatrix/telegram/webhook}"
COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.node1.yml}"

# Seed volume: the first SEED_WINNER_COUNT of SEED_TOTAL rows get the
# SILENT_POLICY template, the remainder get UNSUPPORTED_INPUT.
SEED_TOTAL="${PHASE6_SEED_TOTAL:-40}"
SEED_WINNER_COUNT="${PHASE6_SEED_WINNER_COUNT:-20}"

# Learner/gateway thresholds passed to the containers while the smoke runs.
SMOKE_MIN_EVIDENCE="${PHASE6_SMOKE_MIN_EVIDENCE:-3}"
SMOKE_MIN_SCORE="${PHASE6_SMOKE_MIN_SCORE:-0.5}"
SMOKE_WINDOW_DAYS="${PHASE6_SMOKE_WINDOW_DAYS:-1}"
SMOKE_TTL_DAYS="${PHASE6_SMOKE_TTL_DAYS:-1}"
SMOKE_DB_TIMEOUT_MS="${PHASE6_SMOKE_DB_TIMEOUT_MS:-250}"

# Succeed only when $2 is a plain non-negative decimal integer.
#   $1 - name to show in the error message
#   $2 - value to check
phase6_require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "[ERR] $1 must be a non-negative integer, got: ${2:-<empty>}" >&2
    return 1
  fi
}

# Check the seed counts before they reach arithmetic or SQL: both values are
# spliced into the seed statement, and bash arithmetic would otherwise
# evaluate arbitrary text as an expression.
phase6_validate_seed_counts() {
  phase6_require_uint PHASE6_SEED_TOTAL "$SEED_TOTAL" || return 1
  phase6_require_uint PHASE6_SEED_WINNER_COUNT "$SEED_WINNER_COUNT" || return 1
  # 10# forces base 10 so values with leading zeros are not read as octal.
  if ((10#$SEED_WINNER_COUNT >= 10#$SEED_TOTAL)); then
    echo "[ERR] PHASE6_SEED_WINNER_COUNT must be smaller than PHASE6_SEED_TOTAL" >&2
    return 1
  fi
}

phase6_validate_seed_counts || exit 1

# RUN_ID tags every row and request created by this run. It is embedded in the
# numeric "update_id" of the webhook JSON, so it has to be digits only.
RUN_ID="${PHASE6_RUN_ID:-$(date +%s)}"
phase6_require_uint PHASE6_RUN_ID "$RUN_ID" || exit 1

SEED_REASON="phase6_seed_reason_${RUN_ID}"
SEED_TRIGGER="reason=${SEED_REASON};chat_type=group"

# The chat id is embedded as a bare JSON number; a leading minus is allowed.
TEST_CHAT_ID="${PHASE6_TEST_CHAT_ID:-778001}"
if [[ ! "$TEST_CHAT_ID" =~ ^-?[0-9]+$ ]]; then
  echo "[ERR] PHASE6_TEST_CHAT_ID must be an integer, got: ${TEST_CHAT_ID:-<empty>}" >&2
  exit 1
fi

CLEANUP_ENABLED="${PHASE6_CLEANUP:-1}"

# Run psql inside the Postgres container against the configured database.
# Arguments: forwarded verbatim to psql (stdin is forwarded too, via -i).
# Globals:   PG_CONTAINER, PG_USER, PG_DB (read)
# Returns:   psql's exit status; ON_ERROR_STOP=1 makes SQL errors non-zero.
psql_exec() {
  docker exec -i "$PG_CONTAINER" \
    psql -v ON_ERROR_STOP=1 -U "$PG_USER" -d "$PG_DB" "$@"
}

# Run one SQL statement and print the result unaligned and tuples-only
# (psql -At), which suits capturing a scalar with $(...).
# Arguments: $1 - SQL text
psql_at() {
  psql_exec -At -c "$1"
}

# Poll a URL until curl reports success or the attempts run out.
# Arguments: $1 - URL to GET
#            $2 - service label used in the timeout message
#            $3 - number of attempts (default 30)
#            $4 - seconds to sleep after a failed attempt (default 1)
# Returns:   0 once the URL answers successfully, 1 after the last attempt.
_wait_http_ok() {
  local url="$1"
  local label="$2"
  local retries="${3:-30}"
  local delay="${4:-1}"
  local attempt
  for ((attempt = 1; attempt <= retries; attempt++)); do
    # --max-time bounds each probe so a hung connection cannot stall the loop.
    if curl -fsS --max-time 5 "$url" >/dev/null 2>&1; then
      return 0
    fi
    sleep "$delay"
  done
  echo "[ERR] ${label} health check timed out" >&2
  return 1
}

# Wait for the gateway's /health endpoint; the base URL is GW_URL with the
# webhook path suffix stripped.
wait_gateway_health() {
  _wait_http_ok "${GW_URL%/agromatrix/telegram/webhook}/health" gateway
}

# Wait for the experience-learner's /health endpoint. LEARNER_HEALTH_URL may
# override the default address.
wait_learner_health() {
  _wait_http_ok "${LEARNER_HEALTH_URL:-http://127.0.0.1:9109/health}" learner
}

# Recreate the gateway service with anti-silent tuning switched on or off,
# then wait until it reports healthy.
# Arguments: $1 - value for ANTI_SILENT_TUNING_ENABLED (callers pass true/false)
# Globals:   SMOKE_DB_TIMEOUT_MS, COMPOSE_FILE (read)
# Returns:   non-zero if the recreate or the health wait fails.
apply_gateway_flag() {
  local flag="$1"
  echo "[info] recreate gateway with ANTI_SILENT_TUNING_ENABLED=${flag}"
  # `|| return` matters: when this function is called on the left of `||`
  # errexit is suspended, and without it a failed recreate would go on to
  # health-check the old container and report success.
  ANTI_SILENT_TUNING_ENABLED="$flag" \
  ANTI_SILENT_TUNING_DB_TIMEOUT_MS="$SMOKE_DB_TIMEOUT_MS" \
    docker compose -f "$COMPOSE_FILE" up -d --no-deps --build --force-recreate gateway >/dev/null || return
  wait_gateway_health
}

# Recreate the experience-learner service with the smoke thresholds, then wait
# until it reports healthy.
# Globals: SMOKE_MIN_EVIDENCE, SMOKE_MIN_SCORE, SMOKE_WINDOW_DAYS,
#          SMOKE_TTL_DAYS, COMPOSE_FILE (read)
# Returns: non-zero if the recreate or the health wait fails.
apply_learner_smoke_thresholds() {
  echo "[info] recreate experience-learner with smoke thresholds"
  # `|| return` keeps a failed recreate from being masked by the health check
  # when the caller has errexit suspended.
  ANTI_SILENT_TUNING_MIN_EVIDENCE="$SMOKE_MIN_EVIDENCE" \
  ANTI_SILENT_TUNING_MIN_SCORE="$SMOKE_MIN_SCORE" \
  ANTI_SILENT_TUNING_WINDOW_DAYS="$SMOKE_WINDOW_DAYS" \
  ANTI_SILENT_TUNING_TTL_DAYS="$SMOKE_TTL_DAYS" \
    docker compose -f "$COMPOSE_FILE" up -d --no-deps --build --force-recreate experience-learner >/dev/null || return
  wait_learner_health
}

# Recreate the experience-learner service without the smoke overrides, then
# wait until it reports healthy.
# NOTE(review): "defaults" means whatever compose resolves when this shell does
# not set ANTI_SILENT_TUNING_*; values already exported by the caller would
# still apply.
# Globals: COMPOSE_FILE (read)
# Returns: non-zero if the recreate or the health wait fails.
restore_learner_defaults() {
  echo "[info] restore experience-learner default thresholds"
  # `|| return` keeps a failed recreate from being masked by the health check
  # when the caller has errexit suspended (the exit handler calls us that way).
  docker compose -f "$COMPOSE_FILE" up -d --no-deps --build --force-recreate experience-learner >/dev/null || return
  wait_learner_health
}

# Delete the rows this smoke test created. Does nothing unless
# CLEANUP_ENABLED is "1".
# Removes events flagged raw.seed_test=true or carrying this run's request_id
# prefix, and lessons matching this run's trigger or flagged seed_test=true.
# Globals: CLEANUP_ENABLED, RUN_ID, SEED_TRIGGER (read)
cleanup_phase6() {
  [[ "$CLEANUP_ENABLED" == "1" ]] || return 0

  echo "[cleanup] remove seed rows"
  psql_exec <<SQL >/dev/null
DELETE FROM agent_experience_events
WHERE raw->>'seed_test' = 'true'
   OR request_id LIKE 'phase6-seed-${RUN_ID}-%';

DELETE FROM agent_lessons
WHERE trigger = '${SEED_TRIGGER}'
   OR raw->>'seed_test' = 'true';
SQL
}

# EXIT handler: best-effort teardown that preserves the script's exit status.
# Removes seed rows (when enabled), restores the learner's default thresholds
# and switches gateway tuning back off. A failing teardown step never changes
# the exit status, but it is reported on stderr so a stack left in smoke
# configuration does not go unnoticed.
on_exit() {
  local code=$?  # status the script was exiting with; must be captured first

  if [[ "$CLEANUP_ENABLED" == "1" ]]; then
    cleanup_phase6 || echo "[warn] seed row cleanup failed" >&2
  fi
  restore_learner_defaults >/dev/null 2>&1 \
    || echo "[warn] failed to restore experience-learner defaults" >&2
  apply_gateway_flag false >/dev/null 2>&1 \
    || echo "[warn] failed to switch gateway tuning off" >&2

  if [[ $code -ne 0 ]]; then
    echo "[FAIL] Phase-6 smoke failed" >&2
  fi
  exit "$code"
}
trap on_exit EXIT

echo "[precheck] containers"
# List running containers once, then match names exactly. Piping `docker ps`
# straight into `grep -q` is unsafe under `pipefail`: grep exits on the first
# match, the producer can die of SIGPIPE, and the pipeline then reports failure
# for a container that is actually running. -F/-x also stops names from being
# interpreted as regular expressions.
running_containers="$(docker ps --format '{{.Names}}')"
for required_container in "$PG_CONTAINER" "$GW_CONTAINER" "$LEARNER_CONTAINER"; do
  if ! grep -Fxq -- "$required_container" <<<"$running_containers"; then
    echo "[ERR] missing ${required_container}" >&2
    exit 1
  fi
done

# Both services must answer before the learner is recreated with the smoke
# thresholds.
wait_gateway_health
wait_learner_health
apply_learner_smoke_thresholds

echo "[seed] insert ${SEED_TOTAL} synthetic gateway events (reason=${SEED_REASON})"
# Rows 1..SEED_WINNER_COUNT carry SILENT_POLICY with user_signal 'none'; the
# rest carry UNSUPPORTED_INPUT with user_signal 'retry'. Timestamps step back
# 15 seconds per row. Every row is flagged raw.seed_test=true and gets this
# run's request_id prefix so cleanup can find it.
psql_exec <<SQL >/dev/null
WITH seed AS (
  SELECT
    -- 32 hex digits cast directly to uuid; random() is evaluated per row.
    md5(random()::text || clock_timestamp()::text)::uuid AS event_id,
    now() - (g * interval '15 second') AS ts,
    CASE WHEN g <= ${SEED_WINNER_COUNT} THEN 'SILENT_POLICY' ELSE 'UNSUPPORTED_INPUT' END AS template_id,
    CASE WHEN g <= ${SEED_WINNER_COUNT} THEN 'none' ELSE 'retry' END AS user_signal
  FROM generate_series(1, ${SEED_TOTAL}) g
)
INSERT INTO agent_experience_events (
  event_id, ts, node_id, source, agent_id, task_type, request_id,
  channel, inputs_hash, provider, model, profile, latency_ms,
  tokens_in, tokens_out, ok, error_class, error_msg_redacted,
  http_status, raw
)
SELECT
  event_id,
  ts,
  'NODA1',
  'gateway',
  'agromatrix',
  'webhook',
  'phase6-seed-${RUN_ID}-' || event_id::text,
  'telegram',
  md5(event_id::text),
  'gateway',
  'gateway',
  NULL,
  20,
  NULL,
  NULL,
  true,
  NULL,
  NULL,
  200,
  jsonb_build_object(
    'seed_test', true,
    'event_id', event_id::text,
    'source', 'gateway',
    'agent_id', 'agromatrix',
    'chat_type', 'group',
    'anti_silent_action', 'ACK_EMITTED',
    'anti_silent_template', template_id,
    'policy', jsonb_build_object('sowa_decision', 'SILENT', 'reason', '${SEED_REASON}'),
    'feedback', jsonb_build_object('user_signal', user_signal),
    'result', jsonb_build_object('ok', true, 'http_status', 200)
  )
FROM seed;
SQL

echo "[trigger] publish one matching event to JetStream"
# The publisher runs inside the learner container, using that container's
# Python and nats module. The seed reason reaches it through `docker exec -e`;
# the heredoc is quoted so the shell leaves the Python source untouched.
docker exec -e SEED_REASON_ENV="$SEED_REASON" -i "$LEARNER_CONTAINER" python - <<'PY'
import asyncio
import json
import os
import uuid
from datetime import datetime, timezone

import nats

reason = os.environ["SEED_REASON_ENV"]


async def main():
    nc = await nats.connect("nats://nats:4222")
    try:
        js = nc.jetstream()
        event = {
            "event_id": str(uuid.uuid4()),
            "ts": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "node_id": "NODA1",
            "source": "gateway",
            "agent_id": "agromatrix",
            "request_id": "phase6-seed-pub-" + str(uuid.uuid4()),
            "channel": "telegram",
            "chat_type": "group",
            "task_type": "webhook",
            "inputs_hash": "phase6-seed-trigger",
            "anti_silent_action": "ACK_EMITTED",
            "anti_silent_template": "SILENT_POLICY",
            "policy": {"sowa_decision": "SILENT", "reason": reason},
            "feedback": {"user_signal": "none", "operator_tag": None},
            "llm": {"provider": "gateway", "model": "gateway", "profile": None, "latency_ms": 20, "tokens_in": None, "tokens_out": None},
            "result": {"ok": True, "http_status": 200, "error_class": None, "error_msg_redacted": None},
        }
        await js.publish(
            "agent.experience.v1.agromatrix",
            json.dumps(event).encode("utf-8"),
            headers={"Nats-Msg-Id": event["event_id"]},
        )
    finally:
        # Close the connection even when the publish raises.
        await nc.close()


asyncio.run(main())
PY

echo "[assert] learner generated anti_silent_tuning lesson"
# Poll up to 25 times, one second apart, for the newest lesson carrying this
# run's trigger.
lesson_action=""
lesson_query="SELECT action FROM agent_lessons WHERE raw->>'lesson_type'='anti_silent_tuning' AND trigger='${SEED_TRIGGER}' ORDER BY ts DESC LIMIT 1;"
for ((lesson_attempt = 1; lesson_attempt <= 25; lesson_attempt++)); do
  lesson_action="$(psql_at "$lesson_query")"
  if [[ -n "$lesson_action" ]]; then
    break
  fi
  sleep 1
done

# An empty value (no lesson within the window) fails here as well.
if [[ "$lesson_action" != "prefer_template=SILENT_POLICY" ]]; then
  echo "[ERR] expected lesson action prefer_template=SILENT_POLICY, got: ${lesson_action:-<empty>}" >&2
  exit 1
fi

echo "[seed] insert manual apply lesson (seed_test=true) for unsupported_no_message"
manual_lesson_id="$(python3 -c 'import uuid; print(uuid.uuid4())')"
# lesson_key is the SHA-256 of "phase6-manual-apply-<RUN_ID>". RUN_ID is passed
# through the environment rather than spliced into the Python source, so its
# content can never be executed as code.
manual_key="$(PHASE6_KEY_RUN_ID="$RUN_ID" python3 -c '
import hashlib
import os

seed = "phase6-manual-apply-" + os.environ["PHASE6_KEY_RUN_ID"]
print(hashlib.sha256(seed.encode()).hexdigest())
')"

# Upsert on lesson_key so a re-run with the same RUN_ID refreshes the row
# instead of failing on the conflict.
psql_exec <<SQL >/dev/null
INSERT INTO agent_lessons (
  lesson_id, lesson_key, ts, scope, agent_id, task_type,
  trigger, action, avoid, signals, evidence, raw
) VALUES (
  '${manual_lesson_id}'::uuid,
  '${manual_key}',
  now(),
  'global',
  NULL,
  'webhook',
  'reason=unsupported_no_message;chat_type=group',
  'prefer_template=SILENT_POLICY',
  'avoid_template=UNSUPPORTED_INPUT',
  jsonb_build_object('lesson_type','anti_silent_tuning','policy_reason','unsupported_no_message','chat_type','group','seed_test',true),
  jsonb_build_object('n_best',999,'score_best',1.0,'window_days',1),
  jsonb_build_object(
    'lesson_type','anti_silent_tuning',
    'seed_test',true,
    'trigger','reason=unsupported_no_message;chat_type=group',
    'action','prefer_template=SILENT_POLICY',
    'expires_at', to_char((now() + interval '2 hours') at time zone 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"')
  )
)
ON CONFLICT (lesson_key) DO UPDATE SET
  ts = EXCLUDED.ts,
  action = EXCLUDED.action,
  avoid = EXCLUDED.avoid,
  signals = EXCLUDED.signals,
  evidence = EXCLUDED.evidence,
  raw = EXCLUDED.raw;
SQL

apply_gateway_flag true

echo "[assert] gateway applies tuning when flag ON"
# Up to 8 attempts, two seconds apart. Each attempt posts a sticker-only group
# message and expects template_id=SILENT_POLICY with tuning_applied=true.
ok_on=0
for ((i = 1; i <= 8; i++)); do
  payload_on="$(printf '{"update_id":%s00%s,"message":{"message_id":%s,"date":0,"chat":{"id":%s,"type":"group"},"from":{"id":12345,"is_bot":false,"first_name":"T"},"sticker":{"file_id":"phase6-on-%s"}}}' \
    "$RUN_ID" "$i" "$i" "$TEST_CHAT_ID" "$i")"
  # A transport failure must count as a failed attempt. Without `|| resp_on=""`
  # errexit would end the script on the first curl error and the retries below
  # would never run.
  resp_on="$(curl -sS -X POST "$GW_URL" -H 'content-type: application/json' -d "$payload_on")" || resp_on=""
  if RESP_ON="$resp_on" python3 - <<'PY'
import json
import os

try:
    resp = json.loads(os.environ["RESP_ON"])
except ValueError:
    # Empty or non-JSON body: treat as a mismatch so the caller retries.
    raise SystemExit(1)
if not isinstance(resp, dict):
    raise SystemExit(1)
if resp.get("template_id") == "SILENT_POLICY" and str(resp.get("tuning_applied", "")).lower() == "true":
    print("[ok] tuning ON response:", resp)
    raise SystemExit(0)
raise SystemExit(1)
PY
  then
    ok_on=1
    break
  fi
  echo "[warn] tuning ON attempt ${i} did not match expected payload: ${resp_on}" >&2
  sleep 2
done

if [[ "$ok_on" -ne 1 ]]; then
  echo "[ERR] tuning ON assertion failed after retries" >&2
  exit 1
fi

apply_gateway_flag false

echo "[assert] gateway does not apply tuning when flag OFF"
# The ON phase uses update_id suffixes 001..008 and message_id 1..8. The OFF
# request takes 009 / 9 so it never repeats an id already sent in this run.
# NOTE(review): whether the gateway de-duplicates on update_id or message_id is
# not visible in this script; unique ids make the check independent of that.
payload_off="$(printf '{"update_id":%s009,"message":{"message_id":9,"date":0,"chat":{"id":%s,"type":"group"},"from":{"id":12345,"is_bot":false,"first_name":"T"},"sticker":{"file_id":"phase6-off"}}}' \
  "$RUN_ID" "$TEST_CHAT_ID")"
resp_off="$(curl -sS -X POST "$GW_URL" -H 'content-type: application/json' -d "$payload_off")"
# With tuning off the gateway is expected to answer with the untuned template.
RESP_OFF="$resp_off" python3 - <<'PY'
import json
import os

resp = json.loads(os.environ["RESP_OFF"])
if resp.get("template_id") != "UNSUPPORTED_INPUT" or str(resp.get("tuning_applied", "")).lower() != "false":
    raise SystemExit(f"unexpected response with tuning OFF: {resp}")
print("[ok] tuning OFF response:", resp)
PY

echo "[PASS] phase6 smoke completed"