ops(ci): add phase6 smoke automation and CI workflows
This commit is contained in:
116
.gitea/workflows/phase6-smoke.yml
Normal file
116
.gitea/workflows/phase6-smoke.yml
Normal file
@@ -0,0 +1,116 @@
|
||||
name: phase6-smoke
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
ssh_host:
|
||||
description: "NODA1 SSH host (optional override)"
|
||||
required: false
|
||||
type: string
|
||||
ssh_user:
|
||||
description: "NODA1 SSH user (optional override, default root)"
|
||||
required: false
|
||||
type: string
|
||||
workflow_run:
|
||||
workflows:
|
||||
- deploy-node1
|
||||
- deploy-node1-runtime
|
||||
- Deploy Node1
|
||||
types:
|
||||
- completed
|
||||
|
||||
jobs:
|
||||
phase6-smoke:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Skip when upstream deploy was not successful
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
event="${{ gitea.event_name }}"
|
||||
if [ "$event" != "workflow_run" ]; then
|
||||
exit 0
|
||||
fi
|
||||
conclusion="$(jq -r '.workflow_run.conclusion // empty' "${GITHUB_EVENT_PATH:-/dev/null}" 2>/dev/null || true)"
|
||||
if [ "$conclusion" != "success" ]; then
|
||||
echo "workflow_run conclusion=${conclusion:-unknown}; skip smoke."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
- name: Resolve SSH target
|
||||
shell: bash
|
||||
env:
|
||||
DEFAULT_SSH_HOST: ${{ secrets.NODA1_SSH_HOST }}
|
||||
DEFAULT_SSH_USER: ${{ secrets.NODA1_SSH_USER }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
host="${DEFAULT_SSH_HOST:-}"
|
||||
user="${DEFAULT_SSH_USER:-root}"
|
||||
|
||||
if [ "${{ gitea.event_name }}" = "workflow_dispatch" ]; then
|
||||
if [ -n "${{ inputs.ssh_host }}" ]; then
|
||||
host="${{ inputs.ssh_host }}"
|
||||
fi
|
||||
if [ -n "${{ inputs.ssh_user }}" ]; then
|
||||
user="${{ inputs.ssh_user }}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$host" ]; then
|
||||
echo "Missing SSH host (workflow input or secret NODA1_SSH_HOST)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "SSH_HOST=$host" >> "$GITHUB_ENV"
|
||||
echo "SSH_USER=$user" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Prepare SSH key
|
||||
shell: bash
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.NODA1_SSH_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
set +x
|
||||
if [ -z "${SSH_PRIVATE_KEY:-}" ]; then
|
||||
echo "Missing secret NODA1_SSH_KEY" >&2
|
||||
exit 1
|
||||
fi
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
|
||||
- name: Run phase6 smoke (retry once)
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
set +x
|
||||
mkdir -p artifacts
|
||||
for attempt in 1 2; do
|
||||
log="artifacts/phase6-smoke-attempt${attempt}.log"
|
||||
if ssh \
|
||||
-i ~/.ssh/id_ed25519 \
|
||||
-o BatchMode=yes \
|
||||
-o StrictHostKeyChecking=accept-new \
|
||||
-o ConnectTimeout=10 \
|
||||
"${SSH_USER}@${SSH_HOST}" \
|
||||
"set -euo pipefail; cd /opt/microdao-daarion; git rev-parse HEAD; make phase6-smoke" \
|
||||
| tee "$log"; then
|
||||
cp "$log" artifacts/phase6-smoke.log
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$attempt" -eq 2 ]; then
|
||||
echo "phase6 smoke failed after retry" >&2
|
||||
exit 1
|
||||
fi
|
||||
sleep 15
|
||||
done
|
||||
|
||||
- name: Print artifact paths
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "Smoke logs stored in workspace:"
|
||||
ls -la artifacts || true
|
||||
122
.github/workflows/phase6-smoke.yml
vendored
Normal file
122
.github/workflows/phase6-smoke.yml
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
name: phase6-smoke
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
ssh_host:
|
||||
description: "NODA1 SSH host (optional override)"
|
||||
required: false
|
||||
type: string
|
||||
ssh_user:
|
||||
description: "NODA1 SSH user (optional override, default root)"
|
||||
required: false
|
||||
type: string
|
||||
workflow_call:
|
||||
inputs:
|
||||
ssh_host:
|
||||
required: false
|
||||
type: string
|
||||
ssh_user:
|
||||
required: false
|
||||
type: string
|
||||
secrets:
|
||||
NODA1_SSH_HOST:
|
||||
required: false
|
||||
NODA1_SSH_USER:
|
||||
required: false
|
||||
NODA1_SSH_KEY:
|
||||
required: true
|
||||
workflow_run:
|
||||
workflows:
|
||||
- Deploy Node1
|
||||
- deploy-node1
|
||||
- deploy-node1-runtime
|
||||
types:
|
||||
- completed
|
||||
|
||||
jobs:
|
||||
phase6-smoke:
|
||||
    # NOTE: inside a reusable workflow, github.event_name reports the CALLER's
    # event and is never the literal 'workflow_call', so the previous
    # `event_name == 'workflow_call'` branch could never match and calls from
    # push-triggered deploys were skipped. Gate only the workflow_run case.
    if: >
      github.event_name != 'workflow_run' ||
      github.event.workflow_run.conclusion == 'success'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
env:
|
||||
DEFAULT_SSH_HOST: ${{ secrets.NODA1_SSH_HOST }}
|
||||
DEFAULT_SSH_USER: ${{ secrets.NODA1_SSH_USER }}
|
||||
steps:
|
||||
- name: Resolve SSH target
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
host="${DEFAULT_SSH_HOST}"
|
||||
user="${DEFAULT_SSH_USER:-root}"
|
||||
|
||||
          # github.event_name is never 'workflow_call' in a reusable workflow
          # (it reflects the caller's event), so honor inputs whenever they are
          # non-empty; workflow_run events provide no inputs, leaving the
          # secret-derived defaults intact.
          if [ -n "${{ inputs.ssh_host }}" ]; then
            host="${{ inputs.ssh_host }}"
          fi
          if [ -n "${{ inputs.ssh_user }}" ]; then
            user="${{ inputs.ssh_user }}"
          fi
|
||||
|
||||
if [ -z "${host}" ]; then
|
||||
echo "Missing SSH host (workflow input or secret NODA1_SSH_HOST)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "SSH_HOST=${host}" >> "${GITHUB_ENV}"
|
||||
echo "SSH_USER=${user:-root}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Prepare SSH key
|
||||
shell: bash
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.NODA1_SSH_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
set +x
|
||||
if [ -z "${SSH_PRIVATE_KEY}" ]; then
|
||||
echo "Missing secret NODA1_SSH_KEY" >&2
|
||||
exit 1
|
||||
fi
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
printf '%s\n' "${SSH_PRIVATE_KEY}" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
|
||||
- name: Run phase6 smoke (retry once)
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
set +x
|
||||
mkdir -p artifacts
|
||||
for attempt in 1 2; do
|
||||
log="artifacts/phase6-smoke-attempt${attempt}.log"
|
||||
if ssh \
|
||||
-i ~/.ssh/id_ed25519 \
|
||||
-o BatchMode=yes \
|
||||
-o StrictHostKeyChecking=accept-new \
|
||||
-o ConnectTimeout=10 \
|
||||
"${SSH_USER}@${SSH_HOST}" \
|
||||
"set -euo pipefail; cd /opt/microdao-daarion; git rev-parse HEAD; make phase6-smoke" \
|
||||
| tee "${log}"; then
|
||||
cp "${log}" artifacts/phase6-smoke.log
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "${attempt}" -eq 2 ]; then
|
||||
echo "phase6 smoke failed after retry" >&2
|
||||
exit 1
|
||||
fi
|
||||
sleep 15
|
||||
done
|
||||
|
||||
- name: Upload smoke logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: phase6-smoke-logs
|
||||
path: artifacts/
|
||||
if-no-files-found: ignore
|
||||
5
Makefile
5
Makefile
@@ -3,7 +3,7 @@ SHELL := /bin/bash
|
||||
COMPOSE = docker compose -f infra/compose/docker-compose.yml
|
||||
PROFILES = --profile farmos --profile thingsboard --profile nats --profile integration
|
||||
|
||||
.PHONY: up down reset logs seed test
|
||||
.PHONY: up down reset logs seed test phase6-smoke
|
||||
|
||||
up:
|
||||
$(COMPOSE) $(PROFILES) up -d --build
|
||||
@@ -23,6 +23,9 @@ seed:
|
||||
test:
|
||||
python3 -m pytest -q
|
||||
|
||||
phase6-smoke:
|
||||
./ops/smoke_phase6.sh
|
||||
|
||||
replay-dlq:
|
||||
python3 scripts/replay_dlq.py
|
||||
|
||||
|
||||
59
docs/ops/ci_smoke.md
Normal file
59
docs/ops/ci_smoke.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# CI Smoke: Phase-6
|
||||
|
||||
## Workflows
|
||||
- `.github/workflows/phase6-smoke.yml`
|
||||
- `workflow_dispatch`: manual smoke run on NODA1 via SSH.
|
||||
- `workflow_call`: reusable smoke step for deploy workflows (recommended for hard gate).
|
||||
- `workflow_run`: auto-run after successful deploy workflows:
|
||||
- `Deploy Node1`
|
||||
- `deploy-node1`
|
||||
- `deploy-node1-runtime`
|
||||
- `.gitea/workflows/phase6-smoke.yml`
|
||||
- `workflow_dispatch`: manual smoke run for Gitea Actions.
|
||||
- `workflow_run`: auto-run after deploy workflows in Gitea.
|
||||
|
||||
## Required Secrets
|
||||
- `NODA1_SSH_HOST`
|
||||
- `NODA1_SSH_USER` (optional, defaults to `root` if empty)
|
||||
- `NODA1_SSH_KEY`
|
||||
|
||||
## Manual Run
|
||||
1. Open Actions (`GitHub` or `Gitea`) -> `phase6-smoke`.
|
||||
2. Click `Run workflow` / `Run`.
|
||||
3. Optionally override `ssh_host` and `ssh_user`.
|
||||
4. Run and wait for the `phase6-smoke` job result.
|
||||
|
||||
## On-Deploy Run
|
||||
- Triggered automatically only when a configured deploy workflow finishes with `success`.
|
||||
- Job retries once on transient failures (SSH/network hiccups).
|
||||
- If smoke still fails, workflow is marked failed.
|
||||
- For strict deploy gating in the same pipeline, call this workflow via `workflow_call` from deploy workflow and set `needs`.
|
||||
In Gitea, use same-workflow `needs` gate (or `workflow_run` from deploy workflow) because `workflow_call` support depends on runner/version.
|
||||
|
||||
Example (`.github/workflows/deploy-node1.yml`):
|
||||
```yaml
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "deploy..."
|
||||
|
||||
smoke:
|
||||
needs: [deploy]
|
||||
uses: ./.github/workflows/phase6-smoke.yml
|
||||
secrets:
|
||||
NODA1_SSH_HOST: ${{ secrets.NODA1_SSH_HOST }}
|
||||
NODA1_SSH_USER: ${{ secrets.NODA1_SSH_USER }}
|
||||
NODA1_SSH_KEY: ${{ secrets.NODA1_SSH_KEY }}
|
||||
```
|
||||
|
||||
## Artifacts
|
||||
- `phase6-smoke-logs` artifact includes:
|
||||
- `phase6-smoke.log`
|
||||
- per-attempt logs (`phase6-smoke-attempt1.log`, `phase6-smoke-attempt2.log` when retry happened)
|
||||
|
||||
## Troubleshooting
|
||||
- `Missing SSH host`: add `NODA1_SSH_HOST` secret or pass `ssh_host` input.
|
||||
- `Missing secret NODA1_SSH_KEY`: add deploy key secret.
|
||||
- SSH host key issues: workflow uses `StrictHostKeyChecking=accept-new`; if host changed, rotate known host entry and retry.
|
||||
- Remote smoke fail: open artifact logs and check `/opt/microdao-daarion` state on NODA1.
|
||||
209
docs/ops/phase6_anti_silent_tuning.md
Normal file
209
docs/ops/phase6_anti_silent_tuning.md
Normal file
@@ -0,0 +1,209 @@
|
||||
# Phase-6 Anti-Silent Lessons Tuning (Closed-loop)
|
||||
|
||||
## Goal
|
||||
Tune anti-silent ACK template selection using evidence from gateway events (`reason + chat_type + template_id + user_signal`) without unsafe auto-mutation.
|
||||
|
||||
## Safety Invariants
|
||||
- No automatic global policy rewrite.
|
||||
- Gateway applies tuning only when `ANTI_SILENT_TUNING_ENABLED=true`.
|
||||
- Learner emits tuning lessons only with thresholds (`MIN_EVIDENCE`, `MIN_SCORE`).
|
||||
- Lessons have TTL (`expires_at`) for rollback-by-expiry.
|
||||
|
||||
## Components
|
||||
- `services/experience-learner/main.py`
|
||||
- emits `lesson_type=anti_silent_tuning`
|
||||
- computes score: `1 - (w_retry*retry_rate + w_negative*negative_rate + w_suppressed*suppressed_rate)`
|
||||
- `gateway-bot/http_api.py`
|
||||
- applies tuning in anti-silent template resolver under feature flag
|
||||
- fail-open on DB/lookup errors
|
||||
- `gateway-bot/gateway_experience_bus.py`
|
||||
- DB lookup of active tuning lesson by trigger (`reason=<...>;chat_type=<...>`)
|
||||
|
||||
## Environment
|
||||
|
||||
### Learner
|
||||
- `ANTI_SILENT_TUNING_ENABLED=true`
|
||||
- `ANTI_SILENT_TUNING_WINDOW_DAYS=7`
|
||||
- `ANTI_SILENT_TUNING_MIN_EVIDENCE=20`
|
||||
- `ANTI_SILENT_TUNING_MIN_SCORE=0.75`
|
||||
- `ANTI_SILENT_TUNING_WEIGHT_RETRY=0.6`
|
||||
- `ANTI_SILENT_TUNING_WEIGHT_NEGATIVE=0.3`
|
||||
- `ANTI_SILENT_TUNING_WEIGHT_SUPPRESSED=0.1`
|
||||
- `ANTI_SILENT_TUNING_TTL_DAYS=7`
|
||||
|
||||
### Gateway
|
||||
- `ANTI_SILENT_TUNING_ENABLED=false` (default; turn on only after smoke)
|
||||
- `ANTI_SILENT_TUNING_DB_TIMEOUT_MS=40`
|
||||
- `ANTI_SILENT_TUNING_CACHE_TTL_SECONDS=60`
|
||||
|
||||
## Deploy
|
||||
```bash
|
||||
cd /opt/microdao-daarion
|
||||
docker compose -f docker-compose.node1.yml up -d --no-deps --build --force-recreate experience-learner gateway
|
||||
```
|
||||
|
||||
## Single-Command Smoke (Phase-6.1)
|
||||
```bash
|
||||
make phase6-smoke
|
||||
```
|
||||
|
||||
The command runs:
|
||||
1. deterministic seed events
|
||||
2. learner lesson generation assertion
|
||||
3. gateway apply assertion (flag ON)
|
||||
4. gateway fallback assertion (flag OFF)
|
||||
5. seed cleanup (events + lessons)
|
||||
|
||||
Use `PHASE6_CLEANUP=0 make phase6-smoke` to keep artifacts for debugging.
|
||||
|
||||
## CI Integration
|
||||
- Workflow: `.github/workflows/phase6-smoke.yml`
|
||||
- Modes:
|
||||
- `workflow_dispatch` (manual)
|
||||
- `workflow_run` after successful deploy workflow
|
||||
- Operations guide: `docs/ops/ci_smoke.md`
|
||||
|
||||
## Fixed Smoke (Deterministic)
|
||||
|
||||
### 1) Temporary smoke thresholds
|
||||
Use low thresholds only for smoke:
|
||||
- `ANTI_SILENT_TUNING_MIN_EVIDENCE=3`
|
||||
- `ANTI_SILENT_TUNING_MIN_SCORE=0.5`
|
||||
- `ANTI_SILENT_TUNING_WINDOW_DAYS=1`
|
||||
|
||||
### 2) Seed synthetic gateway events
|
||||
```bash
|
||||
export PG_CONTAINER='dagi-postgres'
|
||||
|
||||
docker exec "$PG_CONTAINER" psql -U daarion -d daarion_memory -c "
|
||||
WITH seed AS (
|
||||
SELECT
|
||||
(
|
||||
substr(md5(random()::text || clock_timestamp()::text), 1, 8) || '-' ||
|
||||
substr(md5(random()::text || clock_timestamp()::text), 9, 4) || '-' ||
|
||||
substr(md5(random()::text || clock_timestamp()::text), 13, 4) || '-' ||
|
||||
substr(md5(random()::text || clock_timestamp()::text), 17, 4) || '-' ||
|
||||
substr(md5(random()::text || clock_timestamp()::text), 21, 12)
|
||||
)::uuid AS event_id,
|
||||
now() - (g * interval '1 minute') AS ts,
|
||||
CASE WHEN g <= 3 THEN 'UNSUPPORTED_INPUT' ELSE 'SILENT_POLICY' END AS template_id,
|
||||
CASE WHEN g <= 3 THEN 'retry' ELSE 'none' END AS user_signal
|
||||
FROM generate_series(1,6) g
|
||||
)
|
||||
INSERT INTO agent_experience_events (
|
||||
event_id, ts, node_id, source, agent_id, task_type, request_id,
|
||||
channel, inputs_hash, provider, model, profile, latency_ms,
|
||||
tokens_in, tokens_out, ok, error_class, error_msg_redacted,
|
||||
http_status, raw
|
||||
)
|
||||
SELECT
|
||||
event_id,
|
||||
ts,
|
||||
'NODA1',
|
||||
'gateway',
|
||||
'agromatrix',
|
||||
'webhook',
|
||||
'phase6-seed-' || event_id::text,
|
||||
'telegram',
|
||||
md5(event_id::text),
|
||||
'gateway',
|
||||
'gateway',
|
||||
NULL,
|
||||
25,
|
||||
NULL,
|
||||
NULL,
|
||||
true,
|
||||
NULL,
|
||||
NULL,
|
||||
200,
|
||||
jsonb_build_object(
|
||||
'event_id', event_id::text,
|
||||
'ts', to_char(ts, 'YYYY-MM-DD"T"HH24:MI:SS"Z"'),
|
||||
'source', 'gateway',
|
||||
'agent_id', 'agromatrix',
|
||||
'chat_type', 'group',
|
||||
'anti_silent_action', 'ACK_EMITTED',
|
||||
'anti_silent_template', template_id,
|
||||
'policy', jsonb_build_object('sowa_decision', 'SILENT', 'reason', 'unsupported_no_message'),
|
||||
'feedback', jsonb_build_object('user_signal', user_signal),
|
||||
'result', jsonb_build_object('ok', true, 'http_status', 200)
|
||||
)
|
||||
FROM seed;
|
||||
"
|
||||
```
|
||||
|
||||
### 3) Trigger learner evaluation with one real event
|
||||
```bash
|
||||
export GATEWAY_WEBHOOK_URL='http://127.0.0.1:9300/agromatrix/telegram/webhook'
|
||||
|
||||
curl -sS -X POST "$GATEWAY_WEBHOOK_URL" \
|
||||
-H 'content-type: application/json' \
|
||||
-d @docs/ops/payloads/phase5_payload_group_unsupported_no_message.json
|
||||
```
|
||||
|
||||
### 4) Verify tuning lesson exists
|
||||
```bash
|
||||
docker exec "$PG_CONTAINER" psql -U daarion -d daarion_memory -P pager=off -c "
|
||||
SELECT ts,
|
||||
trigger,
|
||||
action,
|
||||
raw->>'lesson_type' AS lesson_type,
|
||||
raw->>'expires_at' AS expires_at,
|
||||
evidence
|
||||
FROM agent_lessons
|
||||
WHERE raw->>'lesson_type'='anti_silent_tuning'
|
||||
ORDER BY ts DESC
|
||||
LIMIT 5;
|
||||
"
|
||||
```
|
||||
Expected: lesson with
|
||||
- `trigger=reason=unsupported_no_message;chat_type=group`
|
||||
- `action=prefer_template=SILENT_POLICY`
|
||||
|
||||
### 5) Enable gateway tuning and verify apply
|
||||
```bash
|
||||
# set ANTI_SILENT_TUNING_ENABLED=true for gateway and restart container
|
||||
# then replay unsupported payload:
|
||||
|
||||
curl -sS -X POST "$GATEWAY_WEBHOOK_URL" \
|
||||
-H 'content-type: application/json' \
|
||||
-d @docs/ops/payloads/phase5_payload_group_unsupported_no_message.json
|
||||
|
||||
# verify latest gateway events
|
||||
|
||||
docker exec "$PG_CONTAINER" psql -U daarion -d daarion_memory -P pager=off -c "
|
||||
SELECT ts,
|
||||
raw->'policy'->>'reason' AS reason,
|
||||
raw->>'chat_type' AS chat_type,
|
||||
raw->>'anti_silent_template' AS template_id,
|
||||
raw->>'anti_silent_tuning_applied' AS tuning_applied,
|
||||
raw->>'anti_silent_action' AS anti_action
|
||||
FROM agent_experience_events
|
||||
WHERE source='gateway'
|
||||
AND raw->'policy'->>'reason'='unsupported_no_message'
|
||||
ORDER BY ts DESC
|
||||
LIMIT 10;
|
||||
"
|
||||
```
|
||||
Expected: newest rows have
|
||||
- `template_id=SILENT_POLICY`
|
||||
- `tuning_applied=true`
|
||||
|
||||
## PASS
|
||||
- Tuning lesson created only when evidence/score thresholds pass.
|
||||
- Gateway does not change template when feature flag is off.
|
||||
- With feature flag on, gateway applies active non-expired tuning lesson.
|
||||
- Expired lessons are ignored.
|
||||
|
||||
## FAIL
|
||||
- Tuning lesson appears below evidence/score threshold.
|
||||
- Gateway changes template while feature flag is off.
|
||||
- Gateway applies expired lesson.
|
||||
- Webhook path fails when tuning lookup fails (must stay fail-open).
|
||||
|
||||
## Manual Cleanup Query
|
||||
```sql
|
||||
DELETE FROM agent_lessons
|
||||
WHERE raw->>'lesson_type'='anti_silent_tuning'
|
||||
AND raw->>'seed_test'='true';
|
||||
```
|
||||
327
ops/smoke_phase6.sh
Executable file
327
ops/smoke_phase6.sh
Executable file
@@ -0,0 +1,327 @@
|
||||
#!/usr/bin/env bash
# Phase-6 smoke test: seeds synthetic gateway events, asserts the learner
# emits an anti_silent_tuning lesson, and verifies the gateway applies or
# ignores tuning depending on the feature flag.
set -euo pipefail

# Always operate from the repository root, regardless of the caller's cwd.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Container / connection targets (all overridable via environment).
PG_CONTAINER="${PG_CONTAINER:-dagi-postgres}"
GW_CONTAINER="${GW_CONTAINER:-dagi-gateway-node1}"
LEARNER_CONTAINER="${LEARNER_CONTAINER:-dagi-experience-learner-node1}"
PG_USER="${PG_USER:-daarion}"
PG_DB="${PG_DB:-daarion_memory}"
GW_URL="${GW_URL:-http://127.0.0.1:9300/agromatrix/telegram/webhook}"
COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.node1.yml}"

# Seed sizing plus the temporary learner thresholds used only for this run.
SEED_TOTAL="${PHASE6_SEED_TOTAL:-40}"
SEED_WINNER_COUNT="${PHASE6_SEED_WINNER_COUNT:-20}"
SMOKE_MIN_EVIDENCE="${PHASE6_SMOKE_MIN_EVIDENCE:-3}"
SMOKE_MIN_SCORE="${PHASE6_SMOKE_MIN_SCORE:-0.5}"
SMOKE_WINDOW_DAYS="${PHASE6_SMOKE_WINDOW_DAYS:-1}"
SMOKE_TTL_DAYS="${PHASE6_SMOKE_TTL_DAYS:-1}"
SMOKE_DB_TIMEOUT_MS="${PHASE6_SMOKE_DB_TIMEOUT_MS:-250}"

# The "winner" template must not cover every seeded event, otherwise there
# is no losing template for the learner to tune away from.
if [[ "$SEED_WINNER_COUNT" -ge "$SEED_TOTAL" ]]; then
  echo "[ERR] PHASE6_SEED_WINNER_COUNT must be smaller than PHASE6_SEED_TOTAL" >&2
  exit 1
fi

# A unique run id keys every seeded row so cleanup can target this run only.
RUN_ID="${PHASE6_RUN_ID:-$(date +%s)}"
SEED_REASON="phase6_seed_reason_${RUN_ID}"
SEED_TRIGGER="reason=${SEED_REASON};chat_type=group"
TEST_CHAT_ID="${PHASE6_TEST_CHAT_ID:-778001}"
CLEANUP_ENABLED="${PHASE6_CLEANUP:-1}"   # PHASE6_CLEANUP=0 keeps artifacts
|
||||
|
||||
# Execute psql inside the Postgres container with stdin attached;
# any extra arguments are forwarded to psql unchanged.
psql_exec() {
  local -a base=(psql -v ON_ERROR_STOP=1 -U "$PG_USER" -d "$PG_DB")
  docker exec -i "$PG_CONTAINER" "${base[@]}" "$@"
}
|
||||
|
||||
# Run one SQL statement and print unaligned, tuples-only output (-At).
psql_at() {
  local statement=$1
  psql_exec -At -c "$statement"
}
|
||||
|
||||
# Block until the gateway /health endpoint answers, or fail after ~30s.
wait_gateway_health() {
  local attempts_left=30
  local health_url="${GW_URL%/agromatrix/telegram/webhook}/health"
  while (( attempts_left > 0 )); do
    if curl -fsS "$health_url" >/dev/null 2>&1; then
      return 0
    fi
    attempts_left=$((attempts_left - 1))
    sleep 1
  done
  echo "[ERR] gateway health check timed out" >&2
  return 1
}
|
||||
|
||||
# Block until the experience-learner /health endpoint answers, or fail
# after ~30s. The port (9109) is the learner's published health port.
wait_learner_health() {
  local remaining
  for (( remaining = 30; remaining > 0; remaining-- )); do
    if curl -fsS "http://127.0.0.1:9109/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  echo "[ERR] learner health check timed out" >&2
  return 1
}
|
||||
|
||||
# Recreate the gateway service with ANTI_SILENT_TUNING_ENABLED set to $1
# (the compose file reads the variable from the environment), then wait
# until the gateway reports healthy again.
apply_gateway_flag() {
  local enabled=$1
  echo "[info] recreate gateway with ANTI_SILENT_TUNING_ENABLED=${enabled}"
  ANTI_SILENT_TUNING_ENABLED="$enabled" \
    ANTI_SILENT_TUNING_DB_TIMEOUT_MS="$SMOKE_DB_TIMEOUT_MS" \
    docker compose -f "$COMPOSE_FILE" up -d --no-deps --build --force-recreate gateway >/dev/null
  wait_gateway_health
}
|
||||
|
||||
# Recreate the learner with low smoke-only thresholds so the small seed
# set is enough evidence to produce a tuning lesson, then wait for health.
apply_learner_smoke_thresholds() {
  echo "[info] recreate experience-learner with smoke thresholds"
  ANTI_SILENT_TUNING_MIN_EVIDENCE="$SMOKE_MIN_EVIDENCE" \
    ANTI_SILENT_TUNING_MIN_SCORE="$SMOKE_MIN_SCORE" \
    ANTI_SILENT_TUNING_WINDOW_DAYS="$SMOKE_WINDOW_DAYS" \
    ANTI_SILENT_TUNING_TTL_DAYS="$SMOKE_TTL_DAYS" \
    docker compose -f "$COMPOSE_FILE" up -d --no-deps --build --force-recreate experience-learner >/dev/null
  wait_learner_health
}
|
||||
|
||||
# Recreate the learner WITHOUT the smoke-only overrides, so it picks up
# the compose file's default thresholds again, then wait for health.
restore_learner_defaults() {
  echo "[info] restore experience-learner default thresholds"
  docker compose -f "$COMPOSE_FILE" up -d --no-deps --build --force-recreate experience-learner >/dev/null
  wait_learner_health
}
|
||||
|
||||
# Delete the rows created by this run (plus any stale seed_test rows).
# Becomes a no-op when PHASE6_CLEANUP=0 so artifacts can be inspected.
cleanup_phase6() {
  [[ "$CLEANUP_ENABLED" == "1" ]] || return 0
  echo "[cleanup] remove seed rows"
  psql_exec <<SQL >/dev/null
DELETE FROM agent_experience_events
WHERE raw->>'seed_test' = 'true'
   OR request_id LIKE 'phase6-seed-${RUN_ID}-%';

DELETE FROM agent_lessons
WHERE trigger = '${SEED_TRIGGER}'
   OR raw->>'seed_test' = 'true';
SQL
}
|
||||
|
||||
# EXIT trap: best-effort cleanup and service restore on every exit path,
# then propagate the script's original exit status unchanged.
on_exit() {
  local status=$?
  if [[ "$CLEANUP_ENABLED" == "1" ]]; then
    cleanup_phase6 || true
  fi
  # Restores are best-effort: never let teardown mask the real result.
  restore_learner_defaults >/dev/null 2>&1 || true
  apply_gateway_flag false >/dev/null 2>&1 || true
  if (( status != 0 )); then
    echo "[FAIL] Phase-6 smoke failed" >&2
  fi
  exit "$status"
}
trap on_exit EXIT
|
||||
|
||||
# Preconditions: all three containers must already be running, the gateway
# and learner must be healthy, and the learner gets smoke thresholds.
echo "[precheck] containers"
for required in "$PG_CONTAINER" "$GW_CONTAINER" "$LEARNER_CONTAINER"; do
  if ! docker ps --format '{{.Names}}' | grep -q "^${required}$"; then
    echo "[ERR] missing ${required}" >&2
    exit 1
  fi
done

wait_gateway_health
wait_learner_health
apply_learner_smoke_thresholds
|
||||
|
||||
# Seed a deterministic event history under a run-unique reason:
# the first SEED_WINNER_COUNT events show SILENT_POLICY with no negative
# signal, the remainder show UNSUPPORTED_INPUT drawing a 'retry' — enough
# evidence for the learner to prefer SILENT_POLICY for this trigger.
echo "[seed] insert ${SEED_TOTAL} synthetic gateway events (reason=${SEED_REASON})"
psql_exec <<SQL >/dev/null
WITH seed AS (
  SELECT
    (
      substr(md5(random()::text || clock_timestamp()::text), 1, 8) || '-' ||
      substr(md5(random()::text || clock_timestamp()::text), 9, 4) || '-' ||
      substr(md5(random()::text || clock_timestamp()::text), 13, 4) || '-' ||
      substr(md5(random()::text || clock_timestamp()::text), 17, 4) || '-' ||
      substr(md5(random()::text || clock_timestamp()::text), 21, 12)
    )::uuid AS event_id,
    now() - (g * interval '15 second') AS ts,
    CASE WHEN g <= ${SEED_WINNER_COUNT} THEN 'SILENT_POLICY' ELSE 'UNSUPPORTED_INPUT' END AS template_id,
    CASE WHEN g <= ${SEED_WINNER_COUNT} THEN 'none' ELSE 'retry' END AS user_signal
  FROM generate_series(1, ${SEED_TOTAL}) g
)
INSERT INTO agent_experience_events (
  event_id, ts, node_id, source, agent_id, task_type, request_id,
  channel, inputs_hash, provider, model, profile, latency_ms,
  tokens_in, tokens_out, ok, error_class, error_msg_redacted,
  http_status, raw
)
SELECT
  event_id,
  ts,
  'NODA1',
  'gateway',
  'agromatrix',
  'webhook',
  'phase6-seed-${RUN_ID}-' || event_id::text,
  'telegram',
  md5(event_id::text),
  'gateway',
  'gateway',
  NULL,
  20,
  NULL,
  NULL,
  true,
  NULL,
  NULL,
  200,
  jsonb_build_object(
    'seed_test', true,
    'event_id', event_id::text,
    'source', 'gateway',
    'agent_id', 'agromatrix',
    'chat_type', 'group',
    'anti_silent_action', 'ACK_EMITTED',
    'anti_silent_template', template_id,
    'policy', jsonb_build_object('sowa_decision', 'SILENT', 'reason', '${SEED_REASON}'),
    'feedback', jsonb_build_object('user_signal', user_signal),
    'result', jsonb_build_object('ok', true, 'http_status', 200)
  )
FROM seed;
SQL
|
||||
|
||||
# Publish one matching event to JetStream from inside the learner container
# so the learner re-evaluates the seeded reason and can emit a lesson.
# SEED_REASON crosses the container boundary via `docker exec -e` only —
# a host-side `VAR=... docker exec` prefix is NOT inherited by the
# container process, so the previous redundant prefix was dropped.
echo "[trigger] publish one matching event to JetStream"
docker exec -e SEED_REASON_ENV="$SEED_REASON" -i "$LEARNER_CONTAINER" python - <<'PY'
import asyncio
import json
import os
import uuid
from datetime import datetime, timezone

import nats

reason = os.environ["SEED_REASON_ENV"]


async def main():
    nc = await nats.connect("nats://nats:4222")
    js = nc.jetstream()
    event = {
        "event_id": str(uuid.uuid4()),
        "ts": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "node_id": "NODA1",
        "source": "gateway",
        "agent_id": "agromatrix",
        "request_id": "phase6-seed-pub-" + str(uuid.uuid4()),
        "channel": "telegram",
        "chat_type": "group",
        "task_type": "webhook",
        "inputs_hash": "phase6-seed-trigger",
        "anti_silent_action": "ACK_EMITTED",
        "anti_silent_template": "SILENT_POLICY",
        "policy": {"sowa_decision": "SILENT", "reason": reason},
        "feedback": {"user_signal": "none", "operator_tag": None},
        "llm": {"provider": "gateway", "model": "gateway", "profile": None, "latency_ms": 20, "tokens_in": None, "tokens_out": None},
        "result": {"ok": True, "http_status": 200, "error_class": None, "error_msg_redacted": None},
    }
    await js.publish("agent.experience.v1.agromatrix", json.dumps(event).encode("utf-8"), headers={"Nats-Msg-Id": event["event_id"]})
    await nc.close()


asyncio.run(main())
PY
|
||||
|
||||
|
||||
# Poll (up to ~25s) for the learner to persist the expected tuning lesson.
# `{1..25}` replaces `$(seq 1 25)`: same range, no extra process.
echo "[assert] learner generated anti_silent_tuning lesson"
lesson_action=""
for _ in {1..25}; do
  lesson_action="$(psql_at "SELECT action FROM agent_lessons WHERE raw->>'lesson_type'='anti_silent_tuning' AND trigger='${SEED_TRIGGER}' ORDER BY ts DESC LIMIT 1;")"
  if [[ -n "$lesson_action" ]]; then
    break
  fi
  sleep 1
done

# The seeded evidence favors SILENT_POLICY; any other action is a failure.
if [[ "$lesson_action" != "prefer_template=SILENT_POLICY" ]]; then
  echo "[ERR] expected lesson action prefer_template=SILENT_POLICY, got: ${lesson_action:-<empty>}" >&2
  exit 1
fi
|
||||
|
||||
# Insert a pre-built "apply" lesson for the real unsupported_no_message
# reason so the gateway has an active lesson during the flag-ON probe.
echo "[seed] insert manual apply lesson (seed_test=true) for unsupported_no_message"
# One-liners replace the previous heredoc form; output is identical.
manual_lesson_id="$(python3 -c 'import uuid; print(uuid.uuid4())')"
manual_key="$(python3 -c "import hashlib; print(hashlib.sha256('phase6-manual-apply-${RUN_ID}'.encode()).hexdigest())")"

psql_exec <<SQL >/dev/null
INSERT INTO agent_lessons (
  lesson_id, lesson_key, ts, scope, agent_id, task_type,
  trigger, action, avoid, signals, evidence, raw
) VALUES (
  '${manual_lesson_id}'::uuid,
  '${manual_key}',
  now(),
  'global',
  NULL,
  'webhook',
  'reason=unsupported_no_message;chat_type=group',
  'prefer_template=SILENT_POLICY',
  'avoid_template=UNSUPPORTED_INPUT',
  jsonb_build_object('lesson_type','anti_silent_tuning','policy_reason','unsupported_no_message','chat_type','group','seed_test',true),
  jsonb_build_object('n_best',999,'score_best',1.0,'window_days',1),
  jsonb_build_object(
    'lesson_type','anti_silent_tuning',
    'seed_test',true,
    'trigger','reason=unsupported_no_message;chat_type=group',
    'action','prefer_template=SILENT_POLICY',
    'expires_at', to_char((now() + interval '2 hours') at time zone 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"')
  )
)
ON CONFLICT (lesson_key) DO UPDATE SET
  ts = EXCLUDED.ts,
  action = EXCLUDED.action,
  avoid = EXCLUDED.avoid,
  signals = EXCLUDED.signals,
  evidence = EXCLUDED.evidence,
  raw = EXCLUDED.raw;
SQL
|
||||
|
||||
apply_gateway_flag true

echo "[assert] gateway applies tuning when flag ON"
ok_on=0
for i in {1..8}; do
  # NOTE(review): update_id uses the "10" namespace so a retry here can never
  # reuse the flag-OFF probe's update_id (previously attempt 2 produced the
  # same id as the OFF probe; duplicate Telegram update_ids may be
  # deduplicated by the gateway — confirm against gateway dedup logic).
  resp_on="$(curl -sS -X POST "$GW_URL" -H 'content-type: application/json' -d "{\"update_id\":${RUN_ID}10${i},\"message\":{\"message_id\":${i},\"date\":0,\"chat\":{\"id\":${TEST_CHAT_ID},\"type\":\"group\"},\"from\":{\"id\":12345,\"is_bot\":false,\"first_name\":\"T\"},\"sticker\":{\"file_id\":\"phase6-on-${i}\"}}}")"
  if RESP_ON="$resp_on" python3 - <<'PY'
import json
import os

resp = json.loads(os.environ["RESP_ON"])
if resp.get("template_id") == "SILENT_POLICY" and str(resp.get("tuning_applied", "")).lower() == "true":
    print("[ok] tuning ON response:", resp)
    raise SystemExit(0)
raise SystemExit(1)
PY
  then
    ok_on=1
    break
  fi
  echo "[warn] tuning ON attempt ${i} did not match expected payload: ${resp_on}" >&2
  sleep 2
done
if [[ "$ok_on" -ne 1 ]]; then
  echo "[ERR] tuning ON assertion failed after retries" >&2
  exit 1
fi
|
||||
|
||||
apply_gateway_flag false

echo "[assert] gateway does not apply tuning when flag OFF"
# NOTE(review): update_id ${RUN_ID}200 is distinct from every flag-ON attempt
# (previously this probe reused the id of ON attempt 2; a duplicate Telegram
# update_id could be deduplicated upstream and skew this assertion).
resp_off="$(curl -sS -X POST "$GW_URL" -H 'content-type: application/json' -d "{\"update_id\":${RUN_ID}200,\"message\":{\"message_id\":2,\"date\":0,\"chat\":{\"id\":${TEST_CHAT_ID},\"type\":\"group\"},\"from\":{\"id\":12345,\"is_bot\":false,\"first_name\":\"T\"},\"sticker\":{\"file_id\":\"phase6-off\"}}}")"
RESP_OFF="$resp_off" python3 - <<'PY'
import json
import os

resp = json.loads(os.environ["RESP_OFF"])
if resp.get("template_id") != "UNSUPPORTED_INPUT" or str(resp.get("tuning_applied", "")).lower() != "false":
    raise SystemExit(f"unexpected response with tuning OFF: {resp}")
print("[ok] tuning OFF response:", resp)
PY

echo "[PASS] phase6 smoke completed"
|
||||
Reference in New Issue
Block a user