feat(soak): add --sender-count rotation + --inter-msg-ms; add NODA1 runtime snapshot

Made-with: Cursor
This commit is contained in:
Apple
2026-03-05 08:06:31 -08:00
parent e1d73ebc98
commit e12c99903d
2 changed files with 264 additions and 4 deletions

View File

@@ -247,10 +247,28 @@ async def run_soak(
max_p95_ms: float,
max_drop_rate: float,
db_path: str = "",
sender_count: int = 1,
inter_message_ms: float = 0.0,
) -> Dict[str, Any]:
"""
sender_count > 1: rotate senders @soak-0001:..., @soak-0002:..., etc.
This avoids sender_rpm rate-limiting when testing invoke latency.
inter_message_ms > 0: sleep between each inject (spread load over time).
"""
results: List[tuple] = []
semaphore = asyncio.Semaphore(concurrency)
# Build sender pool
server = sender.split(":", 1)[-1] if ":" in sender else "daarion.space"
sender_pool = (
[sender] if sender_count <= 1
else [f"@soak-{i:04d}:{server}" for i in range(sender_count)]
)
if sender_count > 1:
print(f"[soak] Sender rotation: {sender_count} senders "
f"(@soak-0000:{server} … @soak-{sender_count-1:04d}:{server})")
async with httpx.AsyncClient() as client:
# Pre-check: inject endpoint + health
preflight_err = await _preflight_inject(client, url, room_id)
@@ -273,15 +291,19 @@ async def run_soak(
fo_before = _parse_counter(metrics_before, "matrix_bridge_failover_total")
print(f"[soak] Bridge health before: {health_before.get('ok', '?')}")
print(f"[soak] Starting {n_messages} messages (concurrency={concurrency}) ...")
rl_note = f" (⚠️ rate_limited before={rl_before:.0f}, using {len(sender_pool)} sender(s))"
print(f"[soak] Starting {n_messages} messages (concurrency={concurrency}) ...{rl_note}")
t_start = time.monotonic()
async def worker(i: int):
async with semaphore:
if inter_message_ms > 0:
await asyncio.sleep(inter_message_ms / 1000.0)
msg = f"soak-msg-{i:04d}"
current_sender = sender_pool[i % len(sender_pool)]
lat, status, err = await _send_one(
client, url, agent_id, msg, room_id, sender
client, url, agent_id, msg, room_id, current_sender
)
results.append((lat, status, err))
if (i + 1) % max(1, n_messages // 10) == 0:
@@ -382,6 +404,15 @@ async def run_soak(
f"WAL grew {wal_delta:.1f}MB (threshold {report['wal']['threshold_mb']}MB) "
"— possible SQLite write pressure (Bottleneck #2)"
)
# Rate-limited warning (not a failure, but surfaced prominently)
rl_delta = m.get("rate_limited", 0)
if rl_delta > 0:
rl_pct = rl_delta / s["total_messages"] * 100
report["warnings"] = report.get("warnings", [])
report["warnings"].append(
f"rate_limited={rl_delta:.0f} ({rl_pct:.0f}% of messages) — "
"use --sender-count >= RATE_LIMIT_SENDER_RPM for invoke baseline"
)
report["passed"] = len(failures) == 0
report["failures"] = failures
@@ -419,6 +450,9 @@ def _print_report(r: Dict[str, Any]) -> None:
wal_warn = " ⚠️" if (w.get("delta_mb") or 0) > w.get("threshold_mb", 10) else ""
print(f" WAL: {w['before_mb']}MB → {w['after_mb']}MB {wal_delta_str}{wal_warn}")
print()
if r.get("warnings"):
for w in r["warnings"]:
print(f" ⚠️ {w}")
if r["failures"]:
for f in r["failures"]:
print(f"{f}")
@@ -445,11 +479,18 @@ def main() -> int:
help=f"Max p95 latency ms (default: {_DEFAULT_MAX_P95_MS})")
parser.add_argument("--max-drop-rate",type=float, default=_DEFAULT_MAX_DROP_RATE,
help=f"Max queue drop rate 0..1 (default: {_DEFAULT_MAX_DROP_RATE})")
parser.add_argument("--report-file", default="",
parser.add_argument("--report-file", default="",
help="Optional path to write JSON report")
parser.add_argument("--db-path", default="",
parser.add_argument("--db-path", default="",
help="Path to policy_store.db for WAL check "
"(e.g. /opt/microdao-daarion/data/matrix_bridge.db)")
parser.add_argument("--sender-count", type=int, default=1,
help="Number of rotating senders (@soak-0001:server, ...). "
"Use >= SENDER_RPM_LIMIT to avoid rate-limit during invoke baseline. "
"Default: 1 (single sender, tests rate-limit behavior)")
parser.add_argument("--inter-msg-ms", type=float, default=0.0,
help="Sleep between each injected message (ms). "
"Use to spread load over time (e.g. 100ms = ~10 rps). Default: 0")
args = parser.parse_args()
report = asyncio.run(run_soak(
@@ -462,6 +503,8 @@ def main() -> int:
max_p95_ms=args.max_p95_ms,
max_drop_rate=args.max_drop_rate,
db_path=args.db_path,
sender_count=args.sender_count,
inter_message_ms=args.inter_msg_ms,
))
_print_report(report)