feat(soak): add --sender-count rotation + --inter-msg-ms; add NODA1 runtime snapshot
Made-with: Cursor
This commit is contained in:
@@ -247,10 +247,28 @@ async def run_soak(
|
||||
max_p95_ms: float,
|
||||
max_drop_rate: float,
|
||||
db_path: str = "",
|
||||
sender_count: int = 1,
|
||||
inter_message_ms: float = 0.0,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
sender_count > 1: rotate senders @soak-0001:..., @soak-0002:..., etc.
|
||||
This avoids sender_rpm rate-limiting when testing invoke latency.
|
||||
|
||||
inter_message_ms > 0: sleep between each inject (spread load over time).
|
||||
"""
|
||||
results: List[tuple] = []
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
|
||||
# Build sender pool
|
||||
server = sender.split(":", 1)[-1] if ":" in sender else "daarion.space"
|
||||
sender_pool = (
|
||||
[sender] if sender_count <= 1
|
||||
else [f"@soak-{i:04d}:{server}" for i in range(sender_count)]
|
||||
)
|
||||
if sender_count > 1:
|
||||
print(f"[soak] Sender rotation: {sender_count} senders "
|
||||
f"(@soak-0000:{server} … @soak-{sender_count-1:04d}:{server})")
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
# Pre-check: inject endpoint + health
|
||||
preflight_err = await _preflight_inject(client, url, room_id)
|
||||
@@ -273,15 +291,19 @@ async def run_soak(
|
||||
fo_before = _parse_counter(metrics_before, "matrix_bridge_failover_total")
|
||||
|
||||
print(f"[soak] Bridge health before: {health_before.get('ok', '?')}")
|
||||
print(f"[soak] Starting {n_messages} messages (concurrency={concurrency}) ...")
|
||||
rl_note = f" (⚠️ rate_limited before={rl_before:.0f}, using {len(sender_pool)} sender(s))"
|
||||
print(f"[soak] Starting {n_messages} messages (concurrency={concurrency}) ...{rl_note}")
|
||||
|
||||
t_start = time.monotonic()
|
||||
|
||||
async def worker(i: int):
|
||||
async with semaphore:
|
||||
if inter_message_ms > 0:
|
||||
await asyncio.sleep(inter_message_ms / 1000.0)
|
||||
msg = f"soak-msg-{i:04d}"
|
||||
current_sender = sender_pool[i % len(sender_pool)]
|
||||
lat, status, err = await _send_one(
|
||||
client, url, agent_id, msg, room_id, sender
|
||||
client, url, agent_id, msg, room_id, current_sender
|
||||
)
|
||||
results.append((lat, status, err))
|
||||
if (i + 1) % max(1, n_messages // 10) == 0:
|
||||
@@ -382,6 +404,15 @@ async def run_soak(
|
||||
f"WAL grew {wal_delta:.1f}MB (threshold {report['wal']['threshold_mb']}MB) "
|
||||
"— possible SQLite write pressure (Bottleneck #2)"
|
||||
)
|
||||
# Rate-limited warning (not a failure, but surfaced prominently)
|
||||
rl_delta = m.get("rate_limited", 0)
|
||||
if rl_delta > 0:
|
||||
rl_pct = rl_delta / s["total_messages"] * 100
|
||||
report["warnings"] = report.get("warnings", [])
|
||||
report["warnings"].append(
|
||||
f"rate_limited={rl_delta:.0f} ({rl_pct:.0f}% of messages) — "
|
||||
"use --sender-count >= RATE_LIMIT_SENDER_RPM for invoke baseline"
|
||||
)
|
||||
|
||||
report["passed"] = len(failures) == 0
|
||||
report["failures"] = failures
|
||||
@@ -419,6 +450,9 @@ def _print_report(r: Dict[str, Any]) -> None:
|
||||
wal_warn = " ⚠️" if (w.get("delta_mb") or 0) > w.get("threshold_mb", 10) else ""
|
||||
print(f" WAL: {w['before_mb']}MB → {w['after_mb']}MB {wal_delta_str}{wal_warn}")
|
||||
print()
|
||||
if r.get("warnings"):
|
||||
for w in r["warnings"]:
|
||||
print(f" ⚠️ {w}")
|
||||
if r["failures"]:
|
||||
for f in r["failures"]:
|
||||
print(f" ❌ {f}")
|
||||
@@ -445,11 +479,18 @@ def main() -> int:
|
||||
help=f"Max p95 latency ms (default: {_DEFAULT_MAX_P95_MS})")
|
||||
parser.add_argument("--max-drop-rate",type=float, default=_DEFAULT_MAX_DROP_RATE,
|
||||
help=f"Max queue drop rate 0..1 (default: {_DEFAULT_MAX_DROP_RATE})")
|
||||
parser.add_argument("--report-file", default="",
|
||||
parser.add_argument("--report-file", default="",
|
||||
help="Optional path to write JSON report")
|
||||
parser.add_argument("--db-path", default="",
|
||||
parser.add_argument("--db-path", default="",
|
||||
help="Path to policy_store.db for WAL check "
|
||||
"(e.g. /opt/microdao-daarion/data/matrix_bridge.db)")
|
||||
parser.add_argument("--sender-count", type=int, default=1,
|
||||
help="Number of rotating senders (@soak-0001:server, ...). "
|
||||
"Use >= SENDER_RPM_LIMIT to avoid rate-limit during invoke baseline. "
|
||||
"Default: 1 (single sender, tests rate-limit behavior)")
|
||||
parser.add_argument("--inter-msg-ms", type=float, default=0.0,
|
||||
help="Sleep between each injected message (ms). "
|
||||
"Use to spread load over time (e.g. 100ms = ~10 rps). Default: 0")
|
||||
args = parser.parse_args()
|
||||
|
||||
report = asyncio.run(run_soak(
|
||||
@@ -462,6 +503,8 @@ def main() -> int:
|
||||
max_p95_ms=args.max_p95_ms,
|
||||
max_drop_rate=args.max_drop_rate,
|
||||
db_path=args.db_path,
|
||||
sender_count=args.sender_count,
|
||||
inter_message_ms=args.inter_msg_ms,
|
||||
))
|
||||
_print_report(report)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user