feat: MD pipeline — market-data-service hardening + SenpAI NATS consumer

Producer (market-data-service):
- Backpressure: smart drop policy (drop heartbeats first, then quotes; trades always preserved; sketch below)
- Heartbeat monitor: synthetic HeartbeatEvent on provider silence
- Graceful shutdown: WS→bus→storage→DB engine cleanup sequence
- Bybit V5 public WS provider (backup for Binance, no API key needed)
- FailoverManager: health-based provider switching with recovery
- NATS output adapter: md.events.{type}.{symbol} for SenpAI
- /bus-stats endpoint for backpressure monitoring
- Dockerfile + docker-compose.node1.yml integration
- 36 tests (parsing + bus + failover), requirements.lock
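
For reviewers, a minimal sketch of the drop-policy idea referenced above (hypothetical
names; the service's actual bus class differs):

    from collections import deque

    # Drop priority when the bus is full: heartbeats first, then quotes.
    # Trades are never evicted.
    DROP_ORDER = ("heartbeat", "quote")

    class BoundedBus:
        def __init__(self, maxsize: int = 10_000) -> None:
            self._buf: deque[tuple[str, dict]] = deque()
            self._maxsize = maxsize

        def publish(self, event_type: str, payload: dict) -> bool:
            if len(self._buf) >= self._maxsize:
                if not self._evict_one() and event_type != "trade":
                    # Buffer holds only trades; refuse non-trade newcomers.
                    return False
            self._buf.append((event_type, payload))
            return True

        def _evict_one(self) -> bool:
            # Remove the oldest event of the lowest-priority type present.
            for drop_type in DROP_ORDER:
                for i, (etype, _) in enumerate(self._buf):
                    if etype == drop_type:
                        del self._buf[i]
                        return True
            return False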

Consumer (senpai-md-consumer):
- NATSConsumer: subscribe md.events.>, queue group senpai-md, backpressure
- State store: LatestState + RollingWindow (60s deque; sketch after this list)
- Feature engine: 11 features (mid, spread, VWAP, return, vol, latency)
- Rule-based signals: long/short on return+volume+spread conditions
- Publisher: rate-limited features + signals + alerts to NATS
- HTTP API: /health, /metrics, /state/latest, /features/latest, /stats
- 10 Prometheus metrics
- Dockerfile + docker-compose.senpai.yml
- 41 tests (parsing + state + features + rate-limit), requirements.lock
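
A compact sketch of the RollingWindow idea from the state-store bullet (simplified;
method names are assumptions, not the real module's API):

    import time
    from collections import deque

    class RollingWindow:
        """Keep (timestamp, value) samples from the last window_seconds."""

        def __init__(self, window_seconds: float = 60.0) -> None:
            self.window_seconds = window_seconds
            self._items: deque[tuple[float, float]] = deque()

        def add(self, value: float, now: float | None = None) -> None:
            now = time.monotonic() if now is None else now
            self._items.append((now, value))
            self._evict(now)

        def values(self) -> list[float]:
            self._evict(time.monotonic())
            return [v for _, v in self._items]

        def _evict(self, now: float) -> None:
            # Drop samples older than the window.
            cutoff = now - self.window_seconds
            while self._items and self._items[0][0] < cutoff:
                self._items.popleft()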

CI: ruff + pytest + smoke import for both services
Tests: 77 total passed, lint clean
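
The smoke-import step can be as small as the following pytest sketch
(senpai.md_consumer is the real module; market_data_service is an assumed name):

    import importlib

    def test_smoke_imports() -> None:
        # Fails fast in CI if either service has import-time errors.
        for mod in ("market_data_service", "senpai.md_consumer"):
            importlib.import_module(mod)
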
Co-authored-by: Cursor <cursoragent@cursor.com>

@@ -0,0 +1,270 @@
"""
SenpAI Market-Data Consumer — entry point.
Orchestrates:
1. NATS subscription (md.events.>)
2. Event processing → state updates → feature computation
3. Feature/signal/alert publishing back to NATS
4. HTTP API for monitoring
Usage:
python -m senpai.md_consumer
"""
from __future__ import annotations
import asyncio
import logging
import signal
import time
import structlog
from senpai.md_consumer import api
from senpai.md_consumer import metrics as m
from senpai.md_consumer.config import settings
from senpai.md_consumer.features import (
check_signal,
make_feature_snapshot,
compute_features,
)
from senpai.md_consumer.models import (
AlertEvent,
EventType,
TradeEvent,
QuoteEvent,
)
from senpai.md_consumer.nats_consumer import NATSConsumer
from senpai.md_consumer.publisher import Publisher
from senpai.md_consumer.state import LatestState
logger = structlog.get_logger()


# ── Logging setup ──────────────────────────────────────────────────────
def setup_logging() -> None:
log_level = getattr(logging, settings.log_level.upper(), logging.INFO)
structlog.configure(
processors=[
structlog.contextvars.merge_contextvars,
structlog.processors.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.dev.ConsoleRenderer(),
],
wrapper_class=structlog.make_filtering_bound_logger(log_level),
context_class=dict,
logger_factory=structlog.PrintLoggerFactory(),
)
    logging.basicConfig(level=log_level, format="%(message)s")


# ── Processing pipeline ───────────────────────────────────────────────
async def process_events(
consumer: NATSConsumer,
state: LatestState,
publisher: Publisher,
) -> None:
"""
Main processing loop:
1. Read event from queue
2. Update state
3. Compute features
4. Publish features + check signals
5. Check alerts
"""
    last_alert_check = time.monotonic()
while True:
try:
event = await consumer.queue.get()
except asyncio.CancelledError:
break
proc_start = time.monotonic()
try:
# Update state based on event type
if event.event_type == EventType.TRADE:
assert isinstance(event, TradeEvent)
state.update_trade(event)
symbol = event.symbol
elif event.event_type == EventType.QUOTE:
assert isinstance(event, QuoteEvent)
state.update_quote(event)
symbol = event.symbol
elif event.event_type == EventType.HEARTBEAT:
# Heartbeats don't update state, just track
symbol = None
elif event.event_type == EventType.BOOK_L2:
# TODO: book updates
symbol = None
else:
symbol = None
# Compute features + publish (only for trade/quote events)
if symbol and settings.features_enabled:
snapshot = make_feature_snapshot(state, symbol)
await publisher.publish_features(snapshot)
# Check for trade signal
sig = check_signal(snapshot.features, symbol)
if sig:
await publisher.publish_signal(sig)
# Processing latency metric
proc_ms = (time.monotonic() - proc_start) * 1000
m.PROCESSING_LATENCY.observe(proc_ms)
except Exception as e:
logger.error(
"process.error",
error=str(e),
event_type=event.event_type.value if event else "?",
)
# Periodic alert checks (every 5 seconds)
now = time.monotonic()
if now - last_alert_check > 5.0:
last_alert_check = now
            await _check_alerts(state, publisher, consumer)


async def _check_alerts(
state: LatestState,
publisher: Publisher,
consumer: NATSConsumer,
) -> None:
"""Check alert conditions and emit if needed."""
# Backpressure alert
fill = consumer.queue_fill_ratio
if fill > 0.8:
await publisher.publish_alert(
AlertEvent(
alert_type="backpressure",
level="warning" if fill < 0.95 else "critical",
message=f"Queue fill at {fill:.0%}",
details={"fill_ratio": fill},
)
)
# Latency alert (per symbol)
for sym in state.symbols:
features = compute_features(state, sym)
p95 = features.get("latency_ms_p95")
if p95 is not None and p95 > settings.alert_latency_ms:
await publisher.publish_alert(
AlertEvent(
alert_type="latency",
level="warning",
message=f"{sym} p95 latency {p95:.0f}ms > {settings.alert_latency_ms}ms",
details={"symbol": sym, "p95_ms": p95},
)
            )


# ── Main ───────────────────────────────────────────────────────────────
async def main() -> None:
setup_logging()
logger.info("service.starting", nats_url=settings.nats_url)
# State store
state = LatestState(window_seconds=settings.rolling_window_seconds)
# NATS consumer
consumer = NATSConsumer()
await consumer.connect()
await consumer.subscribe()
# Publisher (reuses same NATS connection)
publisher = Publisher(consumer._nc)
# Wire up API
api.set_state(state)
def _get_stats() -> dict:
return {
"queue_size": consumer.queue.qsize(),
"queue_fill_ratio": round(consumer.queue_fill_ratio, 3),
"queue_max": settings.queue_size,
"events_processed": state.event_count,
"symbols_tracked": state.symbols,
"features_enabled": settings.features_enabled,
"nats_connected": bool(consumer._nc and consumer._nc.is_connected),
}
api.set_stats_fn(_get_stats)
# Start HTTP API
http_server = await api.start_api()
# Start processing loop
process_task = asyncio.create_task(
process_events(consumer, state, publisher)
)
# Graceful shutdown
shutdown_event = asyncio.Event()
    def _signal_handler() -> None:
logger.info("service.shutdown_signal")
shutdown_event.set()
    loop = asyncio.get_running_loop()
for sig in (signal.SIGINT, signal.SIGTERM):
try:
loop.add_signal_handler(sig, _signal_handler)
except NotImplementedError:
pass
logger.info(
"service.ready",
subject=settings.nats_subject,
queue_group=settings.nats_queue_group,
http_port=settings.http_port,
features_enabled=settings.features_enabled,
)
# Wait for shutdown
await shutdown_event.wait()
# ── Cleanup ───────────────────────────────────────────────────────
logger.info("service.shutting_down")
process_task.cancel()
try:
await process_task
except asyncio.CancelledError:
pass
await consumer.close()
http_server.close()
await http_server.wait_closed()
logger.info(
"service.stopped",
events_processed=state.event_count,
symbols=state.symbols,
    )


def cli() -> None:
    asyncio.run(main())


if __name__ == "__main__":
    cli()