Files
microdao-daarion/services/senpai-md-consumer/senpai/md_consumer/nats_consumer.py
Apple 09dee24342 feat: MD pipeline — market-data-service hardening + SenpAI NATS consumer
Producer (market-data-service):
- Backpressure: smart drop policy (heartbeats→quotes→trades preserved)
- Heartbeat monitor: synthetic HeartbeatEvent on provider silence
- Graceful shutdown: WS→bus→storage→DB engine cleanup sequence
- Bybit V5 public WS provider (backup for Binance, no API key needed)
- FailoverManager: health-based provider switching with recovery
- NATS output adapter: md.events.{type}.{symbol} for SenpAI
- /bus-stats endpoint for backpressure monitoring
- Dockerfile + docker-compose.node1.yml integration
- 36 tests (parsing + bus + failover), requirements.lock

Consumer (senpai-md-consumer):
- NATSConsumer: subscribe md.events.>, queue group senpai-md, backpressure
- State store: LatestState + RollingWindow (deque, 60s)
- Feature engine: 11 features (mid, spread, VWAP, return, vol, latency)
- Rule-based signals: long/short on return+volume+spread conditions
- Publisher: rate-limited features + signals + alerts to NATS
- HTTP API: /health, /metrics, /state/latest, /features/latest, /stats
- 10 Prometheus metrics
- Dockerfile + docker-compose.senpai.yml
- 41 tests (parsing + state + features + rate-limit), requirements.lock

CI: ruff + pytest + smoke import for both services
Tests: 77 total passed, lint clean
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-09 11:46:15 -08:00

230 lines
7.1 KiB
Python

"""
NATS consumer — subscribes to md.events.> and feeds the processing pipeline.
Features:
- Queue group subscription (horizontal scaling)
- Bounded asyncio.Queue with backpressure drop policy
- Auto-reconnect via nats-py
- Optional JetStream durable consumer
"""
from __future__ import annotations
import asyncio
import json
import logging
import nats
from nats.aio.client import Client as NatsClient
from nats.aio.msg import Msg
from senpai.md_consumer.config import settings
from senpai.md_consumer.models import EventType, parse_event, Event
from senpai.md_consumer import metrics as m
# Module-level logger; structured fields are passed via `extra=` at call sites.
logger = logging.getLogger(__name__)
# Events that can be dropped under backpressure (lowest priority first).
# Trades are deliberately absent: they are never dropped (see NATSConsumer docstring).
_DROPPABLE = {EventType.HEARTBEAT, EventType.QUOTE, EventType.BOOK_L2}
class NATSConsumer:
    """
    Reads normalised events from NATS, validates, and puts into
    a bounded asyncio.Queue for downstream processing.

    Backpressure policy:
    - Queue < 90% → accept all events
    - Queue >= 90% → drop heartbeats, quotes, book snapshots
    - Trades are NEVER dropped (critical for analytics)
    """

    def __init__(self) -> None:
        self._nc: NatsClient | None = None
        self._sub = None     # core NATS subscription handle
        self._js_sub = None  # JetStream subscription handle
        self._queue: asyncio.Queue[Event] = asyncio.Queue(
            maxsize=settings.queue_size
        )
        self._running = False
        # Per-event-type count of backpressure drops (used for throttled logging).
        self._drop_count: dict[str, int] = {}

    @property
    def queue(self) -> asyncio.Queue[Event]:
        """Bounded queue the downstream pipeline consumes from."""
        return self._queue

    @property
    def queue_fill_ratio(self) -> float:
        """Queue occupancy in [0.0, 1.0]; 0.0 when queue_size is unset/invalid."""
        if settings.queue_size <= 0:
            return 0.0
        return self._queue.qsize() / settings.queue_size

    @property
    def _url(self) -> str:
        """NATS server URL taken from runtime settings."""
        return settings.nats_url

    async def connect(self) -> None:
        """Connect to NATS with auto-reconnect (retries forever)."""
        self._nc = await nats.connect(
            self._url,
            reconnect_time_wait=2,
            max_reconnect_attempts=-1,  # -1 = unlimited reconnect attempts
            name="senpai-md-consumer",
            error_cb=self._on_error,
            disconnected_cb=self._on_disconnected,
            reconnected_cb=self._on_reconnected,
            closed_cb=self._on_closed,
        )
        m.NATS_CONNECTED.set(1)
        logger.info(
            "nats.connected",
            extra={"url": self._url, "subject": settings.nats_subject},
        )

    async def subscribe(self) -> None:
        """Subscribe to market data events (core NATS or JetStream).

        Raises:
            RuntimeError: if connect() has not been called first.
        """
        if not self._nc:
            raise RuntimeError("Not connected. Call connect() first.")
        if settings.use_jetstream:
            await self._subscribe_jetstream()
        else:
            await self._subscribe_core()

    async def _subscribe_core(self) -> None:
        """Core NATS subscription with queue group (horizontal scaling)."""
        self._sub = await self._nc.subscribe(
            settings.nats_subject,
            queue=settings.nats_queue_group,
            cb=self._on_message,
        )
        logger.info(
            "nats.subscribed_core",
            extra={
                "subject": settings.nats_subject,
                "queue_group": settings.nats_queue_group,
            },
        )

    async def _subscribe_jetstream(self) -> None:
        """JetStream durable subscription with manual acknowledgement."""
        js = self._nc.jetstream()
        # Try to create or bind to existing consumer
        self._js_sub = await js.subscribe(
            settings.nats_subject,
            queue=settings.nats_queue_group,
            durable="senpai-md-durable",
            cb=self._on_message,
            manual_ack=True,
        )
        logger.info(
            "nats.subscribed_jetstream",
            extra={
                "subject": settings.nats_subject,
                "durable": "senpai-md-durable",
            },
        )

    async def _on_message(self, msg: Msg) -> None:
        """
        Callback for each NATS message.
        Parse → backpressure check → enqueue → ack (JetStream only).

        Every message is acked exactly once under JetStream, whether it was
        enqueued or dropped — un-acked messages would be redelivered forever.
        """
        try:
            data = json.loads(msg.data)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            m.EVENTS_DROPPED.labels(reason="parse_error", event_type="unknown").inc()
            logger.warning("nats.parse_error", extra={"error": str(e)})
            # Ack poison messages so JetStream does not redeliver them.
            if settings.use_jetstream:
                await msg.ack()
            return
        event = parse_event(data)
        if event is None:
            m.EVENTS_DROPPED.labels(reason="invalid_event", event_type="unknown").inc()
            if settings.use_jetstream:
                await msg.ack()
            return
        # Track inbound
        m.EVENTS_IN.labels(
            event_type=event.event_type.value,
            provider=event.provider,
        ).inc()
        # Backpressure check
        fill = self.queue_fill_ratio
        m.QUEUE_FILL.set(fill)
        m.QUEUE_SIZE.set(self._queue.qsize())
        if fill >= settings.queue_drop_threshold:
            # Under pressure: only accept trades
            if event.event_type in _DROPPABLE:
                et = event.event_type.value
                self._drop_count[et] = self._drop_count.get(et, 0) + 1
                m.EVENTS_DROPPED.labels(
                    reason="backpressure",
                    event_type=et,
                ).inc()
                # Log only every 1000th drop per type to avoid log flooding.
                if self._drop_count[et] % 1000 == 1:
                    logger.warning(
                        "nats.backpressure_drop",
                        extra={
                            "type": et,
                            "fill": f"{fill:.0%}",
                            "total_drops": self._drop_count,
                        },
                    )
                if settings.use_jetstream:
                    await msg.ack()
                return
        # Enqueue
        try:
            self._queue.put_nowait(event)
        except asyncio.QueueFull:
            # Queue filled between the fill check and here; count the loss.
            m.EVENTS_DROPPED.labels(
                reason="queue_full", event_type=event.event_type.value
            ).inc()
        # BUG FIX: previously only *dropped* messages were acked — a message
        # that was successfully enqueued was never acked, so JetStream would
        # redeliver every processed message. Ack unconditionally here.
        if settings.use_jetstream:
            await msg.ack()

    async def close(self) -> None:
        """Graceful shutdown: unsubscribe, flush pending data, close connection."""
        self._running = False
        # BUG FIX: also unsubscribe the JetStream subscription (_js_sub),
        # which was previously leaked on shutdown.
        for sub in (self._sub, self._js_sub):
            if sub:
                try:
                    await sub.unsubscribe()
                except Exception:
                    pass  # best-effort cleanup during shutdown
        if self._nc:
            try:
                await self._nc.flush(timeout=5)
                await self._nc.close()
            except Exception:
                pass  # connection may already be closed by the server
        m.NATS_CONNECTED.set(0)
        logger.info("nats.closed", extra={"drops": self._drop_count})

    # ── NATS callbacks ────────────────────────────────────────────────
    async def _on_error(self, e: Exception) -> None:
        logger.error("nats.error", extra={"error": str(e)})

    async def _on_disconnected(self) -> None:
        m.NATS_CONNECTED.set(0)
        logger.warning("nats.disconnected")

    async def _on_reconnected(self) -> None:
        m.NATS_CONNECTED.set(1)
        m.NATS_RECONNECTS.inc()
        logger.info("nats.reconnected")

    async def _on_closed(self) -> None:
        m.NATS_CONNECTED.set(0)
        logger.info("nats.closed_callback")