feat: MD pipeline — market-data-service hardening + SenpAI NATS consumer

Producer (market-data-service):
- Backpressure: smart drop policy (drop heartbeats first, then quotes; trades are preserved)
- Heartbeat monitor: synthetic HeartbeatEvent on provider silence
- Graceful shutdown: WS→bus→storage→DB engine cleanup sequence
- Bybit V5 public WS provider (backup for Binance, no API key needed)
- FailoverManager: health-based provider switching with recovery
- NATS output adapter: md.events.{type}.{symbol} for SenpAI
- /bus-stats endpoint for backpressure monitoring
- Dockerfile + docker-compose.node1.yml integration
- 36 tests (parsing + bus + failover), requirements.lock

Consumer (senpai-md-consumer):
- NATSConsumer: subscribe md.events.>, queue group senpai-md, backpressure
- State store: LatestState + RollingWindow (deque, 60s)
- Feature engine: 11 features (mid, spread, VWAP, return, vol, latency)
- Rule-based signals: long/short on return+volume+spread conditions
- Publisher: rate-limited features + signals + alerts to NATS
- HTTP API: /health, /metrics, /state/latest, /features/latest, /stats
- 10 Prometheus metrics
- Dockerfile + docker-compose.senpai.yml
- 41 tests (parsing + state + features + rate-limit), requirements.lock

CI: ruff + pytest + smoke import for both services
Tests: 77 total passed, lint clean
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Apple
2026-02-09 11:46:15 -08:00
parent c50843933f
commit 09dee24342
47 changed files with 3930 additions and 56 deletions

View File

@@ -0,0 +1,212 @@
"""
Test feature computations — deterministic scenarios.
"""
import pytest
from senpai.md_consumer.features import (
_percentile,
_realized_vol,
_vwap,
check_signal,
compute_features,
)
from senpai.md_consumer.models import QuoteEvent, TradeEvent
from senpai.md_consumer.state import LatestState, TradeRecord
# ── VWAP ───────────────────────────────────────────────────────────────
def test_vwap_basic():
    """Two equally sized trades at 100 and 200 are expected to give a VWAP of 150."""
    cheap = TradeRecord(price=100.0, size=10.0, ts=0)
    dear = TradeRecord(price=200.0, size=10.0, ts=0)
    # (100*10 + 200*10) / (10 + 10) = 150
    result = _vwap([cheap, dear])
    assert result == 150.0
def test_vwap_weighted():
    """A 90/10 size split is expected to pull the VWAP toward the larger trade."""
    legs = ((100.0, 90.0), (200.0, 10.0))  # (price, size)
    records = [TradeRecord(price=px, size=qty, ts=0) for px, qty in legs]
    # (100*90 + 200*10) / 100 = 110
    assert _vwap(records) == 110.0
def test_vwap_empty():
    """An empty trade list is expected to produce no VWAP (None)."""
    no_trades = []
    assert _vwap(no_trades) is None
def test_vwap_zero_volume():
    """A single trade of size zero is expected to produce no VWAP (None)."""
    zero_size = TradeRecord(price=100.0, size=0.0, ts=0)
    assert _vwap([zero_size]) is None
# ── Realized volatility ───────────────────────────────────────────────
def test_realized_vol_constant_price():
    """Ten trades at an unchanged price must report a volatility of exactly 0."""
    flat_tape = []
    for _ in range(10):
        flat_tape.append(TradeRecord(price=100.0, size=1.0, ts=0))

    sigma = _realized_vol(flat_tape)
    assert sigma is not None
    assert sigma == 0.0
def test_realized_vol_two_prices():
    """Two trades are too few data points, so the result must be None."""
    first = TradeRecord(price=100.0, size=1.0, ts=0)
    second = TradeRecord(price=101.0, size=1.0, ts=0)
    # At least 3 trades are needed before a volatility is reported.
    assert _realized_vol([first, second]) is None
def test_realized_vol_positive():
    """A price path that moves up and down must give strictly positive volatility."""
    price_path = (100.0, 102.0, 99.0, 103.0)
    records = [TradeRecord(price=px, size=1.0, ts=0) for px in price_path]

    sigma = _realized_vol(records)
    assert sigma is not None
    assert sigma > 0
# ── Percentile ─────────────────────────────────────────────────────────
def test_percentile_basic():
    """The 50th, 0th and 100th percentiles of 1..5 must be 3, 1 and 5."""
    values = [1.0, 2.0, 3.0, 4.0, 5.0]
    expectations = ((50, 3.0), (0, 1.0), (100, 5.0))  # (percentile, expected)
    for pct, expected in expectations:
        assert _percentile(values, pct) == expected
def test_percentile_p95():
    """The 95th percentile of 1..100 must fall in the closed range [95, 96]."""
    values = [float(n) for n in range(1, 101)]  # 1.0 .. 100.0
    result = _percentile(values, 95)
    assert 95 <= result <= 96
# ── Full feature computation ──────────────────────────────────────────
def test_compute_features_with_state():
    """One quote plus five unit-size trades must yield the expected features."""
    symbol = "BTCUSDT"
    state = LatestState(window_seconds=60.0)

    # Seed the top of book: 70000 bid / 70002 ask.
    quote = QuoteEvent(
        provider="binance",
        symbol=symbol,
        bid=70000.0,
        ask=70002.0,
        bid_size=5.0,
        ask_size=3.0,
    )
    state.update_quote(quote)

    # Five trades of size 1.0, each 10 higher in price than the previous one.
    for step in range(5):
        trade = TradeEvent(
            provider="binance",
            symbol=symbol,
            price=70000.0 + step * 10,
            size=1.0,
        )
        state.update_trade(trade)

    features = compute_features(state, symbol)

    # Quote-derived features.
    assert features["mid"] == pytest.approx(70001.0)
    assert features["spread_abs"] == pytest.approx(2.0)
    spread_bps = features["spread_bps"]
    assert spread_bps is not None
    assert spread_bps > 0

    # Trade-derived features: five trades, total volume 5.0.
    assert features["trade_count_10s"] == 5.0
    assert features["trade_volume_10s"] == 5.0

    # Both VWAP horizons must be populated.
    assert features["trade_vwap_10s"] is not None
    assert features["trade_vwap_60s"] is not None
def test_compute_features_no_data():
    """With no events recorded, the checked features must all be None."""
    empty_state = LatestState(window_seconds=60.0)
    features = compute_features(empty_state, "BTCUSDT")

    for key in ("mid", "spread_abs", "trade_vwap_10s"):
        assert features[key] is None
# ── Signal detection ──────────────────────────────────────────────────
def test_check_signal_long():
    """A strong positive return with volume and a tight spread must signal long."""
    features = {}
    features["return_10s"] = 0.005  # 0.5%, above the 0.3% threshold
    features["trade_volume_10s"] = 5.0  # above the 1.0 threshold
    features["spread_bps"] = 3.0  # below the 20 bps threshold

    result = check_signal(features, "BTCUSDT")

    assert result is not None
    assert result.direction == "long"
    assert result.confidence > 0
def test_check_signal_short():
    """A strong negative return must produce a short signal."""
    features = {}
    features["return_10s"] = -0.005
    features["trade_volume_10s"] = 5.0
    features["spread_bps"] = 3.0

    result = check_signal(features, "BTCUSDT")

    assert result is not None
    assert result.direction == "short"
def test_check_signal_no_trigger():
    """A very small return must not produce any signal."""
    features = {}
    features["return_10s"] = 0.0001
    features["trade_volume_10s"] = 5.0
    features["spread_bps"] = 3.0

    result = check_signal(features, "BTCUSDT")
    assert result is None
def test_check_signal_wide_spread():
    """A wide spread must suppress the signal even when the return is strong."""
    features = {}
    features["return_10s"] = 0.01
    features["trade_volume_10s"] = 5.0
    features["spread_bps"] = 50.0  # above the 20 bps limit

    result = check_signal(features, "BTCUSDT")
    assert result is None
def test_check_signal_low_volume():
    """Insufficient traded volume must suppress the signal."""
    features = {}
    features["return_10s"] = 0.01
    features["trade_volume_10s"] = 0.1  # below the 1.0 minimum
    features["spread_bps"] = 3.0

    result = check_signal(features, "BTCUSDT")
    assert result is None

View File

@@ -0,0 +1,154 @@
"""
Test event parsing from JSON payloads (mirrors market-data-service contracts).
"""
import json
from senpai.md_consumer.models import (
EventType,
TradeEvent,
QuoteEvent,
HeartbeatEvent,
parse_event,
)
# ── Trade events ───────────────────────────────────────────────────────
def test_parse_trade_basic():
    """A minimal trade payload must parse into a TradeEvent with matching fields."""
    payload = {
        "event_type": "trade",
        "provider": "binance",
        "symbol": "BTCUSDT",
        "price": 70500.0,
        "size": 1.5,
        "ts_recv": "2026-02-09T12:00:00+00:00",
    }

    parsed = parse_event(payload)

    assert parsed is not None
    assert isinstance(parsed, TradeEvent)
    assert parsed.event_type == EventType.TRADE
    assert parsed.symbol == "BTCUSDT"
    assert parsed.price == 70500.0
    assert parsed.size == 1.5
    assert parsed.provider == "binance"
def test_parse_trade_with_extra_fields():
    """Keys the model does not know about must not prevent the trade from parsing."""
    payload = {
        "event_type": "trade",
        "provider": "bybit",
        "symbol": "ETHUSDT",
        "price": 2100.0,
        "size": 10.0,
        "ts_recv": "2026-02-09T12:00:00+00:00",
        # The two keys below are not part of the trade contract.
        "unknown_field": "should_be_ignored",
        "another_extra": 42,
    }

    parsed = parse_event(payload)

    assert parsed is not None
    assert parsed.symbol == "ETHUSDT"
def test_parse_trade_with_side_and_exchange_ts():
    """Optional trade fields (side, trade_id, ts_exchange) must survive parsing.

    The result is checked for None and for its type before any attribute is
    read, so a parse failure is reported as a plain assertion failure rather
    than an AttributeError on ``None``.
    """
    data = {
        "event_type": "trade",
        "provider": "binance",
        "symbol": "BTCUSDT",
        "price": 70000.0,
        "size": 0.5,
        "side": "buy",
        "ts_exchange": "2026-02-09T12:00:00+00:00",
        "ts_recv": "2026-02-09T12:00:00.100+00:00",
        "trade_id": "t12345",
    }
    event = parse_event(data)

    # Guard first: parse_event returns None for payloads it rejects.
    assert event is not None
    assert isinstance(event, TradeEvent)

    assert event.side == "buy"
    assert event.trade_id == "t12345"
    assert event.ts_exchange is not None
# ── Quote events ───────────────────────────────────────────────────────
def test_parse_quote_basic():
    """A quote payload must parse into a QuoteEvent carrying its bid and ask."""
    payload = {
        "event_type": "quote",
        "provider": "binance",
        "symbol": "BTCUSDT",
        "bid": 70000.0,
        "ask": 70001.0,
        "bid_size": 5.0,
        "ask_size": 3.0,
        "ts_recv": "2026-02-09T12:00:00+00:00",
    }

    parsed = parse_event(payload)

    assert isinstance(parsed, QuoteEvent)
    assert parsed.bid == 70000.0
    assert parsed.ask == 70001.0
def test_parse_quote_zero_values():
    """A quote whose prices and sizes are all zero must still parse."""
    payload = {
        "event_type": "quote",
        "provider": "binance",
        "symbol": "BTCUSDT",
        "bid": 0.0,
        "ask": 0.0,
        "bid_size": 0.0,
        "ask_size": 0.0,
    }

    parsed = parse_event(payload)

    assert parsed is not None
    assert parsed.bid == 0.0
# ── Heartbeat events ──────────────────────────────────────────────────
def test_parse_heartbeat():
    """A heartbeat payload must parse into a HeartbeatEvent keeping its provider."""
    payload = {
        "event_type": "heartbeat",
        "provider": "alpaca",
        "ts_recv": "2026-02-09T12:00:00+00:00",
    }

    parsed = parse_event(payload)

    assert isinstance(parsed, HeartbeatEvent)
    assert parsed.provider == "alpaca"
# ── Edge cases ─────────────────────────────────────────────────────────
def test_parse_unknown_type():
    """An unrecognised event_type must yield None."""
    payload = {"event_type": "unknown_type", "provider": "test"}
    assert parse_event(payload) is None
def test_parse_missing_type():
    """A payload without an event_type key must yield None."""
    payload = {"provider": "test", "symbol": "BTC"}
    assert parse_event(payload) is None
def test_parse_invalid_data():
    """A trade payload lacking its required fields must yield None."""
    incomplete = {"event_type": "trade"}  # only the type, nothing else
    assert parse_event(incomplete) is None
def test_parse_empty_dict():
    """An empty payload must yield None."""
    empty_payload = {}
    assert parse_event(empty_payload) is None
def test_parse_from_json_bytes():
    """Bytes decoded with json.loads, as for a NATS message body, must parse."""
    wire_bytes = (
        b'{"event_type":"trade","provider":"binance","symbol":"BTCUSDT","price":70500.0,"size":1.5}'
    )
    parsed = parse_event(json.loads(wire_bytes))

    assert parsed is not None
    assert parsed.price == 70500.0

View File

@@ -0,0 +1,111 @@
"""
Test publisher rate limiting.
"""
from unittest.mock import AsyncMock
import pytest
from senpai.md_consumer.publisher import Publisher
from senpai.md_consumer.models import FeatureSnapshot, TradeSignal
@pytest.fixture
def mock_nc():
    """Provide an AsyncMock standing in for the NATS client."""
    client = AsyncMock()
    # Give publish its own AsyncMock so call counts and side effects can be set on it.
    client.publish = AsyncMock()
    return client
@pytest.fixture
def publisher(mock_nc):
    """Provide a Publisher constructed with the mocked NATS client."""
    instance = Publisher(mock_nc)
    return instance
@pytest.mark.asyncio
async def test_publish_features_respects_rate_limit(mock_nc, publisher):
    """A second publish for the same symbol, sent straight away, must be skipped."""
    snap = FeatureSnapshot(symbol="BTCUSDT", features={"mid": 70000.0})

    # The first call goes through.
    first = await publisher.publish_features(snap)
    assert first is True

    # The immediate repeat is rejected by the rate limiter.
    second = await publisher.publish_features(snap)
    assert second is False

    # Only the first call reached the NATS client.
    assert mock_nc.publish.call_count == 1
@pytest.mark.asyncio
async def test_publish_features_different_symbols(mock_nc, publisher):
    """Rate limiting is per symbol: two different symbols must both publish."""
    btc_snap = FeatureSnapshot(symbol="BTCUSDT", features={"mid": 70000.0})
    eth_snap = FeatureSnapshot(symbol="ETHUSDT", features={"mid": 2000.0})

    btc_sent = await publisher.publish_features(btc_snap)
    eth_sent = await publisher.publish_features(eth_snap)

    assert btc_sent is True
    assert eth_sent is True
    assert mock_nc.publish.call_count == 2
@pytest.mark.asyncio
async def test_publish_signal_no_rate_limit(mock_nc, publisher):
    """Two back-to-back publishes of the same signal must both go out."""
    sig = TradeSignal(
        symbol="BTCUSDT",
        direction="long",
        confidence=0.8,
        reason="test",
    )

    first = await publisher.publish_signal(sig)
    second = await publisher.publish_signal(sig)

    assert first is True
    assert second is True
    assert mock_nc.publish.call_count == 2
@pytest.mark.asyncio
async def test_publish_features_after_rate_window(mock_nc, publisher):
    """Once the minimum interval has elapsed, the same symbol must publish again."""
    import asyncio

    # Shrink the minimum interval to 10 ms so the test stays fast.
    publisher._min_interval = 0.01
    snap = FeatureSnapshot(symbol="BTCUSDT", features={"mid": 70000.0})

    first = await publisher.publish_features(snap)
    assert first is True

    # Sleep 20 ms, i.e. past the 10 ms window, before retrying.
    await asyncio.sleep(0.02)

    second = await publisher.publish_features(snap)
    assert second is True
    assert mock_nc.publish.call_count == 2
@pytest.mark.asyncio
async def test_publish_handles_nats_error(mock_nc, publisher):
    """A failing NATS publish must be reported as False instead of raising."""
    # Make every publish call on the mocked client raise.
    mock_nc.publish.side_effect = Exception("NATS down")
    snap = FeatureSnapshot(symbol="BTCUSDT", features={"mid": 70000.0})

    outcome = await publisher.publish_features(snap)
    assert outcome is False

View File

@@ -0,0 +1,138 @@
"""
Test state management — LatestState and RollingWindow.
"""
import time
from senpai.md_consumer.state import (
LatestState,
RollingWindow,
TradeRecord,
)
from senpai.md_consumer.models import TradeEvent, QuoteEvent
# ── RollingWindow ──────────────────────────────────────────────────────
def test_rolling_window_add_trade():
    """A trade added to the window must appear as its only stored trade."""
    window = RollingWindow(window_seconds=60.0)
    record = TradeRecord(price=100.0, size=1.0, ts=time.monotonic())

    window.add_trade(record)

    assert len(window.trades) == 1
    assert window.trades[0].price == 100.0
def test_rolling_window_eviction():
    """A trade older than the window must be gone once a fresh trade is added."""
    window = RollingWindow(window_seconds=1.0)  # keep only the last second

    stale_ts = time.monotonic() - 2.0  # two seconds in the past
    window.add_trade(TradeRecord(price=100.0, size=1.0, ts=stale_ts))
    window.add_trade(TradeRecord(price=200.0, size=2.0, ts=time.monotonic()))

    # Only the fresh trade may remain.
    remaining = list(window.trades)
    assert len(remaining) == 1
    assert remaining[0].price == 200.0
def test_rolling_window_trades_since():
    """trades_since(10.0) must return only the trades from the last ten seconds."""
    window = RollingWindow(window_seconds=60.0)
    now = time.monotonic()

    # (price, size, age in seconds), oldest first.
    tape = ((100.0, 1.0, 30), (200.0, 2.0, 5), (300.0, 3.0, 0))
    for price, size, age in tape:
        window.add_trade(TradeRecord(price=price, size=size, ts=now - age))

    recent = window.trades_since(10.0)

    # The 30 s old trade is excluded; the 5 s old trade comes first.
    assert len(recent) == 2
    assert recent[0].price == 200.0
def test_rolling_window_empty():
    """A new window must hold no trades, no quotes, and return [] from trades_since."""
    window = RollingWindow(window_seconds=60.0)

    assert len(window.trades) == 0
    assert len(window.quotes) == 0
    assert window.trades_since(10.0) == []
# ── LatestState ────────────────────────────────────────────────────────
def test_latest_state_update_trade():
    """After one trade update, that trade is the latest and event_count is 1."""
    state = LatestState(window_seconds=60.0)
    trade = TradeEvent(
        provider="binance",
        symbol="BTCUSDT",
        price=70500.0,
        size=1.5,
        side="buy",
    )

    state.update_trade(trade)
    stored = state.get_latest_trade("BTCUSDT")

    assert stored is not None
    assert stored.price == 70500.0
    assert stored.side == "buy"
    assert state.event_count == 1
def test_latest_state_update_quote():
    """After one quote update, that quote must be returned as the latest."""
    state = LatestState(window_seconds=60.0)
    quote = QuoteEvent(
        provider="binance",
        symbol="BTCUSDT",
        bid=70000.0,
        ask=70001.0,
        bid_size=5.0,
        ask_size=3.0,
    )

    state.update_quote(quote)
    stored = state.get_latest_quote("BTCUSDT")

    assert stored is not None
    assert stored.bid == 70000.0
    assert stored.ask == 70001.0
def test_latest_state_symbols():
    """Symbols seen through either a trade or a quote must be listed in state.symbols."""
    state = LatestState(window_seconds=60.0)

    btc_trade = TradeEvent(provider="binance", symbol="BTCUSDT", price=100.0, size=1.0)
    state.update_trade(btc_trade)

    eth_quote = QuoteEvent(
        provider="binance",
        symbol="ETHUSDT",
        bid=2000.0,
        ask=2001.0,
        bid_size=1.0,
        ask_size=1.0,
    )
    state.update_quote(eth_quote)

    for expected_symbol in ("BTCUSDT", "ETHUSDT"):
        assert expected_symbol in state.symbols
def test_latest_state_to_dict():
    """to_dict must expose the symbol plus its latest trade and latest quote."""
    state = LatestState(window_seconds=60.0)

    trade = TradeEvent(provider="binance", symbol="BTCUSDT", price=70500.0, size=1.0)
    state.update_trade(trade)

    quote = QuoteEvent(
        provider="binance",
        symbol="BTCUSDT",
        bid=70000.0,
        ask=70001.0,
        bid_size=1.0,
        ask_size=1.0,
    )
    state.update_quote(quote)

    snapshot = state.to_dict("BTCUSDT")

    assert snapshot["symbol"] == "BTCUSDT"
    assert "latest_trade" in snapshot
    assert "latest_quote" in snapshot
    assert snapshot["latest_trade"]["price"] == 70500.0
def test_latest_state_missing_symbol():
    """Lookups for a symbol that was never seen must return None."""
    empty_state = LatestState(window_seconds=60.0)
    unknown = "NOPE"

    assert empty_state.get_latest_trade(unknown) is None
    assert empty_state.get_latest_quote(unknown) is None