feat: MD pipeline — market-data-service hardening + SenpAI NATS consumer
Producer (market-data-service):
- Backpressure: smart drop policy (drop heartbeats first, then quotes; trades are preserved)
- Heartbeat monitor: synthetic HeartbeatEvent on provider silence
- Graceful shutdown: WS→bus→storage→DB engine cleanup sequence
- Bybit V5 public WS provider (backup for Binance, no API key needed)
- FailoverManager: health-based provider switching with recovery
- NATS output adapter: md.events.{type}.{symbol} for SenpAI
- /bus-stats endpoint for backpressure monitoring
- Dockerfile + docker-compose.node1.yml integration
- 36 tests (parsing + bus + failover), requirements.lock
Consumer (senpai-md-consumer):
- NATSConsumer: subscribes to md.events.> in queue group senpai-md, with backpressure
- State store: LatestState + RollingWindow (deque, 60s)
- Feature engine: 11 features (mid, spread, VWAP, return, vol, latency)
- Rule-based signals: long/short on return+volume+spread conditions
- Publisher: rate-limited features + signals + alerts to NATS
- HTTP API: /health, /metrics, /state/latest, /features/latest, /stats
- 10 Prometheus metrics
- Dockerfile + docker-compose.senpai.yml
- 41 tests (parsing + state + features + rate-limit), requirements.lock
CI: ruff + pytest + smoke import for both services
Tests: 77 total (36 + 41), all passing; lint clean
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -18,7 +18,6 @@ import asyncio
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import structlog
|
||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
@@ -26,14 +25,18 @@ from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
from app.config import settings
|
||||
from app.core.bus import EventBus
|
||||
from app.consumers.metrics import MetricsConsumer
|
||||
from app.consumers.nats_output import NatsOutputConsumer
|
||||
from app.consumers.print import PrintConsumer
|
||||
from app.consumers.storage import StorageConsumer
|
||||
from app.db.schema import init_db
|
||||
from app.db.schema import engine, init_db
|
||||
from app.db import repo
|
||||
from app.providers import MarketDataProvider, get_provider
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Global reference to bus (for HTTP status endpoint)
|
||||
_bus: EventBus | None = None
|
||||
|
||||
# ── Logging setup ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -105,6 +108,18 @@ async def _http_handler(reader: asyncio.StreamReader, writer: asyncio.StreamWrit
|
||||
}
|
||||
body = json.dumps(result, ensure_ascii=False).encode()
|
||||
content_type = "application/json"
|
||||
elif path == "/bus-stats":
|
||||
import json as _json
|
||||
|
||||
bus_info = {"queue_size": 0, "fill_percent": 0.0}
|
||||
if _bus:
|
||||
bus_info = {
|
||||
"queue_size": _bus.queue_size,
|
||||
"fill_percent": round(_bus.fill_percent * 100, 1),
|
||||
"max_size": _bus._max_size,
|
||||
}
|
||||
body = _json.dumps(bus_info).encode()
|
||||
content_type = "application/json"
|
||||
else:
|
||||
body = b'{"error":"not found"}'
|
||||
content_type = "application/json"
|
||||
@@ -179,8 +194,13 @@ async def main(provider_names: list[str], symbols: list[str]) -> None:
|
||||
# Init database
|
||||
await init_db()
|
||||
|
||||
# Setup bus + consumers
|
||||
bus = EventBus()
|
||||
global _bus
|
||||
|
||||
# Setup bus + consumers (heartbeat interval from config)
|
||||
bus = EventBus(
|
||||
queue_size=10_000,
|
||||
heartbeat_interval=settings.heartbeat_timeout / 2, # check twice per timeout
|
||||
)
|
||||
|
||||
storage = StorageConsumer()
|
||||
await storage.start()
|
||||
@@ -192,16 +212,29 @@ async def main(provider_names: list[str], symbols: list[str]) -> None:
|
||||
printer = PrintConsumer()
|
||||
bus.add_consumer(printer)
|
||||
|
||||
# Optional: NATS output adapter
|
||||
nats_consumer = None
|
||||
if settings.nats_configured:
|
||||
nats_consumer = NatsOutputConsumer()
|
||||
await nats_consumer.start()
|
||||
bus.add_consumer(nats_consumer)
|
||||
logger.info("nats_output.enabled", subject_prefix=settings.nats_subject_prefix)
|
||||
else:
|
||||
logger.info("nats_output.disabled", hint="Set NATS_URL + NATS_ENABLED=true to enable")
|
||||
|
||||
# Create providers and register them for heartbeat monitoring
|
||||
providers: list[MarketDataProvider] = []
|
||||
for name in provider_names:
|
||||
p = get_provider(name)
|
||||
providers.append(p)
|
||||
bus.register_provider(p.name)
|
||||
|
||||
_bus = bus
|
||||
await bus.start()
|
||||
|
||||
# Start HTTP server
|
||||
http_server = await start_http_server()
|
||||
|
||||
# Create providers
|
||||
providers: list[MarketDataProvider] = []
|
||||
for name in provider_names:
|
||||
providers.append(get_provider(name))
|
||||
|
||||
# Run all providers concurrently
|
||||
tasks = []
|
||||
for p in providers:
|
||||
@@ -224,21 +257,43 @@ async def main(provider_names: list[str], symbols: list[str]) -> None:
|
||||
# Wait for shutdown
|
||||
await shutdown_event.wait()
|
||||
|
||||
# Cleanup
|
||||
# ── Graceful shutdown sequence ──────────────────────────────────────
|
||||
logger.info("service.shutting_down")
|
||||
|
||||
# 1. Cancel provider streaming tasks (with timeout)
|
||||
for task in tasks:
|
||||
task.cancel()
|
||||
done, pending = await asyncio.wait(tasks, timeout=5.0)
|
||||
for task in pending:
|
||||
logger.warning("service.task_force_cancel", extra={"task": task.get_name()})
|
||||
task.cancel()
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# 2. Close provider WebSocket connections
|
||||
for p in providers:
|
||||
await p.close()
|
||||
try:
|
||||
await p.close()
|
||||
except Exception as e:
|
||||
logger.warning("service.provider_close_error", extra={"provider": p.name, "error": str(e)})
|
||||
|
||||
# 3. Stop bus (drains remaining events to consumers)
|
||||
await bus.stop()
|
||||
|
||||
# 4. Stop storage consumer (flush JSONL)
|
||||
await storage.stop()
|
||||
|
||||
# 4b. Stop NATS output (flush + close)
|
||||
if nats_consumer:
|
||||
await nats_consumer.stop()
|
||||
|
||||
# 5. Close HTTP server
|
||||
http_server.close()
|
||||
await http_server.wait_closed()
|
||||
|
||||
logger.info("service.stopped")
|
||||
# 6. Close SQLAlchemy engine (flush connections)
|
||||
await engine.dispose()
|
||||
|
||||
logger.info("service.stopped", extra={"exit": "clean"})
|
||||
|
||||
|
||||
# ── CLI ────────────────────────────────────────────────────────────────
|
||||
@@ -270,7 +325,7 @@ def cli():
|
||||
symbols = [s.strip() for s in args.symbols.split(",") if s.strip()]
|
||||
|
||||
if args.provider.lower() == "all":
|
||||
provider_names = ["binance", "alpaca"]
|
||||
provider_names = ["binance", "alpaca", "bybit"]
|
||||
else:
|
||||
provider_names = [p.strip() for p in args.provider.split(",") if p.strip()]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user