feat: market-data-service for SenpAI trading agent
New service: real-time market data collection with unified event model. Architecture: - Domain events: TradeEvent, QuoteEvent, BookL2Event, HeartbeatEvent - Provider interface: MarketDataProvider ABC with connect/subscribe/stream/close - Async EventBus with fan-out to multiple consumers Providers: - BinanceProvider: public WebSocket (trades + bookTicker), no API key needed, auto-reconnect with exponential backoff, heartbeat timeout detection - AlpacaProvider: IEX real-time data + paper trading auth, dry-run mode when no keys configured (heartbeats only) Consumers: - StorageConsumer: SQLite (via SQLAlchemy async) + JSONL append-only log - MetricsConsumer: Prometheus counters, latency histograms, events/sec gauge - PrintConsumer: sampled structured logging (1/100 events) CLI: python -m app run --provider binance --symbols BTCUSDT,ETHUSDT HTTP: /health, /metrics (Prometheus), /latest?symbol=XXX Tests: 19/19 passed (Binance parse, Alpaca parse, bus smoke tests) Config: pydantic-settings + .env, all secrets via environment variables. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
57
services/market-data-service/app/providers/__init__.py
Normal file
57
services/market-data-service/app/providers/__init__.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
Market data provider interface and registry.
|
||||
|
||||
To add a new provider:
|
||||
1. Create providers/your_provider.py
|
||||
2. Subclass MarketDataProvider
|
||||
3. Register it in the registry dict inside get_provider() below
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import AsyncIterator
|
||||
|
||||
from app.domain.events import Event
|
||||
|
||||
|
||||
class MarketDataProvider(ABC):
    """
    Abstract contract implemented by every market-data feed adapter.

    Expected call order: connect() → subscribe() → stream() → close()

    Attributes:
        name: Short provider identifier. Concrete adapters override the
            "unknown" placeholder.
    """

    name: str = "unknown"

    @abstractmethod
    async def connect(self) -> None:
        """Open the connection to the upstream data source."""

    @abstractmethod
    async def subscribe(self, symbols: list[str]) -> None:
        """Subscribe to *symbols*. Adapters may call this again after a reconnect."""

    @abstractmethod
    async def stream(self) -> AsyncIterator[Event]:
        """Yield normalized domain events; reconnects must be handled internally."""
        # The bare ``yield`` is what makes this an async generator, so the
        # abstract signature has the same shape as concrete implementations.
        yield  # type: ignore

    @abstractmethod
    async def close(self) -> None:
        """Shut the provider down gracefully."""
|
||||
|
||||
|
||||
def get_provider(name: str) -> MarketDataProvider:
    """
    Build a provider instance from its case-insensitive name.

    Args:
        name: Provider key, e.g. "binance" or "alpaca".

    Returns:
        A freshly constructed provider (constructor called with no arguments).

    Raises:
        ValueError: If *name* does not match a registered provider.
    """
    # Imported lazily: the provider modules import MarketDataProvider from
    # this package, so importing them at module level would be circular.
    from app.providers.binance import BinanceProvider
    from app.providers.alpaca import AlpacaProvider

    providers_by_name: dict[str, type[MarketDataProvider]] = {
        "binance": BinanceProvider,
        "alpaca": AlpacaProvider,
    }

    key = name.lower()
    if key not in providers_by_name:
        choices = ", ".join(providers_by_name)
        raise ValueError(f"Unknown provider '{name}'. Available: {choices}")

    provider_cls = providers_by_name[key]
    return provider_cls()
|
||||
270
services/market-data-service/app/providers/alpaca.py
Normal file
270
services/market-data-service/app/providers/alpaca.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""
|
||||
Alpaca Markets provider — paper trading + IEX real-time data.
|
||||
|
||||
Requires ALPACA_KEY + ALPACA_SECRET in .env for live mode.
|
||||
Falls back to dry-run mode if keys are not configured.
|
||||
|
||||
Subscribes to:
|
||||
- trades → TradeEvent
|
||||
- quotes → QuoteEvent
|
||||
|
||||
Alpaca WebSocket protocol:
|
||||
wss://stream.data.alpaca.markets/v2/iex
|
||||
Auth → subscribe → stream messages
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import AsyncIterator
|
||||
|
||||
import websockets
|
||||
from websockets.exceptions import ConnectionClosed
|
||||
|
||||
from app.config import settings
|
||||
from app.domain.events import (
|
||||
Event,
|
||||
HeartbeatEvent,
|
||||
QuoteEvent,
|
||||
TradeEvent,
|
||||
)
|
||||
from app.providers import MarketDataProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _iso_to_dt(ts_str: str | None) -> datetime | None:
|
||||
"""Parse Alpaca ISO-8601 timestamp to UTC datetime."""
|
||||
if not ts_str:
|
||||
return None
|
||||
try:
|
||||
# Alpaca uses RFC3339 with Z or +00:00
|
||||
ts_str = ts_str.replace("Z", "+00:00")
|
||||
return datetime.fromisoformat(ts_str)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
class AlpacaProvider(MarketDataProvider):
    """
    Alpaca IEX real-time data + paper trading integration.

    In dry-run mode (no keys configured, or dry-run forced in settings) no
    socket is opened: the provider emits a HeartbeatEvent every five seconds
    and logs a warning — useful for testing the pipeline without keys.
    """

    name = "alpaca"

    def __init__(self) -> None:
        self._ws: websockets.WebSocketClientProtocol | None = None
        self._symbols: list[str] = []
        self._connected = False
        self._authenticated = False
        self._reconnect_count = 0
        # Set by close(); tells stream() to stop instead of reconnecting.
        self._closed = False
        self._dry_run = not settings.alpaca_configured or settings.alpaca_dry_run

    async def connect(self) -> None:
        """Establish the WebSocket connection and authenticate.

        In dry-run mode this only marks the provider as connected.

        Raises:
            ConnectionError: Alpaca reported an auth error, or never
                confirmed that the session is authenticated.
        """
        # An explicit connect() re-arms a provider that was closed earlier.
        self._closed = False

        if self._dry_run:
            logger.warning(
                "alpaca.dry_run_mode",
                extra={"reason": "No ALPACA_KEY/ALPACA_SECRET or dry_run=True"},
            )
            self._connected = True
            return

        url = settings.alpaca_data_ws_url
        logger.info("alpaca.connecting", extra={"url": url})

        self._ws = await websockets.connect(
            url,
            ping_interval=20,
            ping_timeout=10,
            close_timeout=5,
        )

        # Read the welcome message sent before any command is accepted.
        welcome_data = json.loads(await self._ws.recv())
        # The extra key must not be "msg": that name is a LogRecord attribute
        # and logging raises KeyError("Attempt to overwrite 'msg' ...").
        logger.info("alpaca.welcome", extra={"payload": welcome_data})

        # Authenticate
        auth_msg = {
            "action": "auth",
            "key": settings.alpaca_key,
            "secret": settings.alpaca_secret,
        }
        await self._ws.send(json.dumps(auth_msg))

        auth_data = json.loads(await self._ws.recv())
        logger.info("alpaca.auth_response", extra={"payload": auth_data})

        # Check auth result; a bare object is treated like a one-item array.
        self._authenticated = False
        replies = auth_data if isinstance(auth_data, list) else [auth_data]
        for reply in replies:
            if not isinstance(reply, dict):
                continue
            if reply.get("T") == "error":
                raise ConnectionError(f"Alpaca auth failed: {reply}")
            if reply.get("T") == "success" and reply.get("msg") == "authenticated":
                self._authenticated = True

        if not self._authenticated:
            # Without this guard the provider would report itself connected
            # and subscribe on a session the server never authenticated.
            raise ConnectionError(f"Alpaca auth not confirmed: {auth_data}")

        self._connected = True
        logger.info("alpaca.connected", extra={"authenticated": self._authenticated})

    async def subscribe(self, symbols: list[str]) -> None:
        """Subscribe to trades + quotes for *symbols* (upper-cased).

        The symbol list is remembered so a reconnect can resubscribe.

        Raises:
            RuntimeError: Called in live mode before connect().
        """
        self._symbols = [s.upper() for s in symbols]

        if self._dry_run:
            logger.info(
                "alpaca.dry_run_subscribe",
                extra={"symbols": self._symbols},
            )
            return

        if not self._ws:
            raise RuntimeError("Not connected.")

        sub_msg = {
            "action": "subscribe",
            "trades": self._symbols,
            "quotes": self._symbols,
        }
        await self._ws.send(json.dumps(sub_msg))

        # Read subscription confirmation
        confirmation = json.loads(await self._ws.recv())
        logger.info("alpaca.subscribed", extra={"response": confirmation})

        # Surface a rejected subscription instead of streaming nothing silently.
        replies = confirmation if isinstance(confirmation, list) else [confirmation]
        for reply in replies:
            if isinstance(reply, dict) and reply.get("T") == "error":
                logger.error("alpaca.subscribe_error", extra={"response": reply})

    async def stream(self) -> AsyncIterator[Event]:
        """Yield domain events until close() is called.

        Dry-run mode emits periodic heartbeats. In live mode the loop
        reconnects with exponential backoff on socket errors and on
        heartbeat timeouts; malformed payloads are skipped without
        dropping the connection.
        """
        if self._dry_run:
            async for event in self._dry_run_stream():
                yield event
            return

        backoff = settings.reconnect_base_delay

        while not self._closed:
            try:
                if not self._connected or not self._ws:
                    await self._reconnect(backoff)
                    if self._closed:
                        break

                try:
                    raw = await asyncio.wait_for(
                        self._ws.recv(),  # type: ignore
                        timeout=settings.heartbeat_timeout,
                    )
                except asyncio.TimeoutError:
                    logger.warning("alpaca.heartbeat_timeout")
                    self._connected = False
                    continue

                # Reset backoff once the connection delivers data again.
                backoff = settings.reconnect_base_delay

                for event in self._decode(raw):
                    yield event

            except ConnectionClosed as e:
                logger.warning(
                    "alpaca.connection_closed",
                    extra={"code": e.code, "reason": str(e.reason)},
                )
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)

            except Exception as e:
                logger.error("alpaca.stream_error", extra={"error": str(e)})
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)

    def _decode(self, raw: str | bytes) -> list[Event]:
        """Turn one WebSocket frame into zero or more domain events.

        Bad JSON and unparsable messages are logged and skipped so that a
        single malformed payload does not force a reconnect.
        """
        try:
            messages = json.loads(raw)
        except (TypeError, ValueError) as exc:
            logger.warning("alpaca.bad_frame", extra={"error": str(exc)})
            return []

        # Alpaca sends arrays of messages
        if not isinstance(messages, list):
            messages = [messages]

        events: list[Event] = []
        for msg in messages:
            if not isinstance(msg, dict):
                continue
            try:
                event = self._parse(msg)
            except (TypeError, ValueError) as exc:
                logger.warning("alpaca.parse_error", extra={"error": str(exc)})
                continue
            if event:
                events.append(event)
        return events

    async def _dry_run_stream(self) -> AsyncIterator[Event]:
        """Emit a heartbeat every five seconds in dry-run mode (no real data)."""
        logger.info("alpaca.dry_run_stream_started")
        while not self._closed:
            yield HeartbeatEvent(provider=self.name)
            await asyncio.sleep(5.0)

    async def _reconnect(self, delay: float) -> None:
        """Wait *delay* seconds, drop the old socket, reconnect and resubscribe."""
        self._reconnect_count += 1
        logger.info(
            "alpaca.reconnecting",
            extra={"delay": delay, "attempt": self._reconnect_count},
        )
        await asyncio.sleep(delay)

        await self._close_socket()
        if self._closed:
            # close() was called while we were waiting; do not reconnect.
            return

        self._authenticated = False
        await self.connect()
        if self._closed:
            # close() raced with the handshake; drop the fresh socket.
            self._connected = False
            await self._close_socket()
            return

        if self._symbols:
            await self.subscribe(self._symbols)

    async def _close_socket(self) -> None:
        """Best-effort close of the current socket; failures are only logged."""
        if not self._ws:
            return
        try:
            await self._ws.close()
        except Exception as exc:
            logger.debug("alpaca.socket_close_failed", extra={"error": str(exc)})

    def _parse(self, msg: dict) -> Event | None:
        """Parse a single Alpaca message into a domain event.

        Returns None for control messages and unknown message types.
        """
        msg_type = msg.get("T")

        if msg_type == "t":
            return self._parse_trade(msg)
        if msg_type == "q":
            return self._parse_quote(msg)
        if msg_type == "error":
            # Not a data event, but worth surfacing rather than dropping.
            logger.warning(
                "alpaca.server_error",
                extra={"code": msg.get("code"), "error": msg.get("msg")},
            )

        # "success" / "subscription" control messages and anything unknown.
        return None

    def _parse_trade(self, data: dict) -> TradeEvent:
        """
        Build a TradeEvent from an Alpaca trade message:
        {"T":"t", "S":"AAPL", "p":150.25, "s":100, "t":"2024-01-15T...", "i":12345, ...}

        Missing numeric fields default to 0 and a missing symbol to "".
        """
        return TradeEvent(
            provider=self.name,
            symbol=data.get("S", "").upper(),
            price=float(data.get("p", 0)),
            size=float(data.get("s", 0)),
            ts_exchange=_iso_to_dt(data.get("t")),
            trade_id=str(data.get("i", "")),
        )

    def _parse_quote(self, data: dict) -> QuoteEvent:
        """
        Build a QuoteEvent from an Alpaca quote message:
        {"T":"q", "S":"AAPL", "bp":150.24, "bs":200, "ap":150.26, "as":100,
         "t":"2024-01-15T...", ...}

        Missing numeric fields default to 0 and a missing symbol to "".
        """
        return QuoteEvent(
            provider=self.name,
            symbol=data.get("S", "").upper(),
            bid=float(data.get("bp", 0)),
            ask=float(data.get("ap", 0)),
            bid_size=float(data.get("bs", 0)),
            ask_size=float(data.get("as", 0)),
            ts_exchange=_iso_to_dt(data.get("t")),
        )

    async def close(self) -> None:
        """Close the socket and tell stream() to stop instead of reconnecting."""
        self._closed = True
        self._connected = False
        await self._close_socket()
        logger.info(
            "alpaca.closed",
            extra={"reconnect_count": self._reconnect_count},
        )
|
||||
223
services/market-data-service/app/providers/binance.py
Normal file
223
services/market-data-service/app/providers/binance.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""
|
||||
Binance public WebSocket provider.
|
||||
|
||||
No API key required. Subscribes to:
|
||||
- <symbol>@trade → TradeEvent
|
||||
- <symbol>@bookTicker → QuoteEvent
|
||||
|
||||
Auto-reconnect with exponential backoff (implemented in stream()).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import AsyncIterator
|
||||
|
||||
import websockets
|
||||
from websockets.exceptions import ConnectionClosed
|
||||
|
||||
from app.config import settings
|
||||
from app.domain.events import (
|
||||
Event,
|
||||
HeartbeatEvent,
|
||||
QuoteEvent,
|
||||
TradeEvent,
|
||||
)
|
||||
from app.providers import MarketDataProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ms_to_dt(ms: int | float | None) -> datetime | None:
|
||||
"""Convert millisecond epoch to UTC datetime."""
|
||||
if ms is None:
|
||||
return None
|
||||
return datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
|
||||
|
||||
|
||||
class BinanceProvider(MarketDataProvider):
    """
    Binance public WebSocket streams.

    Connects to settings.binance_ws_url and sends a SUBSCRIBE request for
    the trade + bookTicker channels of each symbol. Both raw payloads and
    combined-stream envelopes ({"stream": ..., "data": {...}}) are parsed.
    """

    name = "binance"

    def __init__(self) -> None:
        self._ws: websockets.WebSocketClientProtocol | None = None
        self._symbols: list[str] = []
        self._connected = False
        self._reconnect_count = 0
        # Set by close(); tells stream() to stop instead of reconnecting.
        self._closed = False
        self._base_url = settings.binance_ws_url

    async def connect(self) -> None:
        """Establish the WebSocket connection."""
        # An explicit connect() re-arms a provider that was closed earlier.
        self._closed = False

        logger.info("binance.connecting", extra={"url": self._base_url})
        self._ws = await websockets.connect(
            self._base_url,
            ping_interval=20,
            ping_timeout=10,
            close_timeout=5,
        )
        self._connected = True
        logger.info("binance.connected")

    async def subscribe(self, symbols: list[str]) -> None:
        """Subscribe to trade + bookTicker for each symbol (lower-cased).

        The symbol list is remembered so a reconnect can resubscribe.

        Raises:
            RuntimeError: Called before connect().
        """
        if not self._ws:
            raise RuntimeError("Not connected. Call connect() first.")

        self._symbols = [s.lower() for s in symbols]
        streams = [
            f"{sym}@{channel}"
            for sym in self._symbols
            for channel in ("trade", "bookTicker")
        ]

        subscribe_msg = {
            "method": "SUBSCRIBE",
            "params": streams,
            "id": 1,
        }
        await self._ws.send(json.dumps(subscribe_msg))
        logger.info(
            "binance.subscribed",
            extra={"symbols": self._symbols, "streams": len(streams)},
        )

    async def stream(self) -> AsyncIterator[Event]:
        """
        Yield domain events until close() is called.

        Reconnects with exponential backoff on socket errors and on
        heartbeat timeouts; malformed payloads are skipped without
        dropping the connection.
        """
        backoff = settings.reconnect_base_delay

        while not self._closed:
            try:
                if not self._connected or not self._ws:
                    await self._reconnect(backoff)
                    if self._closed:
                        break

                # Set timeout for heartbeat detection
                try:
                    raw = await asyncio.wait_for(
                        self._ws.recv(),  # type: ignore
                        timeout=settings.heartbeat_timeout,
                    )
                except asyncio.TimeoutError:
                    logger.warning(
                        "binance.heartbeat_timeout",
                        extra={"timeout": settings.heartbeat_timeout},
                    )
                    self._connected = False
                    continue

                # Reset backoff on successful message
                backoff = settings.reconnect_base_delay

                event = self._decode(raw)
                if event:
                    yield event

            except ConnectionClosed as e:
                logger.warning(
                    "binance.connection_closed",
                    extra={"code": e.code, "reason": str(e.reason)},
                )
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)

            except Exception as e:
                logger.error("binance.stream_error", extra={"error": str(e)})
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)

    def _decode(self, raw: str | bytes) -> Event | None:
        """Turn one WebSocket frame into a domain event, or None.

        Bad JSON and unparsable payloads are logged and skipped so that a
        single malformed message does not force a reconnect.
        """
        try:
            data = json.loads(raw)
        except (TypeError, ValueError) as exc:
            logger.warning("binance.bad_frame", extra={"error": str(exc)})
            return None

        if not isinstance(data, dict):
            return None

        # Skip subscription confirmations
        if "result" in data and "id" in data:
            return None

        try:
            return self._parse(data)
        except (TypeError, ValueError) as exc:
            logger.warning("binance.parse_error", extra={"error": str(exc)})
            return None

    async def _reconnect(self, delay: float) -> None:
        """Wait *delay* seconds, drop the old socket, reconnect and resubscribe."""
        self._reconnect_count += 1
        logger.info(
            "binance.reconnecting",
            extra={"delay": delay, "attempt": self._reconnect_count},
        )
        await asyncio.sleep(delay)

        await self._close_socket()
        if self._closed:
            # close() was called while we were waiting; do not reconnect.
            return

        await self.connect()
        if self._closed:
            # close() raced with the handshake; drop the fresh socket.
            self._connected = False
            await self._close_socket()
            return

        if self._symbols:
            await self.subscribe(self._symbols)

    async def _close_socket(self) -> None:
        """Best-effort close of the current socket; failures are only logged."""
        if not self._ws:
            return
        try:
            await self._ws.close()
        except Exception as exc:
            logger.debug("binance.socket_close_failed", extra={"error": str(exc)})

    def _parse(self, data: dict) -> Event | None:
        """Parse raw Binance JSON into a domain event.

        Returns None for payloads that are neither a trade nor a bookTicker.
        """
        # The combined-stream endpoint wraps each payload in an envelope;
        # unwrap it so both endpoint flavours parse the same way.
        inner = data.get("data")
        if "stream" in data and isinstance(inner, dict):
            data = inner

        event_type = data.get("e")

        if event_type == "trade":
            return self._parse_trade(data)

        # bookTicker doesn't always have an "e" field, so also recognise it
        # by its bid/ask/symbol keys when no event type is present at all.
        looks_like_ticker = "e" not in data and all(
            key in data for key in ("b", "a", "s")
        )
        if event_type == "bookTicker" or looks_like_ticker:
            return self._parse_book_ticker(data)

        return None

    def _parse_trade(self, data: dict) -> TradeEvent:
        """
        Build a TradeEvent from a Binance trade payload:
        {
            "e": "trade", "E": 1672515782136, "s": "BNBBTC",
            "t": 12345, "p": "0.001", "q": "100",
            "T": 1672515782136, "m": true
        }

        Missing numeric fields default to 0 and a missing symbol to "".
        """
        return TradeEvent(
            provider=self.name,
            symbol=data.get("s", "").upper(),
            price=float(data.get("p", 0)),
            size=float(data.get("q", 0)),
            ts_exchange=_ms_to_dt(data.get("T")),
            side="sell" if data.get("m") else "buy",  # m=True → buyer is maker → trade is a sell
            trade_id=str(data.get("t", "")),
        )

    def _parse_book_ticker(self, data: dict) -> QuoteEvent:
        """
        Build a QuoteEvent from a Binance bookTicker payload:
        {
            "u": 400900217, "s": "BNBUSDT",
            "b": "25.35190000", "B": "31.21000000",
            "a": "25.36520000", "A": "40.66000000"
        }

        ts_exchange is taken from "E" and is None when that key is absent,
        as in the sample above.
        """
        return QuoteEvent(
            provider=self.name,
            symbol=data.get("s", "").upper(),
            bid=float(data.get("b", 0)),
            ask=float(data.get("a", 0)),
            bid_size=float(data.get("B", 0)),
            ask_size=float(data.get("A", 0)),
            ts_exchange=_ms_to_dt(data.get("E")),
        )

    async def close(self) -> None:
        """Close the socket and tell stream() to stop instead of reconnecting."""
        self._closed = True
        self._connected = False
        await self._close_socket()
        logger.info(
            "binance.closed",
            extra={"reconnect_count": self._reconnect_count},
        )
|
||||
Reference in New Issue
Block a user