feat: market-data-service for SenpAI trading agent

New service: real-time market data collection with unified event model.

Architecture:
- Domain events: TradeEvent, QuoteEvent, BookL2Event, HeartbeatEvent
- Provider interface: MarketDataProvider ABC with connect/subscribe/stream/close
- Async EventBus with fan-out to multiple consumers

Providers:
- BinanceProvider: public WebSocket (trades + bookTicker), no API key needed,
  auto-reconnect with exponential backoff, heartbeat timeout detection
- AlpacaProvider: IEX real-time data + paper trading auth,
  dry-run mode when no keys configured (heartbeats only)

Consumers:
- StorageConsumer: SQLite (via SQLAlchemy async) + JSONL append-only log
- MetricsConsumer: Prometheus counters, latency histograms, events/sec gauge
- PrintConsumer: sampled structured logging (1/100 events)

CLI: python -m app run --provider binance --symbols BTCUSDT,ETHUSDT
HTTP: /health, /metrics (Prometheus), /latest?symbol=XXX

Tests: 19/19 passed (Binance parse, Alpaca parse, bus smoke tests)

Config: pydantic-settings + .env, all secrets via environment variables.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Apple
2026-02-09 11:19:00 -08:00
parent ad6b6d2662
commit c50843933f
26 changed files with 2036 additions and 0 deletions

View File

@@ -0,0 +1,57 @@
"""
Market data provider interface and registry.
To add a new provider:
1. Create providers/your_provider.py
2. Subclass MarketDataProvider
3. Add it to the registry mapping inside get_provider() below
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import AsyncIterator
from app.domain.events import Event
class MarketDataProvider(ABC):
    """
    Abstract adapter for a market-data feed.

    Concrete providers implement the four lifecycle stages, which are
    invoked in order: connect() → subscribe() → stream() → close().
    """

    # Human-readable provider identifier; subclasses override this.
    name: str = "unknown"

    @abstractmethod
    async def connect(self) -> None:
        """Open the connection to the upstream data source."""

    @abstractmethod
    async def subscribe(self, symbols: list[str]) -> None:
        """Subscribe to the given symbols; may be invoked again after a reconnect."""

    @abstractmethod
    async def stream(self) -> AsyncIterator[Event]:
        """Produce normalized domain events; reconnection is the provider's responsibility."""
        yield  # type: ignore

    @abstractmethod
    async def close(self) -> None:
        """Shut the provider down cleanly."""
def get_provider(name: str) -> MarketDataProvider:
    """Look up a provider class by (case-insensitive) name and return a fresh instance.

    Raises:
        ValueError: if *name* does not match any registered provider.
    """
    # Imports are deferred to avoid a circular import at module load time.
    from app.providers.binance import BinanceProvider
    from app.providers.alpaca import AlpacaProvider

    known: dict[str, type[MarketDataProvider]] = {
        "binance": BinanceProvider,
        "alpaca": AlpacaProvider,
    }
    provider_cls = known.get(name.lower())
    if provider_cls is None:
        available = ", ".join(known)
        raise ValueError(f"Unknown provider '{name}'. Available: {available}")
    return provider_cls()

View File

@@ -0,0 +1,270 @@
"""
Alpaca Markets provider — paper trading + IEX real-time data.
Requires ALPACA_KEY + ALPACA_SECRET in .env for live mode.
Falls back to dry-run mode if keys are not configured.
Subscribes to:
- trades → TradeEvent
- quotes → QuoteEvent
Alpaca WebSocket protocol:
wss://stream.data.alpaca.markets/v2/iex
Auth → subscribe → stream messages
"""
from __future__ import annotations
import asyncio
import json
import logging
from datetime import datetime, timezone
from typing import AsyncIterator
import websockets
from websockets.exceptions import ConnectionClosed
from app.config import settings
from app.domain.events import (
Event,
HeartbeatEvent,
QuoteEvent,
TradeEvent,
)
from app.providers import MarketDataProvider
logger = logging.getLogger(__name__)
def _iso_to_dt(ts_str: str | None) -> datetime | None:
"""Parse Alpaca ISO-8601 timestamp to UTC datetime."""
if not ts_str:
return None
try:
# Alpaca uses RFC3339 with Z or +00:00
ts_str = ts_str.replace("Z", "+00:00")
return datetime.fromisoformat(ts_str)
except (ValueError, TypeError):
return None
class AlpacaProvider(MarketDataProvider):
    """
    Alpaca IEX real-time data + paper trading integration.

    In dry-run mode (no keys), generates synthetic heartbeats
    and logs a warning — useful for testing the pipeline without keys.

    Lifecycle follows MarketDataProvider: connect() authenticates over the
    WebSocket, subscribe() registers trades + quotes, stream() yields
    normalized events and reconnects internally, close() shuts down.
    """

    name = "alpaca"

    def __init__(self) -> None:
        # Live WebSocket connection; stays None in dry-run mode.
        self._ws: websockets.WebSocketClientProtocol | None = None
        # Upper-cased symbols from the last subscribe(); replayed after reconnect.
        self._symbols: list[str] = []
        # Connection/auth state flags driving the stream() reconnect loop.
        self._connected = False
        self._authenticated = False
        # Lifetime count of reconnect attempts (reported on close()).
        self._reconnect_count = 0
        # Dry-run when credentials are absent or explicitly forced in settings.
        self._dry_run = not settings.alpaca_configured or settings.alpaca_dry_run

    async def connect(self) -> None:
        """Establish WebSocket connection and authenticate.

        In dry-run mode no network I/O happens; the provider only marks
        itself connected so stream() can emit synthetic heartbeats.

        Raises:
            ConnectionError: if Alpaca replies with an explicit auth error.
        """
        if self._dry_run:
            logger.warning(
                "alpaca.dry_run_mode",
                extra={"reason": "No ALPACA_KEY/ALPACA_SECRET or dry_run=True"},
            )
            self._connected = True
            return
        url = settings.alpaca_data_ws_url
        logger.info("alpaca.connecting", extra={"url": url})
        self._ws = await websockets.connect(
            url,
            ping_interval=20,
            ping_timeout=10,
            close_timeout=5,
        )
        # Read welcome message (Alpaca sends one frame before auth).
        welcome = await self._ws.recv()
        welcome_data = json.loads(welcome)
        logger.info("alpaca.welcome", extra={"msg": welcome_data})
        # Authenticate with the configured key/secret.
        auth_msg = {
            "action": "auth",
            "key": settings.alpaca_key,
            "secret": settings.alpaca_secret,
        }
        await self._ws.send(json.dumps(auth_msg))
        auth_resp = await self._ws.recv()
        auth_data = json.loads(auth_resp)
        logger.info("alpaca.auth_response", extra={"msg": auth_data})
        # Check auth result — Alpaca responds with an array of control messages.
        if isinstance(auth_data, list):
            for msg in auth_data:
                if msg.get("T") == "error":
                    raise ConnectionError(f"Alpaca auth failed: {msg}")
                if msg.get("T") == "success" and msg.get("msg") == "authenticated":
                    self._authenticated = True
        # NOTE(review): _connected is set True even if no "authenticated"
        # confirmation arrived (only explicit errors raise) — confirm that
        # streaming on an unauthenticated socket is the intended fallback.
        self._connected = True
        logger.info("alpaca.connected", extra={"authenticated": self._authenticated})

    async def subscribe(self, symbols: list[str]) -> None:
        """Subscribe to trades + quotes for symbols.

        Symbols are upper-cased and remembered so _reconnect() can replay
        the subscription. In dry-run mode this only records the symbols.

        Raises:
            RuntimeError: if called in live mode before connect().
        """
        self._symbols = [s.upper() for s in symbols]
        if self._dry_run:
            logger.info(
                "alpaca.dry_run_subscribe",
                extra={"symbols": self._symbols},
            )
            return
        if not self._ws:
            raise RuntimeError("Not connected.")
        sub_msg = {
            "action": "subscribe",
            "trades": self._symbols,
            "quotes": self._symbols,
        }
        await self._ws.send(json.dumps(sub_msg))
        # Read subscription confirmation (one control frame).
        sub_resp = await self._ws.recv()
        logger.info("alpaca.subscribed", extra={"response": json.loads(sub_resp)})

    async def stream(self) -> AsyncIterator[Event]:
        """Yield domain events. Dry-run mode emits periodic heartbeats.

        Runs forever: on silence past settings.heartbeat_timeout, closed
        connections, or parse errors it marks the connection dead and lets
        the next loop iteration reconnect with exponential backoff
        (doubling up to settings.reconnect_max_delay, reset after the first
        successful message). asyncio.CancelledError is a BaseException and
        is NOT swallowed by the `except Exception` below, so task
        cancellation still propagates.
        """
        if self._dry_run:
            async for event in self._dry_run_stream():
                yield event
            return
        backoff = settings.reconnect_base_delay
        while True:
            try:
                if not self._connected or not self._ws:
                    await self._reconnect(backoff)
                try:
                    # Bound recv() so a silent socket is detected as stale.
                    raw = await asyncio.wait_for(
                        self._ws.recv(),  # type: ignore
                        timeout=settings.heartbeat_timeout,
                    )
                except asyncio.TimeoutError:
                    logger.warning("alpaca.heartbeat_timeout")
                    self._connected = False
                    continue
                # Successful read: reset backoff to its base value.
                backoff = settings.reconnect_base_delay
                messages = json.loads(raw)
                # Alpaca sends arrays of messages; normalize to a list.
                if not isinstance(messages, list):
                    messages = [messages]
                for msg in messages:
                    event = self._parse(msg)
                    if event:
                        yield event
            except ConnectionClosed as e:
                logger.warning(
                    "alpaca.connection_closed",
                    extra={"code": e.code, "reason": str(e.reason)},
                )
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)
            except Exception as e:
                # NOTE(review): a persistent failure (e.g. DNS outage) retries
                # forever with capped backoff — confirm no max-attempt cap is wanted.
                logger.error("alpaca.stream_error", extra={"error": str(e)})
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)

    async def _dry_run_stream(self) -> AsyncIterator[Event]:
        """Emit heartbeats in dry-run mode (no real data), one every 5 seconds."""
        logger.info("alpaca.dry_run_stream_started")
        while True:
            yield HeartbeatEvent(provider=self.name)
            await asyncio.sleep(5.0)

    async def _reconnect(self, delay: float) -> None:
        """Sleep *delay* seconds, drop the old socket, reconnect and resubscribe."""
        self._reconnect_count += 1
        logger.info(
            "alpaca.reconnecting",
            extra={"delay": delay, "attempt": self._reconnect_count},
        )
        await asyncio.sleep(delay)
        try:
            # Best-effort close of the stale socket; failure here is irrelevant.
            if self._ws:
                await self._ws.close()
        except Exception:
            pass
        self._authenticated = False
        await self.connect()
        # Replay the previous subscription, if any.
        if self._symbols:
            await self.subscribe(self._symbols)

    def _parse(self, msg: dict) -> Event | None:
        """Parse single Alpaca message into domain event.

        Dispatches on the "T" field: "t" → trade, "q" → quote; control
        frames and anything unrecognized return None.
        """
        msg_type = msg.get("T")
        if msg_type == "t":
            return self._parse_trade(msg)
        elif msg_type == "q":
            return self._parse_quote(msg)
        elif msg_type in ("success", "subscription", "error"):
            # Control messages — skip
            return None
        return None

    def _parse_trade(self, data: dict) -> TradeEvent:
        """
        Build a TradeEvent from an Alpaca trade message.

        Alpaca trade:
            {"T":"t", "S":"AAPL", "p":150.25, "s":100, "t":"2024-01-15T...", "i":12345, ...}

        Missing numeric fields default to 0; a missing/bad timestamp yields
        ts_exchange=None.
        """
        return TradeEvent(
            provider=self.name,
            symbol=data.get("S", "").upper(),
            price=float(data.get("p", 0)),
            size=float(data.get("s", 0)),
            ts_exchange=_iso_to_dt(data.get("t")),
            trade_id=str(data.get("i", "")),
        )

    def _parse_quote(self, data: dict) -> QuoteEvent:
        """
        Build a QuoteEvent from an Alpaca quote message.

        Alpaca quote:
            {"T":"q", "S":"AAPL", "bp":150.24, "bs":200, "ap":150.26, "as":100,
             "t":"2024-01-15T...", ...}

        bp/ap are bid/ask prices, bs/as the corresponding sizes; missing
        fields default to 0.
        """
        return QuoteEvent(
            provider=self.name,
            symbol=data.get("S", "").upper(),
            bid=float(data.get("bp", 0)),
            ask=float(data.get("ap", 0)),
            bid_size=float(data.get("bs", 0)),
            ask_size=float(data.get("as", 0)),
            ts_exchange=_iso_to_dt(data.get("t")),
        )

    async def close(self) -> None:
        """Mark the provider disconnected and close the socket best-effort."""
        self._connected = False
        if self._ws:
            try:
                await self._ws.close()
            except Exception:
                pass
        logger.info(
            "alpaca.closed",
            extra={"reconnect_count": self._reconnect_count},
        )

View File

@@ -0,0 +1,223 @@
"""
Binance public WebSocket provider.
No API key required. Subscribes to:
- <symbol>@trade → TradeEvent
- <symbol>@bookTicker → QuoteEvent
Auto-reconnect with exponential backoff via tenacity.
"""
from __future__ import annotations
import asyncio
import json
import logging
import time
from datetime import datetime, timezone
from typing import AsyncIterator
import websockets
from websockets.exceptions import ConnectionClosed
from app.config import settings
from app.domain.events import (
Event,
HeartbeatEvent,
QuoteEvent,
TradeEvent,
)
from app.providers import MarketDataProvider
logger = logging.getLogger(__name__)
def _ms_to_dt(ms: int | float | None) -> datetime | None:
"""Convert millisecond epoch to UTC datetime."""
if ms is None:
return None
return datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
class BinanceProvider(MarketDataProvider):
    """
    Binance public WebSocket streams.

    Connects to the combined stream endpoint and subscribes to
    trade + bookTicker channels for each symbol. No API key is required;
    lifecycle follows MarketDataProvider (connect → subscribe → stream → close).
    """

    name = "binance"

    def __init__(self) -> None:
        # Active WebSocket connection; None until connect() is called.
        self._ws: websockets.WebSocketClientProtocol | None = None
        # Lower-cased symbols from the last subscribe(); replayed after reconnect.
        self._symbols: list[str] = []
        # Connection flag driving the stream() reconnect loop.
        self._connected = False
        # Lifetime count of reconnect attempts (reported on close()).
        self._reconnect_count = 0
        self._base_url = settings.binance_ws_url

    async def connect(self) -> None:
        """Establish WebSocket connection (no auth needed for public streams)."""
        logger.info("binance.connecting", extra={"url": self._base_url})
        self._ws = await websockets.connect(
            self._base_url,
            ping_interval=20,
            ping_timeout=10,
            close_timeout=5,
        )
        self._connected = True
        logger.info("binance.connected")

    async def subscribe(self, symbols: list[str]) -> None:
        """Subscribe to trade + bookTicker for each symbol.

        Symbols are lower-cased (Binance stream names are lowercase) and
        remembered so _reconnect() can replay the subscription.

        Raises:
            RuntimeError: if called before connect().
        """
        if not self._ws:
            raise RuntimeError("Not connected. Call connect() first.")
        self._symbols = [s.lower() for s in symbols]
        # Two streams per symbol: raw trades and top-of-book quotes.
        streams = []
        for sym in self._symbols:
            streams.append(f"{sym}@trade")
            streams.append(f"{sym}@bookTicker")
        subscribe_msg = {
            "method": "SUBSCRIBE",
            "params": streams,
            "id": 1,
        }
        await self._ws.send(json.dumps(subscribe_msg))
        logger.info(
            "binance.subscribed",
            extra={"symbols": self._symbols, "streams": len(streams)},
        )

    async def stream(self) -> AsyncIterator[Event]:
        """
        Yield domain events. Handles reconnect automatically.

        Runs forever: silence past settings.heartbeat_timeout, a closed
        connection, or a parse error marks the connection dead; the next
        loop iteration reconnects with exponential backoff (doubling up to
        settings.reconnect_max_delay, reset after a successful message).
        asyncio.CancelledError is a BaseException and is not swallowed by
        `except Exception`, so task cancellation still propagates.
        """
        backoff = settings.reconnect_base_delay
        while True:
            try:
                if not self._connected or not self._ws:
                    await self._reconnect(backoff)
                # Set timeout for heartbeat detection
                try:
                    raw = await asyncio.wait_for(
                        self._ws.recv(),  # type: ignore
                        timeout=settings.heartbeat_timeout,
                    )
                except asyncio.TimeoutError:
                    logger.warning(
                        "binance.heartbeat_timeout",
                        extra={"timeout": settings.heartbeat_timeout},
                    )
                    self._connected = False
                    continue
                # Reset backoff on successful message
                backoff = settings.reconnect_base_delay
                data = json.loads(raw)
                # Skip subscription confirmations ({"result": null, "id": 1}).
                if "result" in data and "id" in data:
                    continue
                event = self._parse(data)
                if event:
                    yield event
            except ConnectionClosed as e:
                logger.warning(
                    "binance.connection_closed",
                    extra={"code": e.code, "reason": str(e.reason)},
                )
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)
            except Exception as e:
                # NOTE(review): persistent failures retry forever with capped
                # backoff — confirm that no max-attempt limit is desired.
                logger.error("binance.stream_error", extra={"error": str(e)})
                self._connected = False
                backoff = min(backoff * 2, settings.reconnect_max_delay)

    async def _reconnect(self, delay: float) -> None:
        """Reconnect with delay, then resubscribe."""
        self._reconnect_count += 1
        logger.info(
            "binance.reconnecting",
            extra={"delay": delay, "attempt": self._reconnect_count},
        )
        await asyncio.sleep(delay)
        try:
            # Best-effort close of the stale socket; failure here is irrelevant.
            if self._ws:
                await self._ws.close()
        except Exception:
            pass
        await self.connect()
        # Replay the previous subscription, if any.
        if self._symbols:
            await self.subscribe(self._symbols)

    def _parse(self, data: dict) -> Event | None:
        """Parse raw Binance JSON into domain events.

        Dispatches on the "e" event-type field; bookTicker frames may lack
        "e" entirely, so they are also recognized by the presence of the
        b/a/s (bid/ask/symbol) keys. Unrecognized frames return None.
        """
        event_type = data.get("e")
        if event_type == "trade":
            return self._parse_trade(data)
        elif event_type == "bookTicker" or ("b" in data and "a" in data and "s" in data and "e" not in data):
            # bookTicker doesn't always have "e" field in combined stream
            return self._parse_book_ticker(data)
        return None

    def _parse_trade(self, data: dict) -> TradeEvent:
        """
        Build a TradeEvent from a Binance trade message.

        Binance trade payload:
        {
            "e": "trade", "E": 1672515782136, "s": "BNBBTC",
            "t": 12345, "p": "0.001", "q": "100",
            "T": 1672515782136, "m": true
        }

        "T" is the trade time in ms; missing numeric fields default to 0.
        """
        return TradeEvent(
            provider=self.name,
            symbol=data.get("s", "").upper(),
            price=float(data.get("p", 0)),
            size=float(data.get("q", 0)),
            ts_exchange=_ms_to_dt(data.get("T")),
            side="sell" if data.get("m") else "buy",  # m=True → buyer is maker → trade is a sell
            trade_id=str(data.get("t", "")),
        )

    def _parse_book_ticker(self, data: dict) -> QuoteEvent:
        """
        Build a QuoteEvent from a Binance bookTicker message.

        Binance bookTicker payload:
        {
            "u": 400900217, "s": "BNBUSDT",
            "b": "25.35190000", "B": "31.21000000",
            "a": "25.36520000", "A": "40.66000000"
        }

        NOTE(review): the payload documented above carries no "E" field,
        so ts_exchange will typically be None here — confirm whether an
        event time is available on this stream.
        """
        return QuoteEvent(
            provider=self.name,
            symbol=data.get("s", "").upper(),
            bid=float(data.get("b", 0)),
            ask=float(data.get("a", 0)),
            bid_size=float(data.get("B", 0)),
            ask_size=float(data.get("A", 0)),
            ts_exchange=_ms_to_dt(data.get("E")),
        )

    async def close(self) -> None:
        """Close the WebSocket connection (best-effort) and log reconnect stats."""
        self._connected = False
        if self._ws:
            try:
                await self._ws.close()
            except Exception:
                pass
        logger.info(
            "binance.closed",
            extra={"reconnect_count": self._reconnect_count},
        )