feat(matrix-bridge-dagi): scaffold service with health, metrics and config (PR-M1.0)

New service: services/matrix-bridge-dagi/
- app/config.py: BridgeConfig dataclass, load_config() with full env validation
  (MATRIX_HOMESERVER_URL, MATRIX_ACCESS_TOKEN, MATRIX_USER_ID, SOFIIA_ROOM_ID,
   DAGI_GATEWAY_URL, SOFIIA_CONSOLE_URL, SOFIIA_INTERNAL_TOKEN, rate limits)
- app/main.py: FastAPI app with lifespan, GET /health, GET /metrics (prometheus)
  health returns: ok, node_id, homeserver, bridge_user, sofiia_room_id,
  allowed_agents, gateway, uptime_s; graceful error state when config missing
- requirements.txt: fastapi, uvicorn, httpx, python-dotenv, prometheus-client, pyyaml
- Dockerfile: python:3.11-slim, port 7030, BUILD_SHA/BUILD_TIME args

docker-compose.matrix-bridge-node1.yml:
- standalone override file (node1 network, port 127.0.0.1:7030)
- all env vars wired: MATRIX_*, SOFIIA_ROOM_ID, DAGI_GATEWAY_URL,
  SOFIIA_CONSOLE_URL, SOFIIA_INTERNAL_TOKEN, rate limit policy
- healthcheck, restart: unless-stopped

DoD: config validates, health/metrics respond, imports clean
Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:28:24 -08:00
parent 5994a3a56f
commit 1d8482f4c1
6 changed files with 292 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
# Matrix Bridge DAGI — Phase M1
# Include into the main NODA1 stack or run standalone:
#   docker compose -f docker-compose.node1.yml -f docker-compose.matrix-bridge-node1.yml up -d matrix-bridge-dagi
version: "3.9"

services:
  matrix-bridge-dagi:
    build:
      context: ./services/matrix-bridge-dagi
      args:
        BUILD_SHA: "${BUILD_SHA:-dev}"
        BUILD_TIME: "${BUILD_TIME:-local}"
    container_name: matrix-bridge-dagi-node1
    ports:
      - "127.0.0.1:7030:7030"  # internal only — not exposed publicly
    environment:
      - PORT=7030
      - ENV=prod
      - NODE_ID=NODA1
      - BUILD_SHA=${BUILD_SHA:-dev}
      - BUILD_TIME=${BUILD_TIME:-local}
      # ── Matrix homeserver ────────────────────────────────────────────────
      # Required: set in .env on NODA1 before first launch
      - MATRIX_HOMESERVER_URL=${MATRIX_HOMESERVER_URL:-}
      - MATRIX_ACCESS_TOKEN=${MATRIX_ACCESS_TOKEN:-}
      - MATRIX_USER_ID=${MATRIX_USER_ID:-}
      # ── Room → Agent mapping (M1: single room for Sofiia) ────────────────
      # Create the room manually, then paste the room_id here
      - SOFIIA_ROOM_ID=${SOFIIA_ROOM_ID:-}
      # ── DAGI backend ─────────────────────────────────────────────────────
      - DAGI_GATEWAY_URL=http://dagi-gateway-node1:9300
      - DEFAULT_NODE_ID=NODA1
      # ── Sofiia Console (audit write) ─────────────────────────────────────
      - SOFIIA_CONSOLE_URL=http://dagi-sofiia-console-node1:8002
      - SOFIIA_INTERNAL_TOKEN=${SOFIIA_INTERNAL_TOKEN:-}
      # ── Policy ───────────────────────────────────────────────────────────
      - BRIDGE_ALLOWED_AGENTS=sofiia
      - RATE_LIMIT_ROOM_RPM=20
      - RATE_LIMIT_SENDER_RPM=10
    healthcheck:
      # /health reports configuration failures in the response body
      # ("ok": false) rather than through the HTTP status, so the probe must
      # inspect the body: exit 0 only when the service says it is ok.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import json, sys, urllib.request; sys.exit(0 if json.load(urllib.request.urlopen('http://localhost:7030/health', timeout=5)).get('ok') else 1)"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s
    networks:
      - dagi-network
    restart: unless-stopped

networks:
  dagi-network:
    external: true

View File

@@ -0,0 +1,17 @@
# Image for the matrix-bridge-dagi service (FastAPI app served by uvicorn).
FROM python:3.11-slim

WORKDIR /app

# Dependencies are installed before the application code is copied so this
# layer is reused when only files under app/ change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ ./app/

# Build metadata supplied by the compose file; exported as environment
# variables so the running service can read it.
ARG BUILD_SHA=dev
ARG BUILD_TIME=local
ENV BUILD_SHA=${BUILD_SHA}
ENV BUILD_TIME=${BUILD_TIME}
ENV PYTHONUNBUFFERED=1
ENV PORT=7030

EXPOSE 7030

# `sh -c` is kept so ${PORT} is expanded at container start. `exec` replaces
# the shell with uvicorn, making it PID 1 so the stop signal sent by
# `docker stop` reaches the server directly and it can shut down gracefully.
CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7030}"]

View File

@@ -0,0 +1,68 @@
"""
matrix-bridge-dagi — configuration and validation
"""
import os
from dataclasses import dataclass, field
from typing import FrozenSet
@dataclass(frozen=True)
class BridgeConfig:
    """Immutable settings for the Matrix bridge, populated by ``load_config()``.

    The instance is frozen: attributes cannot be reassigned after creation.
    """

    # --- Matrix homeserver -------------------------------------------------
    matrix_homeserver_url: str
    matrix_access_token: str
    # Bridge account, e.g. @dagi_bridge:daarion.space
    matrix_user_id: str

    # --- Room → agent mapping (M1: single room) ----------------------------
    # e.g. !abcdef:daarion.space
    sofiia_room_id: str

    # --- DAGI backend ------------------------------------------------------
    # e.g. http://dagi-gateway-node1:9300
    dagi_gateway_url: str
    # e.g. NODA1
    default_node_id: str

    # --- Sofiia Console (audit write) --------------------------------------
    # e.g. http://dagi-sofiia-console-node1:8002
    sofiia_console_url: str
    # X-Internal-Service-Token for audit ingest
    sofiia_internal_token: str

    # --- Policy ------------------------------------------------------------
    bridge_allowed_agents: FrozenSet[str]
    # max messages per room per minute
    rate_limit_room_rpm: int
    # max messages per sender per minute
    rate_limit_sender_rpm: int

    # --- Service identity --------------------------------------------------
    node_id: str
    build_sha: str
    build_time: str
def load_config() -> BridgeConfig:
    """Load and validate the bridge configuration from environment variables.

    Required variables: MATRIX_HOMESERVER_URL, MATRIX_ACCESS_TOKEN,
    MATRIX_USER_ID, SOFIIA_ROOM_ID and DAGI_GATEWAY_URL. All other variables
    fall back to defaults. Values are stripped of surrounding whitespace and
    URL values additionally lose any trailing ``/``.

    Returns:
        A frozen ``BridgeConfig`` built from the environment.

    Raises:
        RuntimeError: if a required variable is missing or blank, or if
            RATE_LIMIT_ROOM_RPM / RATE_LIMIT_SENDER_RPM is not an integer.
    """

    def _require(key: str) -> str:
        value = os.getenv(key, "").strip()
        if not value:
            raise RuntimeError(f"Required env var {key!r} is not set")
        return value

    def _optional(key: str, default: str = "") -> str:
        return os.getenv(key, default).strip()

    def _optional_int(key: str, default: int) -> int:
        # A blank value is treated like an unset one. Anything else must
        # parse as an integer; the failure is reported as RuntimeError (the
        # same type used for missing variables) so callers that handle
        # configuration errors see a single exception type.
        raw = _optional(key)
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            raise RuntimeError(
                f"Env var {key!r} must be an integer, got {raw!r}"
            ) from None

    allowed_raw = _optional("BRIDGE_ALLOWED_AGENTS", "sofiia")
    allowed = frozenset(
        name for name in (part.strip() for part in allowed_raw.split(",")) if name
    )

    # Every value is read and validated before the config object is built, so
    # a bad environment fails before any object construction is attempted.
    settings = {
        "matrix_homeserver_url": _require("MATRIX_HOMESERVER_URL").rstrip("/"),
        "matrix_access_token": _require("MATRIX_ACCESS_TOKEN"),
        "matrix_user_id": _require("MATRIX_USER_ID"),
        "sofiia_room_id": _require("SOFIIA_ROOM_ID"),
        "dagi_gateway_url": _require("DAGI_GATEWAY_URL").rstrip("/"),
        "default_node_id": _optional("DEFAULT_NODE_ID", "NODA1"),
        "sofiia_console_url": _optional("SOFIIA_CONSOLE_URL", "").rstrip("/"),
        "sofiia_internal_token": _optional("SOFIIA_INTERNAL_TOKEN", ""),
        "bridge_allowed_agents": allowed,
        "rate_limit_room_rpm": _optional_int("RATE_LIMIT_ROOM_RPM", 20),
        "rate_limit_sender_rpm": _optional_int("RATE_LIMIT_SENDER_RPM", 10),
        "node_id": _optional("NODE_ID", "NODA1"),
        "build_sha": _optional("BUILD_SHA", "dev"),
        "build_time": _optional("BUILD_TIME", "local"),
    }
    return BridgeConfig(**settings)

View File

@@ -0,0 +1,136 @@
"""
matrix-bridge-dagi — Phase M1 scaffold
Bridges Matrix/Element rooms to DAGI agents via Gateway.
M1 scope: 1 room ↔ 1 agent (Sofiia), audit via sofiia-console internal endpoint.
"""
import logging
import os
import time
from contextlib import asynccontextmanager
from typing import Any, Dict
from fastapi import FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
try:
from prometheus_client import (
Counter, Histogram, Gauge,
generate_latest, CONTENT_TYPE_LATEST,
CollectorRegistry, REGISTRY,
)
_PROM_OK = True
except ImportError: # pragma: no cover
_PROM_OK = False
from .config import BridgeConfig, load_config
# Process-wide logging: INFO and above, one line per record in the form
# "<time> <level> <logger name> <message>".
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    level=logging.INFO,
)

# Logger shared by this module.
logger = logging.getLogger("matrix-bridge-dagi")
# ── Prometheus metrics ────────────────────────────────────────────────────────
# The collectors below exist only when prometheus_client could be imported, so
# every use site has to check ``_PROM_OK`` first.
if _PROM_OK:
    _messages_received = Counter(
        "matrix_bridge_messages_received_total",
        "Total Matrix messages received",
        labelnames=["room_id", "agent_id"],
    )
    _messages_replied = Counter(
        "matrix_bridge_messages_replied_total",
        "Total agent replies sent to Matrix",
        labelnames=["room_id", "agent_id", "status"],
    )
    _gateway_errors = Counter(
        "matrix_bridge_gateway_errors_total",
        "Errors calling DAGI gateway",
        labelnames=["error_type"],
    )
    _invoke_latency = Histogram(
        "matrix_bridge_invoke_duration_seconds",
        "Duration of DAGI invoke call",
        labelnames=["agent_id"],
    )
    _bridge_up = Gauge("matrix_bridge_up", "1 if bridge started successfully")

# ── Startup state ─────────────────────────────────────────────────────────────
# Monotonic reference point taken at import time; /health derives uptime from it.
_START_TIME = time.monotonic()
# Filled in by the lifespan handler: the loaded config on success, or the
# error text when loading failed.
_cfg: BridgeConfig | None = None
_config_error: str | None = None
# ── Lifespan ──────────────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app_: Any):
    """Application lifespan: load the configuration once at startup.

    On success the module-level ``_cfg`` holds the loaded config and the
    ``matrix_bridge_up`` gauge is set to 1. On a configuration error the
    service still starts, but ``_config_error`` holds the error text, ``_cfg``
    is ``None`` and the gauge is set to 0, so /health can report the problem.

    Args:
        app_: the application instance passed by the framework (unused).
    """
    global _cfg, _config_error
    try:
        _cfg = load_config()
    except (RuntimeError, ValueError) as exc:
        # ValueError is handled as well: a malformed numeric setting (e.g. a
        # non-integer RATE_LIMIT_* value passed to int()) must put the service
        # into the same degraded state as a missing variable rather than
        # aborting startup.
        _cfg = None
        _config_error = str(exc)
        logger.error("❌ Config error: %s", _config_error)
        if _PROM_OK:
            _bridge_up.set(0)
    else:
        # Clear any error left over from a previous startup in this process.
        _config_error = None
        logger.info(
            "✅ matrix-bridge-dagi started | node=%s build=%s homeserver=%s room=%s agents=%s",
            _cfg.node_id,
            _cfg.build_sha,
            _cfg.matrix_homeserver_url,
            _cfg.sofiia_room_id,
            sorted(_cfg.bridge_allowed_agents),
        )
        if _PROM_OK:
            _bridge_up.set(1)

    yield

    logger.info("matrix-bridge-dagi shutting down")
# ── App ───────────────────────────────────────────────────────────────────────
# ASGI application; configuration loading happens in ``lifespan`` at startup.
app = FastAPI(title="matrix-bridge-dagi", version="0.1.0", lifespan=lifespan)

# NOTE(review): CORS is open to every origin and header, and POST is allowed
# although only GET routes are visible in this file — confirm this is intended
# for a service that is meant to be internal-only.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["GET", "POST"],
    allow_origins=["*"],
)
# ── Health ────────────────────────────────────────────────────────────────────
@app.get("/health")
async def health() -> Dict[str, Any]:
    """Report service status and, when configured, the active bridge settings.

    Returns:
        A JSON-serialisable dict. When configuration failed or has not been
        loaded yet, ``ok`` is ``False`` and ``error`` carries the reason.
        Otherwise ``ok`` is ``True`` and the payload includes node, homeserver,
        bridge user, room, allowed agents and gateway. The failure case is
        signalled in the body through ``ok``.
    """
    uptime = int(time.monotonic() - _START_TIME)
    # Read the global once so both the check and the payload use one value.
    cfg = _cfg

    if _config_error or cfg is None:
        return {
            "ok": False,
            "service": "matrix-bridge-dagi",
            "version": "0.1.0",
            "build": os.getenv("BUILD_SHA", "dev"),
            "uptime_s": uptime,
            "error": _config_error or "service not initialised",
        }

    return {
        "ok": True,
        "service": "matrix-bridge-dagi",
        "version": "0.1.0",
        "build": cfg.build_sha,
        "build_time": cfg.build_time,
        "env": os.getenv("ENV", "dev"),
        "uptime_s": uptime,
        "node_id": cfg.node_id,
        "homeserver": cfg.matrix_homeserver_url,
        "bridge_user": cfg.matrix_user_id,
        "sofiia_room_id": cfg.sofiia_room_id,
        # The agents are stored in a frozenset, whose iteration order is not
        # specified; sorting keeps the response stable between calls and
        # restarts.
        "allowed_agents": sorted(cfg.bridge_allowed_agents),
        "gateway": cfg.dagi_gateway_url,
        "config_ok": True,
    }
# ── Metrics ───────────────────────────────────────────────────────────────────
@app.get("/metrics")
async def metrics():
    """Expose the metrics registry in the Prometheus text format.

    When prometheus_client could not be imported, a plain-text placeholder
    comment is returned instead.
    """
    if _PROM_OK:
        payload = generate_latest(REGISTRY)
        return Response(payload, media_type=CONTENT_TYPE_LATEST)
    return Response("# prometheus_client not available\n", media_type="text/plain")

View File

@@ -0,0 +1,6 @@
# Runtime dependencies for matrix-bridge-dagi.
# Web framework used by app/main.py.
fastapi>=0.104.0
# ASGI server started by the Dockerfile CMD.
uvicorn[standard]>=0.24.0
# NOTE(review): not imported by the code in this commit — presumably for the
# upcoming Matrix / gateway HTTP calls; confirm.
httpx>=0.25.0
# NOTE(review): not imported by the code in this commit (configuration is read
# with os.getenv) — confirm it is needed.
python-dotenv>=1.0.0
# Metrics collectors and the /metrics exposition in app/main.py.
prometheus-client>=0.20.0
# NOTE(review): not imported by the code in this commit — confirm it is needed.
pyyaml>=6.0