Files
microdao-daarion/services/matrix-bridge-dagi/app/main.py
Apple dbfab78f02 feat(matrix-bridge-dagi): add room mapping, ingress loop, synapse setup (PR-M1.2 + PR-M1.3)
PR-M1.2 — room-to-agent mapping:
- adds room_mapping.py: parse BRIDGE_ROOM_MAP (format: agent:!room_id:server)
- RoomMappingConfig with O(1) room→agent lookup, agent allowlist check
- /bridge/mappings endpoint (read-only ops summary, no secrets)
- health endpoint now includes mappings_count
- 21 tests for parsing, validation, allowlist, summary

PR-M1.3 — Matrix ingress loop:
- adds ingress.py: MatrixIngressLoop asyncio task
- sync_poll → extract → dedupe → _invoke_gateway (POST /v1/invoke)
- gateway payload: agent_id, node_id, message, metadata (transport, room_id, event_id, sender)
- exponential backoff on errors (2s..60s)
- joins all mapped rooms at startup
- metric callbacks: on_message_received, on_gateway_error
- graceful shutdown via asyncio.Event
- 5 ingress tests (invoke, dedupe, callbacks, empty-map idle)

Synapse setup (docker-compose.synapse-node1.yml):
- fixed volume: bind mount ./synapse-data instead of named volume
- added port mapping 127.0.0.1:8008:8008

Synapse running on NODA1 (localhost:8008), bot @dagi_bridge:daarion.space created,
room !QwHczWXgefDHBEVkTH:daarion.space created, all 4 values in .env on NODA1.

Made-with: Cursor
2026-03-03 07:51:13 -08:00

257 lines
9.4 KiB
Python

"""
matrix-bridge-dagi — Phase M1 scaffold
Bridges Matrix/Element rooms to DAGI agents via Gateway.
M1 scope: 1 room ↔ 1 agent (Sofiia), audit via sofiia-console internal endpoint.
"""
import asyncio
import logging
import os
import time
from contextlib import asynccontextmanager
from typing import Any, Dict, Optional
from fastapi import FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
try:
import httpx as _httpx
_HTTPX_OK = True
except ImportError: # pragma: no cover
_httpx = None # type: ignore
_HTTPX_OK = False
try:
from prometheus_client import (
Counter, Histogram, Gauge,
generate_latest, CONTENT_TYPE_LATEST,
CollectorRegistry, REGISTRY,
)
_PROM_OK = True
except ImportError: # pragma: no cover
_PROM_OK = False
from .config import BridgeConfig, load_config
from .ingress import MatrixIngressLoop
from .room_mapping import RoomMappingConfig, parse_room_map
# Root logging: INFO and above, one line per record with timestamp, level and
# logger name ahead of the message.
_LOG_FORMAT = "%(asctime)s %(levelname)s %(name)s %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Service-wide logger used by every handler in this module.
logger = logging.getLogger("matrix-bridge-dagi")
# ── Prometheus metrics ────────────────────────────────────────────────────────
# Metrics are only declared when prometheus_client imported successfully;
# every use elsewhere in this module is guarded by the same _PROM_OK flag.
if _PROM_OK:
    # Inbound Matrix messages, per room and per target agent.
    _messages_received = Counter(
        "matrix_bridge_messages_received_total",
        "Total Matrix messages received",
        labelnames=["room_id", "agent_id"],
    )

    # Replies posted back to Matrix, additionally labelled with a status.
    _messages_replied = Counter(
        "matrix_bridge_messages_replied_total",
        "Total agent replies sent to Matrix",
        labelnames=["room_id", "agent_id", "status"],
    )

    # Failed gateway calls, grouped by error type.
    _gateway_errors = Counter(
        "matrix_bridge_gateway_errors_total",
        "Errors calling DAGI gateway",
        labelnames=["error_type"],
    )

    # Invoke call duration per agent.
    _invoke_latency = Histogram(
        "matrix_bridge_invoke_duration_seconds",
        "Duration of DAGI invoke call",
        labelnames=["agent_id"],
    )

    # Set to 1 after a successful startup and to 0 on a configuration error.
    _bridge_up = Gauge("matrix_bridge_up", "1 if bridge started successfully")
# ── Startup state ─────────────────────────────────────────────────────────────
# Monotonic reference taken at import time; /health derives uptime from it.
_START_TIME = time.monotonic()

# Configuration outcome: exactly one of these is populated after startup.
_cfg: Optional[BridgeConfig] = None
_config_error: Optional[str] = None

# Room-to-agent mapping parsed from BRIDGE_ROOM_MAP during startup.
_room_map: Optional[RoomMappingConfig] = None

# Connectivity probe results; None until the startup probes have run.
_matrix_reachable: Optional[bool] = None  # probed at startup
_gateway_reachable: Optional[bool] = None  # probed at startup

# Handles for the background ingress loop and its stop signal.
_ingress_task: Optional[asyncio.Task] = None
_ingress_stop: Optional[asyncio.Event] = None
async def _probe_url(url: str, timeout: float = 5.0) -> bool:
    """Send a single GET to *url* and report whether it answered without an error status.

    Args:
        url: URL to probe. An empty value is reported as unreachable.
        timeout: Timeout handed to ``httpx.AsyncClient``.

    Returns:
        True when a response arrives with a status code below 400 (so a
        redirect also counts as reachable). False when httpx is not
        installed, *url* is empty, or the request raises.
    """
    if not _HTTPX_OK or not url:
        return False
    try:
        async with _httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(url)
    except Exception as exc:
        # Best-effort probe: never propagate, but leave a trace of the reason
        # so an unreachable dependency can be diagnosed from the logs.
        logger.debug("Probe of %s failed: %s", url, exc)
        return False
    return response.status_code < 400
# ── Lifespan ──────────────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app_: Any):
    """Run bridge startup before the app serves and shutdown after it stops.

    Startup loads the configuration, parses ``BRIDGE_ROOM_MAP``, probes the
    Matrix homeserver and the DAGI gateway, and starts the ingress loop task
    when at least one room mapping exists. A ``RuntimeError`` or
    ``ValueError`` raised during startup is stored in ``_config_error``
    rather than aborting the app, so ``/health`` can report it.

    Shutdown sets the stop event, cancels the ingress task and waits up to
    5 seconds for it to finish.

    Args:
        app_: Application instance supplied by the framework (unused).
    """
    global _cfg, _config_error, _matrix_reachable, _gateway_reachable, _room_map
    # These two are assigned below, so they must be declared global as well.
    # Without the declaration they become function locals, and the shutdown
    # code raises UnboundLocalError whenever the ingress loop was not started.
    global _ingress_task, _ingress_stop

    try:
        _cfg = load_config()

        # Parse room mapping
        _room_map = parse_room_map(
            os.getenv("BRIDGE_ROOM_MAP", ""),
            _cfg.bridge_allowed_agents,
        )

        logger.info(
            "✅ matrix-bridge-dagi started | node=%s build=%s homeserver=%s "
            "room=%s agents=%s mappings=%d",
            _cfg.node_id, _cfg.build_sha, _cfg.matrix_homeserver_url,
            _cfg.sofiia_room_id, list(_cfg.bridge_allowed_agents),
            _room_map.total_mappings,
        )

        # Connectivity smoke probes: a failed probe is only logged and
        # surfaced via /health, it does not stop startup.
        _matrix_reachable = await _probe_url(
            f"{_cfg.matrix_homeserver_url}/_matrix/client/versions"
        )
        _gateway_reachable = await _probe_url(
            f"{_cfg.dagi_gateway_url}/health"
        )
        if _matrix_reachable:
            logger.info("✅ Matrix homeserver reachable: %s", _cfg.matrix_homeserver_url)
        else:
            logger.warning("⚠️ Matrix homeserver NOT reachable: %s", _cfg.matrix_homeserver_url)
        if _gateway_reachable:
            logger.info("✅ DAGI Gateway reachable: %s", _cfg.dagi_gateway_url)
        else:
            logger.warning("⚠️ DAGI Gateway NOT reachable: %s", _cfg.dagi_gateway_url)

        if _PROM_OK:
            _bridge_up.set(1)

        # Start the ingress loop as a background task; the handle is kept in
        # module state so shutdown can stop it.
        if _room_map and _room_map.total_mappings > 0:
            _ingress_stop = asyncio.Event()

            def _on_msg(room_id: str, agent_id: str) -> None:
                if _PROM_OK:
                    _messages_received.labels(room_id=room_id, agent_id=agent_id).inc()

            def _on_gw_error(error_type: str) -> None:
                if _PROM_OK:
                    _gateway_errors.labels(error_type=error_type).inc()

            ingress = MatrixIngressLoop(
                matrix_homeserver_url=_cfg.matrix_homeserver_url,
                matrix_access_token=_cfg.matrix_access_token,
                matrix_user_id=_cfg.matrix_user_id,
                gateway_url=_cfg.dagi_gateway_url,
                node_id=_cfg.node_id,
                room_map=_room_map,
                on_message_received=_on_msg,
                on_gateway_error=_on_gw_error,
            )
            _ingress_task = asyncio.create_task(
                ingress.run(_ingress_stop),
                name="matrix_ingress_loop",
            )
            logger.info("✅ Ingress loop task started")
        else:
            logger.warning("⚠️ No room mappings — ingress loop NOT started")
    except (RuntimeError, ValueError) as exc:
        _config_error = str(exc)
        logger.error("❌ Config error: %s", _config_error)
        if _PROM_OK:
            _bridge_up.set(0)

    try:
        yield
    finally:
        # Shutdown: signal the loop, then cancel and wait for the task.
        # Running this in ``finally`` stops the task even when the application
        # exits the context with an exception.
        if _ingress_stop is not None:
            _ingress_stop.set()
        if _ingress_task is not None and not _ingress_task.done():
            _ingress_task.cancel()
            try:
                await asyncio.wait_for(_ingress_task, timeout=5.0)
            except (asyncio.CancelledError, asyncio.TimeoutError):
                pass
            except Exception:
                # The task died with its own error while stopping; record it
                # instead of failing the shutdown sequence.
                logger.exception("Ingress loop task failed during shutdown")
        # Drop stale handles so a later lifespan run starts from a clean state.
        _ingress_task = None
        _ingress_stop = None
        logger.info("matrix-bridge-dagi shutting down")
# ── App ───────────────────────────────────────────────────────────────────────
# ASGI application; startup and shutdown are delegated to ``lifespan``.
app = FastAPI(title="matrix-bridge-dagi", version="0.1.0", lifespan=lifespan)

# CORS: any origin and any request header, GET and POST methods only.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["GET", "POST"],
    allow_origins=["*"],
)
# ── Health ────────────────────────────────────────────────────────────────────
@app.get("/health")
async def health() -> Dict[str, Any]:
    """Report service health from the state captured at startup.

    Returns:
        A short error payload (``ok`` False plus ``error``) when configuration
        failed or has not been loaded yet. Otherwise the full status payload,
        where ``ok`` is True only if both startup probes (Matrix homeserver
        and DAGI gateway) succeeded.
    """
    elapsed_s = int(time.monotonic() - _START_TIME)

    # Guard: no usable configuration, so only the minimal payload is available.
    if _cfg is None or _config_error:
        failure: Dict[str, Any] = {
            "ok": False,
            "service": "matrix-bridge-dagi",
            "version": "0.1.0",
            "build": os.getenv("BUILD_SHA", "dev"),
            "uptime_s": elapsed_s,
            "error": _config_error or "service not initialised",
        }
        return failure

    # A probe result of None (never probed) counts as not healthy.
    healthy = _matrix_reachable is True and _gateway_reachable is True

    if _room_map:
        mapping_total = _room_map.total_mappings
    else:
        mapping_total = 0

    status: Dict[str, Any] = {
        "ok": healthy,
        "service": "matrix-bridge-dagi",
        "version": "0.1.0",
        "build": _cfg.build_sha,
        "build_time": _cfg.build_time,
        "env": os.getenv("ENV", "dev"),
        "uptime_s": elapsed_s,
        "node_id": _cfg.node_id,
        "homeserver": _cfg.matrix_homeserver_url,
        "matrix_reachable": _matrix_reachable,
        "bridge_user": _cfg.matrix_user_id,
        "sofiia_room_id": _cfg.sofiia_room_id,
        "allowed_agents": list(_cfg.bridge_allowed_agents),
        "gateway": _cfg.dagi_gateway_url,
        "gateway_reachable": _gateway_reachable,
        "mappings_count": mapping_total,
        "config_ok": True,
    }
    return status
# ── Bridge Mappings (read-only ops endpoint) ───────────────────────────────────
@app.get("/bridge/mappings")
async def bridge_mappings() -> Dict[str, Any]:
    """Return the room-to-agent mapping summary for operators.

    The payload carries the mapping count, the allowed agents and the
    summary produced by the room map. When configuration or the room map
    is not loaded, an error payload with an empty ``mappings`` list is
    returned instead.
    """
    initialised = _cfg is not None and _room_map is not None
    if initialised:
        summary = _room_map.as_summary()
        return {
            "ok": True,
            "total": _room_map.total_mappings,
            "allowed_agents": list(_cfg.bridge_allowed_agents),
            "mappings": summary,
        }

    reason = _config_error or "service not initialised"
    return {"ok": False, "error": reason, "mappings": []}
# ── Metrics ───────────────────────────────────────────────────────────────────
@app.get("/metrics")
async def metrics():
    """Serve the Prometheus registry, or a placeholder if the client is missing."""
    if _PROM_OK:
        payload = generate_latest(REGISTRY)
        return Response(payload, media_type=CONTENT_TYPE_LATEST)
    # prometheus_client could not be imported: answer with a plain-text note.
    return Response("# prometheus_client not available\n", media_type="text/plain")