""" matrix-bridge-dagi — Phase M1 scaffold Bridges Matrix/Element rooms to DAGI agents via Gateway. M1 scope: 1 room ↔ 1 agent (Sofiia), audit via sofiia-console internal endpoint. """ import asyncio import logging import os import time from contextlib import asynccontextmanager from typing import Any, Dict, Optional from fastapi import FastAPI, Response from fastapi.middleware.cors import CORSMiddleware try: import httpx as _httpx _HTTPX_OK = True except ImportError: # pragma: no cover _httpx = None # type: ignore _HTTPX_OK = False try: from prometheus_client import ( Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST, CollectorRegistry, REGISTRY, ) _PROM_OK = True except ImportError: # pragma: no cover _PROM_OK = False from .config import BridgeConfig, load_config from .ingress import MatrixIngressLoop from .room_mapping import RoomMappingConfig, parse_room_map logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s", ) logger = logging.getLogger("matrix-bridge-dagi") # ── Prometheus metrics ──────────────────────────────────────────────────────── if _PROM_OK: _messages_received = Counter( "matrix_bridge_messages_received_total", "Total Matrix messages received", ["room_id", "agent_id"], ) _messages_replied = Counter( "matrix_bridge_messages_replied_total", "Total agent replies sent to Matrix", ["room_id", "agent_id", "status"], ) _gateway_errors = Counter( "matrix_bridge_gateway_errors_total", "Errors calling DAGI gateway", ["error_type"], ) _invoke_latency = Histogram( "matrix_bridge_invoke_duration_seconds", "Duration of DAGI invoke call", ["agent_id"], ) _bridge_up = Gauge( "matrix_bridge_up", "1 if bridge started successfully", ) # ── Startup state ───────────────────────────────────────────────────────────── _START_TIME = time.monotonic() _cfg: Optional[BridgeConfig] = None _config_error: Optional[str] = None _matrix_reachable: Optional[bool] = None # probed at startup _gateway_reachable: Optional[bool] = None # probed at startup _room_map: Optional[RoomMappingConfig] = None _ingress_task: Optional[asyncio.Task] = None _ingress_stop: Optional[asyncio.Event] = None async def _probe_url(url: str, timeout: float = 5.0) -> bool: """Quick GET probe — returns True if HTTP 2xx.""" if not _HTTPX_OK or not url: return False try: async with _httpx.AsyncClient(timeout=timeout) as client: r = await client.get(url) return r.status_code < 400 except Exception: return False # ── Lifespan ────────────────────────────────────────────────────────────────── @asynccontextmanager async def lifespan(app_: Any): global _cfg, _config_error, _matrix_reachable, _gateway_reachable, _room_map try: _cfg = load_config() # Parse room mapping _room_map = parse_room_map( os.getenv("BRIDGE_ROOM_MAP", ""), _cfg.bridge_allowed_agents, ) logger.info( "✅ matrix-bridge-dagi started | node=%s build=%s homeserver=%s " "room=%s agents=%s mappings=%d", _cfg.node_id, _cfg.build_sha, _cfg.matrix_homeserver_url, _cfg.sofiia_room_id, list(_cfg.bridge_allowed_agents), _room_map.total_mappings, ) # Connectivity smoke probes (non-blocking failures) _matrix_reachable = await _probe_url( f"{_cfg.matrix_homeserver_url}/_matrix/client/versions" ) _gateway_reachable = await _probe_url( f"{_cfg.dagi_gateway_url}/health" ) if _matrix_reachable: logger.info("✅ Matrix homeserver reachable: %s", _cfg.matrix_homeserver_url) else: logger.warning("⚠️ Matrix homeserver NOT reachable: %s", _cfg.matrix_homeserver_url) if _gateway_reachable: logger.info("✅ DAGI Gateway reachable: %s", _cfg.dagi_gateway_url) else: logger.warning("⚠️ DAGI Gateway NOT reachable: %s", _cfg.dagi_gateway_url) if _PROM_OK: _bridge_up.set(1) # Start ingress loop (fire-and-forget asyncio task) if _room_map and _room_map.total_mappings > 0: _ingress_stop = asyncio.Event() def _on_msg(room_id: str, agent_id: str) -> None: if _PROM_OK: _messages_received.labels(room_id=room_id, agent_id=agent_id).inc() def _on_gw_error(error_type: str) -> None: if _PROM_OK: _gateway_errors.labels(error_type=error_type).inc() def _on_replied(room_id: str, agent_id: str, status: str) -> None: if _PROM_OK: _messages_replied.labels( room_id=room_id, agent_id=agent_id, status=status ).inc() ingress = MatrixIngressLoop( matrix_homeserver_url=_cfg.matrix_homeserver_url, matrix_access_token=_cfg.matrix_access_token, matrix_user_id=_cfg.matrix_user_id, router_url=_cfg.dagi_gateway_url, node_id=_cfg.node_id, room_map=_room_map, sofiia_console_url=_cfg.sofiia_console_url, sofiia_internal_token=_cfg.sofiia_internal_token, on_message_received=_on_msg, on_message_replied=_on_replied, on_gateway_error=_on_gw_error, ) _ingress_task = asyncio.create_task( ingress.run(_ingress_stop), name="matrix_ingress_loop", ) logger.info("✅ Ingress loop task started") else: logger.warning("⚠️ No room mappings — ingress loop NOT started") except (RuntimeError, ValueError) as exc: _config_error = str(exc) logger.error("❌ Config error: %s", _config_error) if _PROM_OK: _bridge_up.set(0) yield # Shutdown: cancel ingress loop if _ingress_stop: _ingress_stop.set() if _ingress_task and not _ingress_task.done(): _ingress_task.cancel() try: await asyncio.wait_for(_ingress_task, timeout=5.0) except (asyncio.CancelledError, asyncio.TimeoutError): pass logger.info("matrix-bridge-dagi shutting down") # ── App ─────────────────────────────────────────────────────────────────────── app = FastAPI( title="matrix-bridge-dagi", version="0.1.0", lifespan=lifespan, ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["GET", "POST"], allow_headers=["*"], ) # ── Health ──────────────────────────────────────────────────────────────────── @app.get("/health") async def health() -> Dict[str, Any]: uptime = int(time.monotonic() - _START_TIME) if _config_error or _cfg is None: return { "ok": False, "service": "matrix-bridge-dagi", "version": "0.1.0", "build": os.getenv("BUILD_SHA", "dev"), "uptime_s": uptime, "error": _config_error or "service not initialised", } matrix_ok = _matrix_reachable is True gateway_ok = _gateway_reachable is True overall_ok = matrix_ok and gateway_ok return { "ok": overall_ok, "service": "matrix-bridge-dagi", "version": "0.1.0", "build": _cfg.build_sha, "build_time": _cfg.build_time, "env": os.getenv("ENV", "dev"), "uptime_s": uptime, "node_id": _cfg.node_id, "homeserver": _cfg.matrix_homeserver_url, "matrix_reachable": _matrix_reachable, "bridge_user": _cfg.matrix_user_id, "sofiia_room_id": _cfg.sofiia_room_id, "allowed_agents": list(_cfg.bridge_allowed_agents), "gateway": _cfg.dagi_gateway_url, "gateway_reachable": _gateway_reachable, "mappings_count": _room_map.total_mappings if _room_map else 0, "config_ok": True, } # ── Bridge Mappings (read-only ops endpoint) ─────────────────────────────────── @app.get("/bridge/mappings") async def bridge_mappings() -> Dict[str, Any]: """ Returns room-to-agent mapping summary. Safe for ops visibility — no secrets included. """ if _cfg is None or _room_map is None: return { "ok": False, "error": _config_error or "service not initialised", "mappings": [], } return { "ok": True, "total": _room_map.total_mappings, "allowed_agents": list(_cfg.bridge_allowed_agents), "mappings": _room_map.as_summary(), } # ── Metrics ─────────────────────────────────────────────────────────────────── @app.get("/metrics") async def metrics(): if not _PROM_OK: return Response("# prometheus_client not available\n", media_type="text/plain") return Response(generate_latest(REGISTRY), media_type=CONTENT_TYPE_LATEST)