merge: integrate remote codex/sync-node1-runtime with fabric layer changes
Resolve conflicts in docker-compose.node1.yml, services/router/main.py, and gateway-bot/services/doc_service.py, keeping both the fabric layer (NCS, node-worker, Prometheus) and the document ingest/query endpoints.

Made-with: Cursor
@@ -3,7 +3,7 @@ FROM python:3.11-slim
 LABEL maintainer="DAARION.city Team"
 LABEL description="Bot Gateway - Telegram/Discord webhook handler with DAARWIZZ"
-LABEL version="0.2.0"
+LABEL version="0.2.1"

 WORKDIR /app/gateway-bot

@@ -15,7 +15,15 @@ RUN pip install --no-cache-dir \
     uvicorn==0.27.0 \
     httpx==0.26.0 \
     pydantic==2.5.3 \
-    python-multipart==0.0.6 prometheus-client>=0.20.0 PyPDF2>=3.0.0 crewai nats-py pandas openpyxl
+    python-multipart==0.0.6 \
+    prometheus-client==0.22.1 \
+    PyPDF2>=3.0.0 \
+    crewai \
+    nats-py \
+    pandas \
+    openpyxl \
+    python-docx \
+    redis==5.0.1

 # Copy gateway code and DAARWIZZ prompt
 COPY . .

@@ -19,7 +19,8 @@
         "onboarding",
         "ecosystem"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "city-core"
     },
     "helion": {
       "display_name": "Helion",

@@ -35,7 +36,8 @@
         "market_analysis",
         "biominer"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "helion"
     },
     "alateya": {
       "display_name": "Aletheia",

@@ -58,7 +60,8 @@
         "email": "alverjob@gmail.com",
         "site": "https://alverjob.xyz",
         "youtube": "https://www.youtube.com/@alverjob72"
-      }
+      },
+      "district_id": "alateya"
     },
     "druid": {
       "display_name": "DRUID",

@@ -76,7 +79,8 @@
         "inci",
         "safety_basics"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "druid"
     },
     "nutra": {
       "display_name": "NUTRA",

@@ -93,7 +97,8 @@
         "vitamins",
         "microbiome"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "nutra"
     },
     "agromatrix": {
       "display_name": "Степан Матрікс",

@@ -110,7 +115,8 @@
         "logistics",
         "farm_economics"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "agromatrix"
     },
     "greenfood": {
       "display_name": "GREENFOOD",

@@ -127,7 +133,8 @@
         "food_production",
         "sales"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "greenfood"
     },
     "clan": {
       "display_name": "CLAN",

@@ -143,7 +150,8 @@
         "culture",
         "facilitation"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "clan"
     },
     "eonarch": {
       "display_name": "EONARCH",

@@ -159,7 +167,8 @@
         "transformation",
         "spirituality"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "eonarch"
     },
     "yaromir": {
       "display_name": "YAROMIR",

@@ -175,7 +184,8 @@
         "code_review",
         "strategy"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "city-core"
     },
     "soul": {
       "display_name": "SOUL",

@@ -191,7 +201,8 @@
         "values",
         "wellbeing"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "soul"
     },
     "senpai": {
       "display_name": "SENPAI",

@@ -207,7 +218,8 @@
         "defi",
         "portfolio"
       ],
-      "mentor": null
+      "mentor": null,
+      "district_id": "senpai"
     },
     "oneok": {
       "display_name": "1OK",

@@ -227,7 +239,8 @@
       "mentor": {
         "name": "Ілля Титар",
         "telegram": "@Titar240581"
-      }
+      },
+      "district_id": "city-core"
     },
     "sofiia": {
       "display_name": "Sophia",

@@ -242,7 +255,24 @@
         "platform_evolution",
         "technical_leadership"
       ],
-      "mentor": null
-    }
+      "mentor": null,
+      "district_id": "city-core"
+    },
+    "dario": {
+      "display_name": "DARIO",
+      "canonical_role": "Future DAARION Agent (planned, not launched)",
+      "prompt_file": "dario_prompt.txt",
+      "telegram_mode": "disabled",
+      "visibility": "private",
+      "status": "planned",
+      "district_id": "city-core",
+      "domains": [
+        "city_ops",
+        "coordination",
+        "support"
+      ],
+      "mentor": null,
+      "launch_state": "planned"
+    }
   }
 }

@@ -32,7 +32,9 @@

 ## B. SHORT-FIRST

-**Default: a structured answer with 3-5 bullet points.**
+**Default: a lively, short answer of 1-3 sentences in natural language.**
+**Use bullet lists/templates of 3-5 points only when the user asks for detail, a plan, a checklist, or a calculation.**
+**The first message in a new topic is conversational, with no officialese and no "robot tone".**

 FORBIDDEN:
 - "Glad to help", "Ready to collaborate"

@@ -55,7 +57,9 @@

 **IMPORTANT:**
 - Never say "I cannot listen to audio": voice messages have already been converted to text!
-- Never say "I cannot see/analyze images": you HAVE the Vision API and CAN analyze photos! If the conversation history contains your description of an image, that means you have already analyzed it via Vision. Do not deny this.
+- Analyze photos from the currently available context: if an image is in the request or has just been sent, comment on its substance.
+- If the file itself, or enough clarity, is missing for a precise conclusion, explain this in plain human language and ask for the photo to be resent, specifying exactly what to check.
+- Do not use service phrases such as "text-only", "vision unavailable", "a technical limitation of the model".

 Initial mode: learner. Start by asking clarifying questions and learn from the mentor.
 Public group: @agromatrix.

@@ -94,7 +98,8 @@
 - You think long-range: you propose a solution architecture rather than patching symptoms.
 - Be creative, but do not invent data: if there are no facts, mark them as assumptions and suggest what to collect.
 - Communicate in Ukrainian (unless the user has switched to another language).
-- Format answers in a structured way: headings, lists, short blocks, priorities.
+- Keep a conversational tone: short, natural phrasing without excessive templating.
+- Switch on structured formatting (headings/lists/tables) only for complex tasks or when explicitly requested.

 ### 4) Principles of working with the user
 1. Context first → then the solution. If context is lacking, make a minimal set of assumptions and, in parallel, suggest which data to clarify.

@@ -113,6 +118,8 @@
 - "Prepare the text/structure of a page/presentation for an AgroMatrix product"

 ### 6) How you shape answers (the standard template)
+Use this template ONLY for complex requests (season planning, economics, SOPs, integrations, technical specs).
+For ordinary short questions, answer organically in 1-3 sentences, without mandatory sections.
 1. Goal (1-2 sentences)
 2. Input data (what is known / what assumptions)
 3. Solution (plan/algorithm/steps)

@@ -1,12 +1,13 @@
-"""
-FastAPI app instance for Gateway Bot
-"""
+"""FastAPI app instance for Gateway Bot."""
 import logging

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware

 from http_api import router as gateway_router
 from http_api_doc import router as doc_router
+from daarion_facade.invoke_api import router as invoke_router
+from daarion_facade.registry_api import router as registry_router

 logging.basicConfig(
     level=logging.INFO,

@@ -15,36 +16,47 @@ logging.basicConfig(

 app = FastAPI(
     title="Bot Gateway with DAARWIZZ",
-    version="1.0.0",
-    description="Gateway service for Telegram/Discord bots → DAGI Router"
+    version="1.1.0",
+    description="Gateway service for Telegram/Discord bots + DAARION public facade"
 )

-# CORS middleware
+# CORS for web UI clients (gateway only).
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
+    allow_origins=[
+        "https://daarion.city",
+        "https://www.daarion.city",
+        "http://localhost:3000",
+    ],
+    allow_origin_regex=r"https://.*\.lovable\.app",
     allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
+    allow_methods=["GET", "POST", "OPTIONS"],
+    allow_headers=["Authorization", "Content-Type"],
 )

-# Include gateway routes
+# Existing gateway routes.
 app.include_router(gateway_router, prefix="", tags=["gateway"])
 app.include_router(doc_router, prefix="", tags=["docs"])

+# Public facade routes for DAARION.city UI.
+app.include_router(registry_router)
+app.include_router(invoke_router)


 @app.get("/")
 async def root():
     return {
         "service": "bot-gateway",
-        "version": "1.0.0",
+        "version": "1.1.0",
         "agent": "DAARWIZZ",
         "endpoints": [
             "POST /telegram/webhook",
             "POST /discord/webhook",
             "POST /api/doc/parse",
             "POST /api/doc/ingest",
             "POST /api/doc/ask",
             "GET /api/doc/context/{session_id}",
-            "GET /health"
+            "GET /v1/registry/agents",
+            "GET /v1/registry/districts",
+            "GET /v1/metrics",
+            "POST /v1/invoke",
+            "GET /v1/jobs/{job_id}",
+            "GET /health",
         ]
     }
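For reference, a minimal client sketch of the new facade flow (enqueue via POST /v1/invoke, then poll the job). The base URL is an assumption for local testing, not something this commit documents:

# Sketch: drive the public facade end to end.
import asyncio
import httpx

GATEWAY = "http://localhost:8000"  # assumed host/port; depends on deployment

async def ask(agent_id: str, prompt: str) -> dict:
    async with httpx.AsyncClient(base_url=GATEWAY, timeout=30) as client:
        resp = await client.post("/v1/invoke", json={
            "agent_id": agent_id,
            "input": {"prompt": prompt},
            "metadata": {"source": "example"},
        })
        resp.raise_for_status()
        job = resp.json()  # {"job_id": ..., "status": "queued", "status_url": "/v1/jobs/..."}
        while True:
            state = (await client.get(job["status_url"])).json()
            if state["status"] in {"done", "failed"}:
                return state
            await asyncio.sleep(0.5)

print(asyncio.run(ask("daarwizz", "Hello!"))["status"])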
gateway-bot/daarion_facade/__init__.py (new file, 1 line)

"""DAARION public facade package."""
gateway-bot/daarion_facade/invoke_api.py (new file, 212 lines)

import asyncio
from datetime import datetime, timezone
import hmac
import json
import os
import uuid
from typing import Any, Dict, List

import httpx
from fastapi import APIRouter, HTTPException, Request, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field

from .redis_jobs import create_job, enqueue_job, get_job
from .registry_api import _load_registry

router = APIRouter(prefix="/v1", tags=["daarion-facade"])

EVENT_TERMINAL_STATUSES = {"done", "failed"}
EVENT_KNOWN_STATUSES = {"queued", "running", "done", "failed"}
EVENT_POLL_SECONDS = float(os.getenv("DAARION_JOB_EVENTS_POLL_SECONDS", "0.5"))
ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000").rstrip("/")
ROUTER_REVIEW_TIMEOUT = float(os.getenv("DAARION_ROUTER_REVIEW_TIMEOUT_SECONDS", "20"))
AGROMATRIX_REVIEW_AUTH_MODE = os.getenv("AGROMATRIX_REVIEW_AUTH_MODE", "bearer").strip().lower()
AGROMATRIX_REVIEW_BEARER_TOKENS = [
    part.strip()
    for part in os.getenv("AGROMATRIX_REVIEW_BEARER_TOKENS", "").replace(";", ",").split(",")
    if part.strip()
]


class InvokeInput(BaseModel):
    prompt: str = Field(min_length=1)
    images: List[str] = Field(default_factory=list)


class InvokeRequest(BaseModel):
    agent_id: str
    input: InvokeInput
    metadata: Dict[str, Any] = Field(default_factory=dict)


class InvokeResponse(BaseModel):
    job_id: str
    status: str
    status_url: str


class SharedMemoryReviewRequest(BaseModel):
    point_id: str
    approve: bool
    reviewer: str | None = None
    note: str | None = None


def _extract_bearer_token(request: Request) -> str:
    auth_header = request.headers.get("Authorization", "")
    if not auth_header.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Missing Bearer token")
    token = auth_header[len("Bearer ") :].strip()
    if not token:
        raise HTTPException(status_code=401, detail="Empty Bearer token")
    return token


def _require_mentor_auth(request: Request) -> str:
    mode = AGROMATRIX_REVIEW_AUTH_MODE
    if mode in {"off", "none", "disabled"}:
        return ""
    if mode != "bearer":
        raise HTTPException(status_code=500, detail=f"Unsupported AGROMATRIX_REVIEW_AUTH_MODE={mode}")
    if not AGROMATRIX_REVIEW_BEARER_TOKENS:
        raise HTTPException(status_code=503, detail="Review auth is not configured")
    token = _extract_bearer_token(request)
    if not any(hmac.compare_digest(token, candidate) for candidate in AGROMATRIX_REVIEW_BEARER_TOKENS):
        raise HTTPException(status_code=403, detail="Invalid mentor token")
    return token


async def _router_json(
    method: str,
    path: str,
    *,
    payload: Dict[str, Any] | None = None,
    params: Dict[str, Any] | None = None,
    authorization: str | None = None,
) -> Dict[str, Any]:
    headers: Dict[str, str] = {}
    if authorization:
        headers["Authorization"] = authorization
    url = f"{ROUTER_URL}{path}"

    try:
        async with httpx.AsyncClient(timeout=ROUTER_REVIEW_TIMEOUT) as client:
            resp = await client.request(method, url, json=payload, params=params, headers=headers)
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Router timeout")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Router unavailable: {e}")

    try:
        body = resp.json()
    except Exception:
        body = {"raw": resp.text}

    if resp.status_code >= 400:
        detail = body.get("detail") if isinstance(body, dict) else body
        raise HTTPException(status_code=resp.status_code, detail=detail or f"Router error {resp.status_code}")
    return body if isinstance(body, dict) else {"data": body}


def _sse_message(event: str, payload: Dict[str, Any]) -> str:
    return f"event: {event}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"


@router.post("/invoke", status_code=status.HTTP_202_ACCEPTED, response_model=InvokeResponse)
async def invoke(payload: InvokeRequest) -> InvokeResponse:
    registry = _load_registry().get("agents", {})
    if payload.agent_id not in registry:
        raise HTTPException(status_code=404, detail=f"Unknown agent_id: {payload.agent_id}")

    job_id = f"job_{uuid.uuid4().hex}"
    now = datetime.now(timezone.utc).isoformat()
    job_doc = {
        "job_id": job_id,
        "status": "queued",
        "agent_id": payload.agent_id,
        "input": payload.input.model_dump(),
        "metadata": payload.metadata,
        "result": None,
        "error": None,
        "created_at": now,
        "updated_at": now,
        "started_at": None,
        "finished_at": None,
    }
    await create_job(job_id, job_doc)
    await enqueue_job(job_id)
    return InvokeResponse(job_id=job_id, status="queued", status_url=f"/v1/jobs/{job_id}")


@router.get("/jobs/{job_id}")
async def job_status(job_id: str) -> Dict[str, Any]:
    job = await get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    return job


@router.get("/jobs/{job_id}/events")
async def job_events(job_id: str, request: Request) -> StreamingResponse:
    existing = await get_job(job_id)
    if not existing:
        raise HTTPException(status_code=404, detail="Job not found")

    async def event_stream():
        last_state = None
        yield "retry: 1000\n\n"

        while True:
            if await request.is_disconnected():
                break

            job = await get_job(job_id)
            if not job:
                yield _sse_message("failed", {"job_id": job_id, "status": "failed", "error": {"message": "Job not found"}})
                break

            status_value = str(job.get("status", "unknown"))
            updated_at = str(job.get("updated_at", ""))
            state = (status_value, updated_at)

            if state != last_state:
                event_name = status_value if status_value in EVENT_KNOWN_STATUSES else "status"
                yield _sse_message(event_name, job)
                last_state = state

            if status_value in EVENT_TERMINAL_STATUSES:
                break

            await asyncio.sleep(EVENT_POLL_SECONDS)

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )


@router.get("/agromatrix/shared-memory/pending")
async def agromatrix_shared_pending(limit: int = 50) -> Dict[str, Any]:
    return await _router_json(
        "GET",
        "/v1/agromatrix/shared-memory/pending",
        params={"limit": max(1, min(limit, 200))},
    )


@router.post("/agromatrix/shared-memory/review")
async def agromatrix_shared_review(req: SharedMemoryReviewRequest, request: Request) -> Dict[str, Any]:
    token = _require_mentor_auth(request)
    auth_header = f"Bearer {token}" if token else None
    return await _router_json(
        "POST",
        "/v1/agromatrix/shared-memory/review",
        payload=req.model_dump(),
        authorization=auth_header,
    )
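A minimal sketch of consuming the /v1/jobs/{job_id}/events stream with httpx. The base URL is an assumption, and the SSE parsing is deliberately simplified (it only reads "event:" lines; a real client would also buffer "data:" payloads):

# Sketch: follow a job's SSE stream until a terminal event arrives.
import asyncio
import httpx

async def follow_job(job_id: str) -> None:
    url = f"http://localhost:8000/v1/jobs/{job_id}/events"  # assumed host/port
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url) as response:
            async for line in response.aiter_lines():
                if line.startswith("event: "):
                    event = line[len("event: "):]
                    print("event:", event)
                    if event in {"done", "failed"}:
                        return  # terminal statuses close the stream server-side too

asyncio.run(follow_job("job_..."))  # use a job_id returned by POST /v1/invoke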
gateway-bot/daarion_facade/metrics_poller.py (new file, 287 lines)

import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

import httpx
from redis.asyncio import Redis

from .registry_api import _load_crewai_roles, _load_district_registry, _load_registry

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger("daarion-metrics-poller")

REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
POLL_INTERVAL_SECONDS = int(os.getenv("DAARION_METRICS_POLL_INTERVAL_SECONDS", "10"))
METRICS_TTL_SECONDS = int(os.getenv("DAARION_METRICS_TTL_SECONDS", "60"))
HTTP_CONNECT_TIMEOUT_SECONDS = float(os.getenv("DAARION_METRICS_HTTP_CONNECT_TIMEOUT_SECONDS", "2"))
HTTP_TOTAL_TIMEOUT_SECONDS = float(os.getenv("DAARION_METRICS_HTTP_TOTAL_TIMEOUT_SECONDS", "5"))
NODES_TOTAL = int(os.getenv("DAARION_NODE_COUNT", "1"))
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")

DASHBOARD_KEY = "daarion:metrics:dashboard"
DISTRICT_KEY_PREFIX = "daarion:metrics:district"

_redis: Optional[Redis] = None


def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


def _ensure_url(value: str) -> str:
    value = (value or "").strip()
    if not value:
        return ""
    if value.startswith("http://") or value.startswith("https://"):
        return value
    return f"https://{value}"


def _health_candidates(district: Dict[str, Any]) -> List[str]:
    base = _ensure_url(str(district.get("domain") or ""))
    candidates: List[str] = []

    explicit = str(district.get("health_url") or "").strip()
    if explicit:
        candidates.append(_ensure_url(explicit))

    if base:
        candidates.extend(
            [
                f"{base}/.well-known/daarion-health.json",
                f"{base}/health",
                f"{base}/v1/health",
            ]
        )

    dedup: List[str] = []
    seen = set()
    for url in candidates:
        if url and url not in seen:
            dedup.append(url)
            seen.add(url)
    return dedup


def _extract_agents_online(payload: Dict[str, Any], agents_total: int) -> Optional[int]:
    raw = payload.get("agents_online")
    if isinstance(raw, bool):
        return agents_total if raw else 0
    if isinstance(raw, int):
        return max(0, min(raw, agents_total))

    agents = payload.get("agents")
    if isinstance(agents, list):
        count = 0
        for agent in agents:
            if not isinstance(agent, dict):
                continue
            status = str(agent.get("status", "")).lower()
            if status in {"online", "active", "ok"}:
                count += 1
        return min(count, agents_total)

    return None


async def redis_client() -> Redis:
    global _redis
    if _redis is None:
        _redis = Redis.from_url(REDIS_URL, decode_responses=True)
    return _redis


async def close_redis() -> None:
    global _redis
    if _redis is not None:
        await _redis.close()
        _redis = None


async def _fetch_json_with_latency(
    client: httpx.AsyncClient,
    url: str,
) -> Tuple[bool, Optional[Dict[str, Any]], Optional[float], Optional[str]]:
    started = time.perf_counter()
    try:
        response = await client.get(url)
        latency_ms = round((time.perf_counter() - started) * 1000, 2)
        if response.status_code >= 400:
            return False, None, latency_ms, f"HTTP {response.status_code}"

        data: Optional[Dict[str, Any]] = None
        try:
            parsed = response.json()
            if isinstance(parsed, dict):
                data = parsed
        except Exception:
            data = None

        return True, data, latency_ms, None
    except Exception as e:
        latency_ms = round((time.perf_counter() - started) * 1000, 2)
        return False, None, latency_ms, str(e)


async def _read_memory_vectors(client: httpx.AsyncClient) -> int:
    try:
        ok, payload, _, _ = await _fetch_json_with_latency(client, f"{MEMORY_SERVICE_URL}/health")
        if not ok or not payload:
            return 0
        return int(payload.get("vector_store", {}).get("memories", {}).get("vectors_count", 0) or 0)
    except Exception:
        return 0


async def _registry_snapshot() -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]], int, int]:
    raw_districts = _load_district_registry().get("districts", [])
    districts = [d for d in raw_districts if isinstance(d, dict) and d.get("district_id")]

    agents_map = _load_registry().get("agents", {})
    role_counts = await _load_crewai_roles()

    by_district: Dict[str, List[Dict[str, Any]]] = {}
    subagents_total = 0

    for aid, cfg in agents_map.items():
        if not isinstance(cfg, dict):
            continue
        aid_str = str(aid)
        district_id = str(cfg.get("district_id") or "city-core")
        subagents_total += int(role_counts.get(aid_str, 0))

        by_district.setdefault(district_id, []).append(
            {
                "agent_id": aid_str,
                "status": str(cfg.get("status", "active")),
            }
        )

    return districts, by_district, len(agents_map), subagents_total


async def build_dashboard() -> Dict[str, Any]:
    districts, agents_by_district, agents_total, subagents_total = await _registry_snapshot()
    timeout = httpx.Timeout(timeout=HTTP_TOTAL_TIMEOUT_SECONDS, connect=HTTP_CONNECT_TIMEOUT_SECONDS)

    by_district: List[Dict[str, Any]] = []
    districts_online = 0
    agents_online_total = 0

    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
        memory_vectors = await _read_memory_vectors(client)

        for district in districts:
            district_id = str(district.get("district_id"))
            title = district.get("title") or district_id
            domain = str(district.get("domain") or "")
            status = district.get("status") or "active"
            members = agents_by_district.get(district_id, [])
            agents_total_district = len(members)

            sample = {
                "district_id": district_id,
                "title": title,
                "domain": domain,
                "status": status,
                "ok": False,
                "agents_total": agents_total_district,
                "agents_online": 0,
                "latency_ms": None,
                "last_check_ts": _now_iso(),
                "error": None,
            }

            last_error = "No health endpoint configured"
            for candidate in _health_candidates(district):
                ok, payload, latency_ms, error_message = await _fetch_json_with_latency(client, candidate)
                sample["latency_ms"] = latency_ms
                if ok:
                    sample["ok"] = True
                    sample["error"] = None
                    inferred = _extract_agents_online(payload or {}, agents_total_district)
                    sample["agents_online"] = inferred if inferred is not None else agents_total_district
                    break
                last_error = error_message or "health check failed"

            if sample["ok"]:
                districts_online += 1
                agents_online_total += int(sample.get("agents_online") or 0)
            else:
                sample["error"] = {"message": last_error}

            by_district.append(sample)

    return {
        "global": {
            "nodes": NODES_TOTAL,
            "districts": len(districts),
            "agents": agents_total,
            "subagents": subagents_total,
            "memory_vectors": memory_vectors,
            "districts_online": districts_online,
            "agents_online": agents_online_total,
        },
        "by_district": by_district,
        "updated_at": _now_iso(),
    }


async def publish_dashboard(dashboard: Dict[str, Any]) -> None:
    redis = await redis_client()
    payload = json.dumps(dashboard, ensure_ascii=False)
    await redis.set(DASHBOARD_KEY, payload, ex=METRICS_TTL_SECONDS)

    for row in dashboard.get("by_district", []):
        district_id = row.get("district_id")
        if not district_id:
            continue
        key = f"{DISTRICT_KEY_PREFIX}:{district_id}"
        await redis.set(key, json.dumps(row, ensure_ascii=False), ex=METRICS_TTL_SECONDS)


async def run_once() -> None:
    dashboard = await build_dashboard()
    await publish_dashboard(dashboard)
    logger.info(
        "dashboard_updated districts=%s districts_online=%s agents=%s agents_online=%s",
        dashboard["global"].get("districts"),
        dashboard["global"].get("districts_online"),
        dashboard["global"].get("agents"),
        dashboard["global"].get("agents_online"),
    )


async def worker_loop() -> None:
    logger.info(
        "metrics_poller_started interval=%ss ttl=%ss redis=%s",
        POLL_INTERVAL_SECONDS,
        METRICS_TTL_SECONDS,
        REDIS_URL,
    )
    while True:
        started = time.perf_counter()
        try:
            await run_once()
        except asyncio.CancelledError:
            raise
        except Exception:
            logger.exception("metrics_poller_cycle_failed")

        elapsed = time.perf_counter() - started
        sleep_for = max(1.0, POLL_INTERVAL_SECONDS - elapsed)
        await asyncio.sleep(sleep_for)


if __name__ == "__main__":
    try:
        asyncio.run(worker_loop())
    finally:
        try:
            asyncio.run(close_redis())
        except Exception:
            pass
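A single poll cycle can be exercised without the loop, which is handy for debugging. A sketch, assuming it is run from the gateway-bot directory so the daarion_facade package resolves, and that REDIS_URL and the registry files are reachable:

# Sketch: run one poll cycle and print the aggregated counters.
import asyncio
from daarion_facade.metrics_poller import build_dashboard, close_redis

async def main() -> None:
    dashboard = await build_dashboard()
    print(dashboard["global"])  # nodes / districts / agents / memory_vectors, etc.
    await close_redis()

asyncio.run(main())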
gateway-bot/daarion_facade/redis_jobs.py (new file, 84 lines)

import asyncio
import json
import os
from typing import Any, Dict, Optional

from redis.asyncio import Redis

REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
JOB_KEY_PREFIX = "daarion:jobs"
QUEUE_KEY = "daarion:jobs:queue"
JOB_TTL_SECONDS = int(os.getenv("DAARION_JOB_TTL_SECONDS", str(72 * 3600)))

_redis: Optional[Redis] = None


def _job_key(job_id: str) -> str:
    return f"{JOB_KEY_PREFIX}:{job_id}"


async def redis_client() -> Redis:
    global _redis
    if _redis is None:
        _redis = Redis.from_url(REDIS_URL, decode_responses=True)
    return _redis


async def close_redis() -> None:
    global _redis
    if _redis is not None:
        await _redis.close()
        _redis = None


async def create_job(job_id: str, payload: Dict[str, Any]) -> None:
    r = await redis_client()
    key = _job_key(job_id)
    await r.set(key, json.dumps(payload, ensure_ascii=False), ex=JOB_TTL_SECONDS)


async def get_job(job_id: str) -> Optional[Dict[str, Any]]:
    r = await redis_client()
    raw = await r.get(_job_key(job_id))
    if not raw:
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return None


async def update_job(job_id: str, patch: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    current = await get_job(job_id)
    if not current:
        return None
    current.update(patch)
    await create_job(job_id, current)
    return current


async def enqueue_job(job_id: str) -> None:
    r = await redis_client()
    await r.lpush(QUEUE_KEY, job_id)


async def dequeue_job(block_seconds: int = 5) -> Optional[str]:
    r = await redis_client()
    result = await r.brpop(QUEUE_KEY, timeout=block_seconds)
    if not result:
        return None
    _, job_id = result
    return job_id


async def wait_for_redis(timeout_seconds: int = 30) -> None:
    deadline = asyncio.get_running_loop().time() + timeout_seconds
    while True:
        try:
            r = await redis_client()
            await r.ping()
            return
        except Exception:
            if asyncio.get_running_loop().time() >= deadline:
                raise
            await asyncio.sleep(1)
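Taken together, the queue is a plain Redis list (LPUSH/BRPOP gives FIFO) plus one JSON document per job. A sketch of the round trip, assuming a reachable Redis at the default REDIS_URL:

# Sketch: create a job, enqueue it, then claim it the way the worker does.
import asyncio
from daarion_facade.redis_jobs import create_job, enqueue_job, dequeue_job, get_job, close_redis

async def main() -> None:
    await create_job("job_demo", {"job_id": "job_demo", "status": "queued"})
    await enqueue_job("job_demo")
    claimed = await dequeue_job(block_seconds=1)  # BRPOP pops the oldest entry
    print(claimed, await get_job(claimed))
    await close_redis()

asyncio.run(main())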
gateway-bot/daarion_facade/registry_api.py (new file, 268 lines)

import json
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

import httpx
from fastapi import APIRouter
from redis.asyncio import Redis

router = APIRouter(prefix="/v1", tags=["daarion-facade"])

REGISTRY_CACHE_TTL = int(os.getenv("REGISTRY_CACHE_TTL", "30"))
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")
CREWAI_SERVICE_URL = os.getenv("CREWAI_SERVICE_URL", "http://dagi-staging-crewai-service:9010")
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
METRICS_DASHBOARD_KEY = "daarion:metrics:dashboard"

_REGISTRY_CACHE: Dict[str, Any] = {"loaded_at": 0.0, "data": None}
_DISTRICT_CACHE: Dict[str, Any] = {"loaded_at": 0.0, "data": None}
_CREWAI_CACHE: Dict[str, Any] = {"loaded_at": 0.0, "data": {}}
_REDIS: Optional[Redis] = None


def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


def _registry_paths() -> List[Path]:
    return [
        Path("/app/gateway-bot/agent_registry.json"),
        Path("/opt/microdao-daarion/config/agent_registry.json"),
        Path(__file__).resolve().parents[1] / "agent_registry.json",
    ]


def _district_paths() -> List[Path]:
    return [
        Path("/app/gateway-bot/district_registry.json"),
        Path(__file__).resolve().parents[1] / "district_registry.json",
    ]


def _load_registry() -> Dict[str, Any]:
    now = time.time()
    if _REGISTRY_CACHE.get("data") and (now - _REGISTRY_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL):
        return _REGISTRY_CACHE["data"]

    for path in _registry_paths():
        if path.exists():
            with path.open("r", encoding="utf-8") as f:
                data = json.load(f)
            _REGISTRY_CACHE.update({"loaded_at": now, "data": data})
            return data

    data = {"agents": {}}
    _REGISTRY_CACHE.update({"loaded_at": now, "data": data})
    return data


def _load_district_registry() -> Dict[str, Any]:
    now = time.time()
    if _DISTRICT_CACHE.get("data") and (now - _DISTRICT_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL):
        return _DISTRICT_CACHE["data"]

    for path in _district_paths():
        if path.exists():
            with path.open("r", encoding="utf-8") as f:
                data = json.load(f)
            _DISTRICT_CACHE.update({"loaded_at": now, "data": data})
            return data

    data = {"districts": []}
    _DISTRICT_CACHE.update({"loaded_at": now, "data": data})
    return data


async def _redis_client() -> Redis:
    global _REDIS
    if _REDIS is None:
        _REDIS = Redis.from_url(REDIS_URL, decode_responses=True)
    return _REDIS


async def _load_cached_dashboard() -> Optional[Dict[str, Any]]:
    try:
        r = await _redis_client()
        raw = await r.get(METRICS_DASHBOARD_KEY)
        if not raw:
            return None
        return json.loads(raw)
    except Exception:
        return None


async def _load_crewai_roles() -> Dict[str, int]:
    now = time.time()
    if now - _CREWAI_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL:
        return _CREWAI_CACHE.get("data", {})

    out: Dict[str, int] = {}
    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            resp = await client.get(f"{CREWAI_SERVICE_URL}/crew/agents")
            if resp.status_code == 200:
                payload = resp.json()
                for aid, info in payload.items():
                    default_roles = info.get("default_roles")
                    out[str(aid)] = int(default_roles) if isinstance(default_roles, int) else 0
    except Exception:
        out = {}

    _CREWAI_CACHE.update({"loaded_at": now, "data": out})
    return out


@router.get("/registry/agents")
async def get_agents() -> Dict[str, Any]:
    reg = _load_registry()
    agents = reg.get("agents", {}) if isinstance(reg, dict) else {}
    role_counts = await _load_crewai_roles()

    items: List[Dict[str, Any]] = []
    for agent_id, cfg in agents.items():
        if not isinstance(cfg, dict):
            continue
        domains = cfg.get("domains") or []
        district_id = cfg.get("district_id") or "city-core"
        items.append(
            {
                "agent_id": agent_id,
                "title": cfg.get("display_name") or agent_id,
                "role": cfg.get("canonical_role") or "",
                "domain_primary": domains[0] if domains else "general",
                "domain_aliases": domains[1:] if len(domains) > 1 else [],
                "visibility": cfg.get("visibility", "public"),
                "status": cfg.get("status", "active"),
                "team": {"subagents_total": role_counts.get(agent_id, 0)},
                "district_id": district_id,
                "avatar_url": cfg.get("avatar_url"),
                "health_url": cfg.get("health_url"),
            }
        )

    return {"items": items, "total": len(items)}


@router.get("/registry/districts")
async def get_districts() -> Dict[str, Any]:
    agents_payload = await get_agents()
    agents = agents_payload.get("items", [])
    by_district: Dict[str, List[Dict[str, Any]]] = {}
    for a in agents:
        by_district.setdefault(a.get("district_id", "city-core"), []).append(a)

    catalog = _load_district_registry().get("districts", [])
    catalog_by_id: Dict[str, Dict[str, Any]] = {
        str(d.get("district_id")): d for d in catalog if isinstance(d, dict) and d.get("district_id")
    }

    district_ids = sorted(set(catalog_by_id.keys()) | set(by_district.keys()))
    items: List[Dict[str, Any]] = []

    for district_id in district_ids:
        members = by_district.get(district_id, [])
        base = catalog_by_id.get(district_id, {})
        domain = base.get("domain") or ("daarion.city" if district_id == "city-core" else f"{district_id}.daarion.city")

        lead_agent_id = base.get("lead_agent_id")
        if not lead_agent_id:
            if district_id == "city-core" and any(m.get("agent_id") == "daarwizz" for m in members):
                lead_agent_id = "daarwizz"
            elif members:
                lead_agent_id = members[0].get("agent_id")
            else:
                lead_agent_id = None

        items.append(
            {
                "district_id": district_id,
                "title": base.get("title") or district_id.replace("-", " ").title(),
                "domain": domain,
                "status": base.get("status", "active"),
                "logo_url": base.get("logo_url"),
                "health_url": base.get("health_url"),
                "well_known": {
                    "manifest": f"https://{domain}/.well-known/daarion-district.json",
                    "health": f"https://{domain}/.well-known/daarion-health.json",
                    "capabilities": f"https://{domain}/.well-known/daarion-capabilities.json",
                },
                "lead_agent_id": lead_agent_id,
                "agents_total": len(members),
            }
        )

    return {"items": items, "total": len(items)}


@router.get("/metrics")
async def get_metrics() -> Dict[str, Any]:
    agents_payload = await get_agents()
    districts_payload = await get_districts()
    agents = agents_payload.get("items", [])

    memory_vectors = 0
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{MEMORY_SERVICE_URL}/health")
            if resp.status_code == 200:
                data = resp.json()
                memory_vectors = int(
                    data.get("vector_store", {})
                    .get("memories", {})
                    .get("vectors_count", 0)
                )
    except Exception:
        memory_vectors = 0

    return {
        "nodes": 1,
        "districts": districts_payload.get("total", 0),
        "agents": len(agents),
        "subagents": sum(int((a.get("team") or {}).get("subagents_total", 0)) for a in agents),
        "memory_vectors": memory_vectors,
    }


@router.get("/metrics/dashboard")
async def get_metrics_dashboard() -> Dict[str, Any]:
    cached = await _load_cached_dashboard()
    if cached:
        return cached

    metrics = await get_metrics()
    districts_payload = await get_districts()
    districts = districts_payload.get("items", [])

    by_district = []
    for d in districts:
        by_district.append(
            {
                "district_id": d.get("district_id"),
                "title": d.get("title"),
                "domain": d.get("domain"),
                "status": d.get("status"),
                "ok": None,
                "agents_total": d.get("agents_total", 0),
                "agents_online": None,
                "latency_ms": None,
                "last_check_ts": None,
            }
        )

    return {
        "global": {
            "nodes": metrics.get("nodes", 1),
            "districts": metrics.get("districts", 0),
            "agents": metrics.get("agents", 0),
            "subagents": metrics.get("subagents", 0),
            "memory_vectors": metrics.get("memory_vectors", 0),
            "districts_online": 0,
            "agents_online": 0,
        },
        "by_district": by_district,
        "updated_at": _now_iso(),
        "source": "fallback_registry",
    }
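Because /v1/metrics/dashboard prefers the Redis snapshot and only falls back to the registry, freshness can be checked directly against the key. A sketch (the key name comes from the code above; the Redis location is an assumption for local use):

# Sketch: inspect the poller-written dashboard snapshot in Redis.
import asyncio
import json
from redis.asyncio import Redis

async def main() -> None:
    r = Redis.from_url("redis://localhost:6379/0", decode_responses=True)  # assumed URL
    raw = await r.get("daarion:metrics:dashboard")
    print(json.loads(raw)["updated_at"] if raw else "no snapshot (poller down or TTL expired)")
    await r.close()

asyncio.run(main())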
gateway-bot/daarion_facade/reminder_worker.py (new file, 100 lines)

import asyncio
import logging
import os
from typing import Dict

import httpx

from .reminders import close_redis, pop_due_reminders

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger("daarion-reminder-worker")

POLL_SECONDS = float(os.getenv("DAARION_REMINDER_POLL_SECONDS", "2"))
TELEGRAM_TIMEOUT = float(os.getenv("DAARION_REMINDER_TELEGRAM_TIMEOUT", "20"))

AGENT_TOKEN_ENV: Dict[str, str] = {
    "daarwizz": "DAARWIZZ_TELEGRAM_BOT_TOKEN",
    "helion": "HELION_TELEGRAM_BOT_TOKEN",
    "greenfood": "GREENFOOD_TELEGRAM_BOT_TOKEN",
    "agromatrix": "AGROMATRIX_TELEGRAM_BOT_TOKEN",
    "alateya": "ALATEYA_TELEGRAM_BOT_TOKEN",
    "nutra": "NUTRA_TELEGRAM_BOT_TOKEN",
    "druid": "DRUID_TELEGRAM_BOT_TOKEN",
    "clan": "CLAN_TELEGRAM_BOT_TOKEN",
    "eonarch": "EONARCH_TELEGRAM_BOT_TOKEN",
    "senpai": "SENPAI_TELEGRAM_BOT_TOKEN",
    "oneok": "ONEOK_TELEGRAM_BOT_TOKEN",
    "soul": "SOUL_TELEGRAM_BOT_TOKEN",
    "yaromir": "YAROMIR_TELEGRAM_BOT_TOKEN",
    "sofiia": "SOFIIA_TELEGRAM_BOT_TOKEN",
}


def _token_for_agent(agent_id: str) -> str:
    env = AGENT_TOKEN_ENV.get((agent_id or "").lower(), "")
    return os.getenv(env, "") if env else ""


async def _send_reminder(item: Dict[str, str]) -> bool:
    agent_id = str(item.get("agent_id", ""))
    chat_id = str(item.get("chat_id", ""))
    reminder_text = str(item.get("text", "")).strip()
    due_at = str(item.get("due_at", ""))

    token = _token_for_agent(agent_id)
    if not token:
        logger.warning("reminder_skip_no_token agent=%s reminder_id=%s", agent_id, item.get("reminder_id"))
        return False

    if not chat_id or not reminder_text:
        logger.warning("reminder_skip_invalid_payload reminder_id=%s", item.get("reminder_id"))
        return False

    body = {
        "chat_id": chat_id,
        "text": f"⏰ Нагадування ({agent_id})\n\n{reminder_text}\n\n🕒 {due_at}",
    }

    url = f"https://api.telegram.org/bot{token}/sendMessage"
    async with httpx.AsyncClient(timeout=TELEGRAM_TIMEOUT) as client:
        resp = await client.post(url, json=body)
        if resp.status_code != 200:
            logger.warning(
                "reminder_send_failed reminder_id=%s status=%s body=%s",
                item.get("reminder_id"),
                resp.status_code,
                resp.text[:300],
            )
            return False

    logger.info("reminder_sent reminder_id=%s agent=%s chat=%s", item.get("reminder_id"), agent_id, chat_id)
    return True


async def worker_loop() -> None:
    logger.info("reminder_worker_started poll_seconds=%s", POLL_SECONDS)
    while True:
        try:
            items = await pop_due_reminders(limit=20)
            if items:
                for item in items:
                    try:
                        await _send_reminder(item)
                    except Exception:
                        logger.exception("reminder_send_exception reminder_id=%s", item.get("reminder_id"))
        except asyncio.CancelledError:
            raise
        except Exception:
            logger.exception("reminder_worker_cycle_failed")
        await asyncio.sleep(POLL_SECONDS)


if __name__ == "__main__":
    try:
        asyncio.run(worker_loop())
    finally:
        try:
            asyncio.run(close_redis())
        except Exception:
            pass
gateway-bot/daarion_facade/reminders.py (new file, 154 lines)

import json
import os
import time
import uuid
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from redis.asyncio import Redis

REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
REMINDER_PREFIX = "daarion:reminders"
REMINDER_BY_ID = f"{REMINDER_PREFIX}:by_id"
REMINDER_SCHEDULE = f"{REMINDER_PREFIX}:schedule"
REMINDER_TTL_SECONDS = int(os.getenv("DAARION_REMINDER_TTL_SECONDS", str(30 * 24 * 3600)))

_redis: Optional[Redis] = None


@dataclass
class Reminder:
    reminder_id: str
    agent_id: str
    chat_id: str
    user_id: str
    text: str
    due_ts: int
    created_at: str

    def to_dict(self) -> Dict[str, Any]:
        return {
            "reminder_id": self.reminder_id,
            "agent_id": self.agent_id,
            "chat_id": self.chat_id,
            "user_id": self.user_id,
            "text": self.text,
            "due_ts": self.due_ts,
            "created_at": self.created_at,
        }


async def redis_client() -> Redis:
    global _redis
    if _redis is None:
        _redis = Redis.from_url(REDIS_URL, decode_responses=True)
    return _redis


async def close_redis() -> None:
    global _redis
    if _redis is not None:
        await _redis.close()
        _redis = None


def _iso_now() -> str:
    return datetime.now(timezone.utc).isoformat()


def _iso_from_ts(ts: int) -> str:
    return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()


async def create_reminder(agent_id: str, chat_id: str, user_id: str, text: str, due_ts: int) -> Dict[str, Any]:
    reminder = Reminder(
        reminder_id=f"rem_{uuid.uuid4().hex[:16]}",
        agent_id=agent_id,
        chat_id=str(chat_id),
        user_id=str(user_id),
        text=text.strip(),
        due_ts=int(due_ts),
        created_at=_iso_now(),
    )

    r = await redis_client()
    key = f"{REMINDER_BY_ID}:{reminder.reminder_id}"
    payload = json.dumps(reminder.to_dict(), ensure_ascii=False)

    await r.set(key, payload, ex=REMINDER_TTL_SECONDS)
    await r.zadd(REMINDER_SCHEDULE, {reminder.reminder_id: float(reminder.due_ts)})

    result = reminder.to_dict()
    result["due_at"] = _iso_from_ts(reminder.due_ts)
    return result


async def list_reminders(agent_id: str, chat_id: str, user_id: str, limit: int = 10) -> List[Dict[str, Any]]:
    r = await redis_client()
    now_ts = int(time.time())
    ids = await r.zrangebyscore(REMINDER_SCHEDULE, min=now_ts - 365 * 24 * 3600, max="+inf", start=0, num=max(1, limit * 5))

    out: List[Dict[str, Any]] = []
    for reminder_id in ids:
        raw = await r.get(f"{REMINDER_BY_ID}:{reminder_id}")
        if not raw:
            continue
        try:
            item = json.loads(raw)
        except json.JSONDecodeError:
            continue
        if item.get("agent_id") != agent_id:
            continue
        if str(item.get("chat_id")) != str(chat_id):
            continue
        if str(item.get("user_id")) != str(user_id):
            continue
        item["due_at"] = _iso_from_ts(int(item.get("due_ts", 0)))
        out.append(item)
        if len(out) >= limit:
            break
    return out


async def cancel_reminder(reminder_id: str, agent_id: str, chat_id: str, user_id: str) -> bool:
    r = await redis_client()
    key = f"{REMINDER_BY_ID}:{reminder_id}"
    raw = await r.get(key)
    if not raw:
        return False
    try:
        item = json.loads(raw)
    except json.JSONDecodeError:
        return False

    if item.get("agent_id") != agent_id or str(item.get("chat_id")) != str(chat_id) or str(item.get("user_id")) != str(user_id):
        return False

    await r.delete(key)
    await r.zrem(REMINDER_SCHEDULE, reminder_id)
    return True


async def pop_due_reminders(limit: int = 20) -> List[Dict[str, Any]]:
    r = await redis_client()
    now_ts = int(time.time())
    ids = await r.zrangebyscore(REMINDER_SCHEDULE, min="-inf", max=now_ts, start=0, num=max(1, limit))
    out: List[Dict[str, Any]] = []

    for reminder_id in ids:
        removed = await r.zrem(REMINDER_SCHEDULE, reminder_id)
        if removed == 0:
            continue
        raw = await r.get(f"{REMINDER_BY_ID}:{reminder_id}")
        if not raw:
            continue
        await r.delete(f"{REMINDER_BY_ID}:{reminder_id}")
        try:
            item = json.loads(raw)
            item["due_at"] = _iso_from_ts(int(item.get("due_ts", now_ts)))
            out.append(item)
        except json.JSONDecodeError:
            continue

    return out
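A sketch of the reminder round trip built from the functions above; the agent/chat/user IDs are illustrative. Note that pop_due_reminders uses ZREM's return value as an atomic claim, so two workers cannot deliver the same reminder:

# Sketch: schedule a reminder 5 seconds out, list it, then drain it as the worker would.
import asyncio
import time
from daarion_facade.reminders import create_reminder, list_reminders, pop_due_reminders, close_redis

async def main() -> None:
    created = await create_reminder("daarwizz", "123", "456", "water the plants", int(time.time()) + 5)
    print("scheduled:", created["reminder_id"], created["due_at"])
    print("visible:", await list_reminders("daarwizz", "123", "456"))
    await asyncio.sleep(6)
    print("due now:", await pop_due_reminders())  # one-shot: entries are deleted on pop
    await close_redis()

asyncio.run(main())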
gateway-bot/daarion_facade/worker.py (new file, 107 lines)

import asyncio
from datetime import datetime, timezone
import logging
import os
from typing import Any, Dict

import httpx

from .redis_jobs import close_redis, dequeue_job, get_job, update_job, wait_for_redis

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger("daarion-gateway-worker")

ROUTER_BASE_URL = os.getenv("ROUTER_BASE_URL", os.getenv("ROUTER_URL", "http://router:8000"))
ROUTER_TIMEOUT_SECONDS = float(os.getenv("ROUTER_WORKER_TIMEOUT", "60"))


def _now() -> str:
    return datetime.now(timezone.utc).isoformat()


async def _call_router(agent_id: str, input_payload: Dict[str, Any], metadata: Dict[str, Any]) -> Dict[str, Any]:
    body: Dict[str, Any] = {
        "prompt": input_payload.get("prompt", ""),
        "metadata": metadata or {},
    }
    images = input_payload.get("images") or []
    if images:
        body["images"] = images

    url = f"{ROUTER_BASE_URL}/v1/agents/{agent_id}/infer"
    async with httpx.AsyncClient(timeout=ROUTER_TIMEOUT_SECONDS) as client:
        resp = await client.post(url, json=body)
        resp.raise_for_status()
        data = resp.json()

    return {
        "response": data.get("response", ""),
        "model": data.get("model"),
        "backend": data.get("backend"),
        "tokens_used": data.get("tokens_used"),
    }


async def run_once(job_id: str) -> None:
    job = await get_job(job_id)
    if not job:
        logger.warning("job_missing: %s", job_id)
        return

    await update_job(job_id, {"status": "running", "started_at": _now(), "updated_at": _now()})

    agent_id = job.get("agent_id")
    input_payload = job.get("input") or {}
    metadata = job.get("metadata") or {}

    try:
        result = await _call_router(agent_id, input_payload, metadata)
        await update_job(
            job_id,
            {
                "status": "done",
                "result": result,
                "error": None,
                "finished_at": _now(),
                "updated_at": _now(),
            },
        )
        logger.info("job_done: %s agent=%s", job_id, agent_id)
    except Exception as e:
        await update_job(
            job_id,
            {
                "status": "failed",
                "error": {"type": e.__class__.__name__, "message": str(e)},
                "finished_at": _now(),
                "updated_at": _now(),
            },
        )
        logger.exception("job_failed: %s agent=%s", job_id, agent_id)


async def worker_loop() -> None:
    await wait_for_redis(60)
    logger.info("worker_started router=%s", ROUTER_BASE_URL)

    while True:
        try:
            job_id = await dequeue_job(block_seconds=10)
            if not job_id:
                continue
            await run_once(job_id)
        except asyncio.CancelledError:
            raise
        except Exception:
            logger.exception("worker_loop_error")
            await asyncio.sleep(1)


if __name__ == "__main__":
    try:
        asyncio.run(worker_loop())
    finally:
        try:
            asyncio.run(close_redis())
        except Exception:
            pass
gateway-bot/district_registry.json (new file, 92 lines)

{
  "districts": [
    {
      "district_id": "city-core",
      "title": "City Core - DAARION.city",
      "domain": "daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "daarwizz"
    },
    {
      "district_id": "helion",
      "title": "Helion District",
      "domain": "helion.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "helion"
    },
    {
      "district_id": "alateya",
      "title": "Alateya District",
      "domain": "alateya.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "alateya"
    },
    {
      "district_id": "druid",
      "title": "Druid District",
      "domain": "druid.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "druid"
    },
    {
      "district_id": "nutra",
      "title": "Nutra District",
      "domain": "nutra.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "nutra"
    },
    {
      "district_id": "agromatrix",
      "title": "AgroMatrix District",
      "domain": "agromatrix.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "agromatrix"
    },
    {
      "district_id": "greenfood",
      "title": "GreenFood District",
      "domain": "greenfood.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "greenfood"
    },
    {
      "district_id": "clan",
      "title": "Clan District",
      "domain": "clan.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "clan"
    },
    {
      "district_id": "eonarch",
      "title": "Eonarch District",
      "domain": "eonarch.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "eonarch"
    },
    {
      "district_id": "soul",
      "title": "Soul District",
      "domain": "soul.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "soul"
    },
    {
      "district_id": "senpai",
      "title": "Senpai District",
      "domain": "senpai.daarion.city",
      "status": "active",
      "logo_url": null,
      "lead_agent_id": "senpai"
    }
  ]
}
@@ -1871,23 +1871,53 @@ async def process_document(
         Dict з результатом обробки
     """
     mime_type = document.get("mime_type", "")
+    mime_type_l = (mime_type or "").lower()
     file_name = document.get("file_name", "")
     file_id = document.get("file_id")

     file_name_lower = file_name.lower()
-    allowed_exts = {".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx", ".zip"}
+    allowed_exts = {
+        ".pdf", ".doc", ".docx", ".rtf", ".odt",
+        ".txt", ".md", ".markdown",
+        ".csv", ".tsv", ".xls", ".xlsx", ".xlsm", ".ods",
+        ".ppt", ".pptx", ".odp",
+        ".json", ".yaml", ".yml", ".xml", ".html", ".htm",
+        ".zip",
+        ".jpg", ".jpeg", ".png", ".webp", ".gif", ".bmp", ".tiff",
+    }
     is_allowed = any(file_name_lower.endswith(ext) for ext in allowed_exts)
-    if mime_type == "application/pdf":
+    if mime_type_l == "application/pdf":
         is_allowed = True
-    if mime_type in {
+    if mime_type_l in {
         "application/msword",
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        "application/rtf",
+        "text/rtf",
+        "application/vnd.oasis.opendocument.text",
+        "application/vnd.ms-excel",
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        "application/vnd.ms-excel.sheet.macroenabled.12",
+        "application/vnd.oasis.opendocument.spreadsheet",
+        "application/vnd.ms-powerpoint",
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        "application/vnd.oasis.opendocument.presentation",
         "text/plain",
+        "text/markdown",
         "text/csv",
+        "text/tab-separated-values",
+        "application/json",
+        "application/yaml",
+        "application/x-yaml",
+        "text/yaml",
+        "application/xml",
+        "text/xml",
+        "text/html",
         "application/zip",
+        "application/x-zip-compressed",
     }:
         is_allowed = True
+    if mime_type_l.startswith("image/"):
+        is_allowed = True

     if is_allowed and file_id:
         logger.info(f"{agent_config.name}: Document from {username} (tg:{user_id}), file_id: {file_id}, file_name: {file_name}")

@@ -2027,7 +2057,7 @@
     telegram_token = agent_config.get_telegram_token()
     await send_telegram_message(
         chat_id,
-        "Наразі підтримуються формати: PDF, DOCX, TXT, MD, CSV, XLSX, ZIP.",
+        "Підтримуються формати: PDF/DOC/DOCX/RTF/ODT, TXT/MD/CSV/TSV, XLS/XLSX/XLSM/ODS, PPT/PPTX/ODP, JSON/YAML/XML/HTML, ZIP, зображення.",
         telegram_token,
     )
     return {"ok": False, "error": "Unsupported document type"}
@@ -3681,7 +3711,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
            doc_url=file_url,
            file_name=file_name,
            dao_id=dao_id,
            user_id=f"tg:{user_id}"
            user_id=f"tg:{user_id}",
            agent_id=agent_config.agent_id,
        )

        if result.success:
@@ -3705,7 +3736,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
        result = await ingest_document(
            session_id=session_id,
            dao_id=dao_id,
            user_id=f"tg:{user_id}"
            user_id=f"tg:{user_id}",
            agent_id=agent_config.agent_id,
        )

        if result.success:

@@ -6,20 +6,32 @@ Endpoints:
- POST /api/doc/parse - Parse a document
- POST /api/doc/ingest - Ingest document to RAG
- POST /api/doc/ask - Ask question about document
- POST /api/doc/update - Update existing document text (versioned)
- POST /api/doc/publish - Publish physical file version via artifact registry
- GET /api/doc/versions/{doc_id} - List document versions
- GET /api/doc/artifacts/{artifact_id}/versions/{version_id}/download - Download via gateway proxy
"""
import logging
import os
import re
from typing import Optional, Dict, Any
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from fastapi.responses import Response
from pydantic import BaseModel
import httpx

from services.doc_service import (
    doc_service,
    parse_document,
    ingest_document,
    ask_about_document,
    update_document,
    list_document_versions,
    publish_document_artifact,
    get_doc_context,
    ParsedResult,
    IngestResult,
    UpdateResult,
    QAResult,
    DocContext
)
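A sketch of how a client might drive the new versioning endpoints listed above (the gateway base URL, session, and payload values are placeholders, not values from this commit):

    # Hypothetical client walk-through; base URL and IDs are illustrative.
    import asyncio
    import httpx

    BASE = "http://localhost:8080"  # assumed gateway address

    async def demo(session_id: str) -> None:
        async with httpx.AsyncClient(base_url=BASE, timeout=60) as client:
            # Bump the document text to a new version and publish a .docx artifact.
            upd = await client.post("/api/doc/update", json={
                "session_id": session_id,
                "text": "# Revised draft\nUpdated content.",
                "publish_artifact": True,
                "target_format": "docx",
            })
            body = upd.json()
            # List the accumulated versions for the same document.
            versions = await client.get(f"/api/doc/versions/{body['doc_id']}")
            print(versions.json(), body.get("artifact_download_url"))

    asyncio.run(demo("tg:12345"))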
@@ -27,6 +39,8 @@ from services.doc_service import (
logger = logging.getLogger(__name__)

router = APIRouter()
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
DOC_DOWNLOAD_TIMEOUT_SECONDS = float(os.getenv("DOC_DOWNLOAD_TIMEOUT_SECONDS", "60"))


# ========================================
@@ -52,6 +66,7 @@ class IngestDocumentRequest(BaseModel):
    file_name: Optional[str] = None
    dao_id: Optional[str] = None
    user_id: Optional[str] = None
    agent_id: str = "daarwizz"


class AskDocumentRequest(BaseModel):
@@ -61,6 +76,40 @@ class AskDocumentRequest(BaseModel):
    doc_id: Optional[str] = None
    dao_id: Optional[str] = None
    user_id: Optional[str] = None
    agent_id: str = "daarwizz"


class UpdateDocumentRequest(BaseModel):
    """Request to update existing document content."""
    session_id: str
    doc_id: Optional[str] = None
    doc_url: Optional[str] = None
    file_name: Optional[str] = None
    text: Optional[str] = None
    dao_id: Optional[str] = None
    user_id: Optional[str] = None
    agent_id: str = "daarwizz"
    storage_ref: Optional[str] = None
    publish_artifact: bool = False
    artifact_id: Optional[str] = None
    target_format: Optional[str] = None
    artifact_label: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None


class PublishDocumentRequest(BaseModel):
    """Request to publish document as physical artifact version."""
    session_id: str
    doc_id: Optional[str] = None
    doc_url: Optional[str] = None
    file_name: Optional[str] = None
    text: Optional[str] = None
    dao_id: Optional[str] = None
    user_id: Optional[str] = None
    artifact_id: Optional[str] = None
    target_format: Optional[str] = None
    artifact_label: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None


# ========================================
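For contrast with the update flow, a publish-only request body for PublishDocumentRequest might look like this (illustrative values; only session_id is required by the model above):

    # Hypothetical /api/doc/publish payload; field names follow the model above.
    publish_request = {
        "session_id": "tg:12345",
        "doc_id": "abc123def456",
        "target_format": "xlsx",
        "artifact_label": "final",
        "metadata": {"reviewed_by": "editor"},
    }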
@@ -167,7 +216,8 @@ async def ingest_document_endpoint(request: IngestDocumentRequest):
        doc_url=request.doc_url,
        file_name=request.file_name,
        dao_id=request.dao_id,
        user_id=request.user_id
        user_id=request.user_id,
        agent_id=request.agent_id,
    )

    if not result.success:
@@ -209,7 +259,8 @@ async def ask_about_document_endpoint(request: AskDocumentRequest):
        question=request.question,
        doc_id=doc_id,
        dao_id=request.dao_id,
        user_id=request.user_id
        user_id=request.user_id,
        agent_id=request.agent_id,
    )

    if not result.success:
@@ -227,6 +278,107 @@ async def ask_about_document_endpoint(request: AskDocumentRequest):
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/api/doc/update")
async def update_document_endpoint(request: UpdateDocumentRequest):
    """
    Update a document and bump its version.
    If text is omitted and doc_url exists, text is re-parsed from the source document.
    """
    try:
        result = await update_document(
            session_id=request.session_id,
            doc_id=request.doc_id,
            doc_url=request.doc_url,
            file_name=request.file_name,
            text=request.text,
            dao_id=request.dao_id,
            user_id=request.user_id,
            agent_id=request.agent_id,
            storage_ref=request.storage_ref,
            publish_artifact=request.publish_artifact,
            artifact_id=request.artifact_id,
            target_format=request.target_format,
            artifact_label=request.artifact_label,
            metadata=request.metadata,
        )
        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)
        response = {
            "ok": True,
            "doc_id": result.doc_id,
            "version_no": result.version_no,
            "version_id": result.version_id,
            "updated_chunks": result.updated_chunks,
            "status": result.status,
            "publish_error": result.publish_error,
            "artifact_id": result.artifact_id,
            "artifact_version_id": result.artifact_version_id,
            "artifact_storage_key": result.artifact_storage_key,
            "artifact_mime": result.artifact_mime,
            "artifact_download_url": result.artifact_download_url,
        }
        return response
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Update document error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/api/doc/publish")
async def publish_document_endpoint(request: PublishDocumentRequest):
    """
    Publish current document text as physical file artifact version.
    """
    try:
        result = await publish_document_artifact(
            session_id=request.session_id,
            doc_id=request.doc_id,
            doc_url=request.doc_url,
            file_name=request.file_name,
            text=request.text,
            dao_id=request.dao_id,
            user_id=request.user_id,
            artifact_id=request.artifact_id,
            target_format=request.target_format,
            artifact_label=request.artifact_label,
            metadata=request.metadata,
        )
        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)
        return {
            "ok": True,
            "artifact_id": result.artifact_id,
            "version_id": result.version_id,
            "storage_key": result.storage_key,
            "mime": result.mime,
            "file_name": result.file_name,
            "download_url": result.download_url,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Publish document error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/api/doc/versions/{doc_id}")
async def list_document_versions_endpoint(doc_id: str, agent_id: str = "daarwizz", limit: int = 20):
    """
    List document versions for agent/doc pair.
    """
    try:
        data = await list_document_versions(agent_id=agent_id, doc_id=doc_id, limit=limit)
        if not data.get("ok"):
            raise HTTPException(status_code=400, detail=data.get("error", "Failed to load versions"))
        return data
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"List document versions error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/api/doc/context/{session_id}")
async def get_document_context(session_id: str):
    """
@@ -258,3 +410,56 @@ async def get_document_context(session_id: str):
        logger.error(f"Get document context error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/api/doc/artifacts/{artifact_id}/versions/{version_id}/download")
async def download_artifact_version_via_gateway(
    artifact_id: str,
    version_id: str,
    filename: Optional[str] = None,
    inline: bool = False,
):
    """
    Proxy download for artifact version to avoid exposing internal MinIO host to browser clients.
    """
    aid = (artifact_id or "").strip()
    vid = (version_id or "").strip()
    if not aid or not vid:
        raise HTTPException(status_code=400, detail="artifact_id and version_id are required")

    try:
        async with httpx.AsyncClient(timeout=DOC_DOWNLOAD_TIMEOUT_SECONDS) as client:
            meta_resp = await client.get(
                f"{ARTIFACT_REGISTRY_URL}/artifacts/{aid}/versions/{vid}/download"
            )
            if meta_resp.status_code >= 400:
                detail = ""
                try:
                    detail = meta_resp.json().get("detail")  # type: ignore[assignment]
                except Exception:
                    detail = meta_resp.text[:200]
                raise HTTPException(status_code=meta_resp.status_code, detail=detail or "Version download info failed")
            meta = meta_resp.json()
            signed_url = (meta.get("url") or "").strip()
            if not signed_url:
                raise HTTPException(status_code=502, detail="artifact-registry returned empty download URL")

            file_resp = await client.get(signed_url)
            if file_resp.status_code >= 400:
                raise HTTPException(status_code=502, detail=f"Artifact storage download failed: {file_resp.status_code}")

            mime = (meta.get("mime") or file_resp.headers.get("content-type") or "application/octet-stream").strip()
            storage_key = str(meta.get("storage_key") or "")
            inferred_name = storage_key.rsplit("/", 1)[-1] if "/" in storage_key else storage_key
            out_name = (filename or inferred_name or f"{aid}_{vid}.bin").strip()
            out_name = re.sub(r"[^A-Za-z0-9._-]+", "_", out_name).strip("._") or f"{aid}_{vid}.bin"
            disposition = "inline" if inline else "attachment"
            headers = {
                "Content-Disposition": f'{disposition}; filename="{out_name}"',
                "Cache-Control": "private, max-age=60",
            }
            return Response(content=file_resp.content, media_type=mime, headers=headers)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Artifact version proxy download failed: aid={aid}, vid={vid}, err={e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Artifact proxy download failed")
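Downloading through the proxy then looks like a plain GET against the gateway; a minimal sketch (host, IDs, and filename are placeholders):

    # Hypothetical download via the gateway proxy; IDs and host are illustrative.
    import httpx

    resp = httpx.get(
        "http://localhost:8080/api/doc/artifacts/art-1/versions/v-1/download",
        params={"filename": "report.docx", "inline": False},
    )
    with open("report.docx", "wb") as f:
        f.write(resp.content)  # bytes are relayed by the gateway, not fetched from MinIO directly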
@@ -143,6 +143,10 @@ class MemoryClient:
                    "body_text": e.get("content", ""),
                    "kind": e.get("kind", "message"),
                    "type": "user" if e.get("role") == "user" else "agent",
                    "role": e.get("role", "unknown"),
                    "timestamp": e.get("timestamp"),
                    "user_id": e.get("user_id"),
                    "sender_name": e.get("sender_name"),
                }
                for e in events
                if e.get("content")
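Applied to a sample event, the mapping above yields a flat record; a minimal illustration (the input shape is assumed from the e.get(...) calls, not documented elsewhere in this commit):

    # Hypothetical input/output pair for the event mapping above.
    event = {"content": "hello", "role": "user", "kind": "message",
             "timestamp": "2024-01-01T00:00:00Z", "user_id": "tg:1", "sender_name": "Ann"}
    record = {"body_text": "hello", "kind": "message", "type": "user",
              "role": "user", "timestamp": "2024-01-01T00:00:00Z",
              "user_id": "tg:1", "sender_name": "Ann"}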
@@ -445,4 +449,3 @@ class MemoryClient:

# Global client instance
memory_client = MemoryClient()


@@ -11,18 +11,23 @@ This service can be used by:
import os
import logging
import hashlib
import base64
import json
import re
from typing import Optional, Dict, Any, List
from pydantic import BaseModel
from datetime import datetime
from io import BytesIO

from router_client import send_to_router
from memory_client import memory_client

logger = logging.getLogger(__name__)

SHARED_EXCEL_POLICY_AGENTS = {"agromatrix", "helion", "nutra", "greenfood"}
ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000")
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
DOC_WRITEBACK_CREATED_BY = os.getenv("DOC_WRITEBACK_CREATED_BY", "gateway-doc-service")
GATEWAY_PUBLIC_BASE_URL = os.getenv("GATEWAY_PUBLIC_BASE_URL", "").rstrip("/")


class QAItem(BaseModel):
@@ -51,6 +56,35 @@ class IngestResult(BaseModel):
    error: Optional[str] = None


class UpdateResult(BaseModel):
    """Result of document update with version bump."""
    success: bool
    doc_id: Optional[str] = None
    version_no: Optional[int] = None
    version_id: Optional[int] = None
    updated_chunks: int = 0
    status: str = "unknown"
    publish_error: Optional[str] = None
    artifact_id: Optional[str] = None
    artifact_version_id: Optional[str] = None
    artifact_storage_key: Optional[str] = None
    artifact_mime: Optional[str] = None
    artifact_download_url: Optional[str] = None
    error: Optional[str] = None


class PublishResult(BaseModel):
    """Result of artifact write-back publish."""
    success: bool
    artifact_id: Optional[str] = None
    version_id: Optional[str] = None
    storage_key: Optional[str] = None
    mime: Optional[str] = None
    file_name: Optional[str] = None
    download_url: Optional[str] = None
    error: Optional[str] = None


class QAResult(BaseModel):
    """Result of RAG query about a document"""
    success: bool
@@ -84,6 +118,266 @@ class DocumentService:
        """Initialize document service"""
        self.memory_client = memory_client

    async def _router_post_json(
        self,
        path: str,
        payload: Dict[str, Any],
        timeout: float = 45.0,
    ) -> Dict[str, Any]:
        import httpx

        base = ROUTER_URL.rstrip("/")
        url = f"{base}{path}"
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.post(url, json=payload)
            body = {}
            try:
                body = resp.json()
            except Exception:
                body = {"ok": False, "error": f"Invalid JSON from router ({resp.status_code})"}
            if resp.status_code >= 400:
                err = body.get("detail") or body.get("error") or f"HTTP {resp.status_code}"
                raise RuntimeError(f"Router error on {path}: {err}")
            return body if isinstance(body, dict) else {"ok": False, "error": "Invalid router response type"}

    async def _router_get_json(
        self,
        path: str,
        timeout: float = 30.0,
    ) -> Dict[str, Any]:
        import httpx

        base = ROUTER_URL.rstrip("/")
        url = f"{base}{path}"
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(url)
            body = {}
            try:
                body = resp.json()
            except Exception:
                body = {"ok": False, "error": f"Invalid JSON from router ({resp.status_code})"}
            if resp.status_code >= 400:
                err = body.get("detail") or body.get("error") or f"HTTP {resp.status_code}"
                raise RuntimeError(f"Router error on {path}: {err}")
            return body if isinstance(body, dict) else {"ok": False, "error": "Invalid router response type"}

    async def _artifact_post_json(
        self,
        path: str,
        payload: Dict[str, Any],
        timeout: float = 45.0,
    ) -> Dict[str, Any]:
        import httpx

        base = ARTIFACT_REGISTRY_URL.rstrip("/")
        url = f"{base}{path}"
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.post(url, json=payload)
            body = {}
            try:
                body = resp.json()
            except Exception:
                body = {"ok": False, "error": f"Invalid JSON from artifact-registry ({resp.status_code})"}
            if resp.status_code >= 400:
                err = body.get("detail") or body.get("error") or f"HTTP {resp.status_code}"
                raise RuntimeError(f"Artifact registry error on {path}: {err}")
            return body if isinstance(body, dict) else {"ok": False, "error": "Invalid artifact response type"}

    async def _artifact_get_json(
        self,
        path: str,
        timeout: float = 30.0,
    ) -> Dict[str, Any]:
        import httpx

        base = ARTIFACT_REGISTRY_URL.rstrip("/")
        url = f"{base}{path}"
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(url)
            body = {}
            try:
                body = resp.json()
            except Exception:
                body = {"ok": False, "error": f"Invalid JSON from artifact-registry ({resp.status_code})"}
            if resp.status_code >= 400:
                err = body.get("detail") or body.get("error") or f"HTTP {resp.status_code}"
                raise RuntimeError(f"Artifact registry error on {path}: {err}")
            return body if isinstance(body, dict) else {"ok": False, "error": "Invalid artifact response type"}

    def _resolve_format(self, file_name: Optional[str], target_format: Optional[str]) -> str:
        fmt = (target_format or "").strip().lower().lstrip(".")
        if fmt:
            return fmt
        if file_name and "." in file_name:
            return file_name.rsplit(".", 1)[1].strip().lower()
        return "txt"

    def _compose_output_name(self, file_name: Optional[str], doc_id: str, fmt: str) -> str:
        base = "document"
        if file_name:
            base = file_name.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
            if "." in base:
                base = base.rsplit(".", 1)[0]
        elif doc_id:
            base = doc_id
        safe_base = re.sub(r"[^A-Za-z0-9._-]+", "_", base).strip("._") or "document"
        return f"{safe_base}.{fmt}"

    def _gateway_artifact_download_path(self, artifact_id: str, version_id: str) -> str:
        aid = (artifact_id or "").strip()
        vid = (version_id or "").strip()
        return f"/api/doc/artifacts/{aid}/versions/{vid}/download"

    def _gateway_artifact_download_url(self, artifact_id: str, version_id: str) -> str:
        path = self._gateway_artifact_download_path(artifact_id, version_id)
        if GATEWAY_PUBLIC_BASE_URL:
            return f"{GATEWAY_PUBLIC_BASE_URL}{path}"
        return path

    def _render_document_bytes(
        self,
        text: str,
        file_name: Optional[str],
        doc_id: str,
        target_format: Optional[str] = None,
    ) -> Dict[str, Any]:
        body = (text or "").strip()
        if not body:
            raise ValueError("Cannot render empty document text")

        fmt = self._resolve_format(file_name=file_name, target_format=target_format)
        output_name = self._compose_output_name(file_name=file_name, doc_id=doc_id, fmt=fmt)

        if fmt in {"txt"}:
            payload = body.encode("utf-8")
            return {"bytes": payload, "mime": "text/plain; charset=utf-8", "file_name": output_name}
        if fmt in {"md", "markdown"}:
            payload = body.encode("utf-8")
            return {"bytes": payload, "mime": "text/markdown; charset=utf-8", "file_name": output_name}
        if fmt in {"json"}:
            parsed: Any
            try:
                parsed = json.loads(body)
            except Exception:
                parsed = {"text": body}
            payload = json.dumps(parsed, ensure_ascii=False, indent=2).encode("utf-8")
            return {"bytes": payload, "mime": "application/json", "file_name": output_name}
        if fmt in {"csv"}:
            payload = body.encode("utf-8")
            return {"bytes": payload, "mime": "text/csv; charset=utf-8", "file_name": output_name}
        if fmt in {"xlsx", "xlsm", "xls"}:
            try:
                from openpyxl import Workbook
            except Exception as e:
                raise RuntimeError(f"openpyxl is required for {fmt} rendering: {e}")
            wb = Workbook()
            ws = wb.active
            ws.title = "Document"
            lines = [ln for ln in body.splitlines()] or [body]
            for idx, line in enumerate(lines, start=1):
                ws.cell(row=idx, column=1, value=line)
            buf = BytesIO()
            wb.save(buf)
            mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            return {"bytes": buf.getvalue(), "mime": mime, "file_name": self._compose_output_name(file_name, doc_id, "xlsx")}
        if fmt in {"docx"}:
            try:
                from docx import Document
            except Exception as e:
                raise RuntimeError(f"python-docx is required for docx rendering: {e}")
            doc = Document()
            for line in body.splitlines():
                doc.add_paragraph(line if line else " ")
            buf = BytesIO()
            doc.save(buf)
            mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            return {"bytes": buf.getvalue(), "mime": mime, "file_name": self._compose_output_name(file_name, doc_id, "docx")}

        payload = body.encode("utf-8")
        fallback_name = self._compose_output_name(file_name=file_name, doc_id=doc_id, fmt="txt")
        return {"bytes": payload, "mime": "text/plain; charset=utf-8", "file_name": fallback_name}

    async def _publish_text_artifact(
        self,
        text: str,
        doc_id: str,
        file_name: Optional[str] = None,
        dao_id: Optional[str] = None,
        user_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
        target_format: Optional[str] = None,
        label: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> PublishResult:
        try:
            rendered = self._render_document_bytes(
                text=text,
                file_name=file_name,
                doc_id=doc_id,
                target_format=target_format,
            )
            content_bytes = rendered["bytes"]
            content_b64 = base64.b64encode(content_bytes).decode("ascii")

            effective_artifact_id = (artifact_id or "").strip()
            if not effective_artifact_id:
                create_resp = await self._artifact_post_json(
                    "/artifacts",
                    {
                        "type": "doc",
                        "title": file_name or doc_id,
                        "project_id": dao_id,
                        "acl_ref": dao_id,
                        "created_by": user_id or DOC_WRITEBACK_CREATED_BY,
                    },
                    timeout=30.0,
                )
                effective_artifact_id = str(create_resp.get("artifact_id") or "").strip()
                if not effective_artifact_id:
                    return PublishResult(success=False, error="Artifact create failed: empty artifact_id")

            meta = {"doc_id": doc_id, "source": "doc_update_publish"}
            if isinstance(metadata, dict):
                meta.update(metadata)

            version_resp = await self._artifact_post_json(
                f"/artifacts/{effective_artifact_id}/versions/from_base64",
                {
                    "content_base64": content_b64,
                    "mime": rendered["mime"],
                    "filename": rendered["file_name"],
                    "label": label or "edited",
                    "meta_json": meta,
                },
                timeout=45.0,
            )
            version_id = str(version_resp.get("version_id") or "").strip()
            storage_key = version_resp.get("storage_key")
            if not version_id:
                return PublishResult(
                    success=False,
                    artifact_id=effective_artifact_id,
                    error="Artifact version create failed: empty version_id",
                )

            download_url = self._gateway_artifact_download_url(
                artifact_id=effective_artifact_id,
                version_id=version_id,
            )

            return PublishResult(
                success=True,
                artifact_id=effective_artifact_id,
                version_id=version_id,
                storage_key=storage_key,
                mime=rendered["mime"],
                file_name=rendered["file_name"],
                download_url=download_url,
            )
        except Exception as e:
            logger.error(f"publish_text_artifact failed: {e}", exc_info=True)
            return PublishResult(success=False, error=str(e))

    def _is_excel_filename(self, file_name: Optional[str]) -> bool:
        if not file_name:
            return False
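To make the write-back path concrete, a sketch of what _render_document_bytes yields for a docx target (a minimal illustration only; it assumes python-docx is installed and uses the global doc_service instance):

    # Hypothetical call illustrating the rendering contract above.
    rendered = doc_service._render_document_bytes(
        text="Line one\nLine two",
        file_name="notes.txt",
        doc_id="abc123",
        target_format="docx",
    )
    # rendered["mime"] -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    # rendered["file_name"] -> "notes.docx"
    # rendered["bytes"] is the serialized python-docx payload, later base64-encoded
    # by _publish_text_artifact for the /versions/from_base64 call.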
@@ -462,7 +756,8 @@ class DocumentService:
        doc_url: Optional[str] = None,
        file_name: Optional[str] = None,
        dao_id: str = None,
        user_id: str = None
        user_id: str = None,
        agent_id: str = "daarwizz",
    ) -> IngestResult:
        """
        Ingest document chunks into RAG/Memory.
@@ -488,64 +783,60 @@ class DocumentService:
            file_name = file_name or doc_context.file_name
            dao_id = dao_id or doc_context.dao_id

            if not doc_id and not doc_url:
            if not doc_url:
                return IngestResult(
                    success=False,
                    error="No document ID or URL provided"
                    error="No document URL available for ingest"
                )

            # Build request to Router with ingest flag
            router_request = {
                "mode": "doc_parse",
                "agent": "parser",

            parsed = await self.parse_document(
                session_id=session_id,
                doc_url=doc_url,
                file_name=file_name or "document",
                dao_id=dao_id or "",
                user_id=user_id or "",
                output_mode="markdown",
                metadata={"source": self._extract_source(session_id), "mode": "ingest"},
            )
            if not parsed.success:
                return IngestResult(success=False, error=parsed.error or "Document parse failed")

            effective_doc_id = doc_id or parsed.doc_id
            if not effective_doc_id:
                effective_doc_id = hashlib.md5(f"{session_id}:{file_name}:{datetime.utcnow().isoformat()}".encode()).hexdigest()[:12]

            doc_text = (parsed.markdown or "").strip()
            if not doc_text:
                return IngestResult(success=False, error="No extractable text for ingestion")

            payload = {
                "agent_id": (agent_id or "daarwizz").lower(),
                "doc_id": effective_doc_id,
                "file_name": file_name or "document",
                "text": doc_text,
                "dao_id": dao_id,
                "user_id": user_id,
                "metadata": {
                    "source": self._extract_source(session_id),
                    "dao_id": dao_id,
                    "user_id": user_id,
                    "session_id": session_id,
                },
                "payload": {
                    "output_mode": "chunks",  # Use chunks for RAG ingestion
                    "dao_id": dao_id,
                    "user_id": user_id,
                    "ingest": True,  # Flag for ingestion
                    "source": self._extract_source(session_id),
                },
            }

            if doc_url:
                router_request["payload"]["doc_url"] = doc_url
                router_request["payload"]["file_name"] = file_name or "document.pdf"

            if doc_id:
                router_request["payload"]["doc_id"] = doc_id

            logger.info(f"Ingesting document: session={session_id}, doc_id={doc_id}")

            # Send to Router
            response = await send_to_router(router_request)

            if not isinstance(response, dict):
                return IngestResult(
                    success=False,
                    error="Invalid response from router"
                )

            data = response.get("data", {})
            chunks = data.get("chunks", [])

            if chunks:
            response = await self._router_post_json("/v1/documents/ingest", payload, timeout=90.0)

            if response.get("ok"):
                return IngestResult(
                    success=True,
                    doc_id=doc_id or data.get("doc_id"),
                    ingested_chunks=len(chunks),
                    status="ingested"
                )
            else:
                return IngestResult(
                    success=False,
                    status="failed",
                    error="No chunks to ingest"
                    doc_id=response.get("doc_id") or effective_doc_id,
                    ingested_chunks=int(response.get("chunks_stored", 0) or 0),
                    status="ingested",
                )

            return IngestResult(
                success=False,
                doc_id=effective_doc_id,
                status="failed",
                error=response.get("error", "Router ingest failed"),
            )

        except Exception as e:
            logger.error(f"Document ingestion failed: {e}", exc_info=True)
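Assembled, the router ingest request carries the parsed markdown plus flattened metadata; an illustrative body (values are placeholders, field names follow the payload built above):

    # Hypothetical body POSTed to /v1/documents/ingest by the flow above.
    payload = {
        "agent_id": "daarwizz",
        "doc_id": "abc123def456",
        "file_name": "report.pdf",
        "text": "# Report\nParsed markdown body",
        "dao_id": "greenfood",
        "user_id": "tg:12345",
        "metadata": {"source": "telegram", "dao_id": "greenfood",
                     "user_id": "tg:12345", "session_id": "tg:12345"},
    }
    # A successful reply is expected to contain {"ok": true, "doc_id": ..., "chunks_stored": N}.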
@@ -553,6 +844,245 @@ class DocumentService:
                success=False,
                error=str(e)
            )

    async def update_document(
        self,
        session_id: str,
        doc_id: Optional[str] = None,
        doc_url: Optional[str] = None,
        file_name: Optional[str] = None,
        text: Optional[str] = None,
        dao_id: Optional[str] = None,
        user_id: Optional[str] = None,
        agent_id: str = "daarwizz",
        storage_ref: Optional[str] = None,
        publish_artifact: bool = False,
        artifact_id: Optional[str] = None,
        target_format: Optional[str] = None,
        artifact_label: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> UpdateResult:
        """
        Update existing document content and bump version in router memory.
        """
        try:
            context = await self.get_doc_context(session_id)
            if context:
                if not doc_id:
                    doc_id = context.doc_id
                if not doc_url:
                    doc_url = context.doc_url
                if not file_name:
                    file_name = context.file_name
                if not dao_id:
                    dao_id = context.dao_id

            if not doc_id:
                return UpdateResult(
                    success=False,
                    status="failed",
                    error="No document context found. Provide doc_id or parse/ingest first.",
                )

            effective_text = (text or "").strip()
            if not effective_text:
                if not doc_url:
                    return UpdateResult(
                        success=False,
                        doc_id=doc_id,
                        status="failed",
                        error="No text or doc_url provided for update",
                    )
                parsed = await self.parse_document(
                    session_id=session_id,
                    doc_url=doc_url,
                    file_name=file_name or "document",
                    dao_id=dao_id or "",
                    user_id=user_id or "",
                    output_mode="markdown",
                    metadata={"source": self._extract_source(session_id), "mode": "update"},
                )
                if not parsed.success:
                    return UpdateResult(
                        success=False,
                        doc_id=doc_id,
                        status="failed",
                        error=parsed.error or "Document parse failed",
                    )
                effective_text = (parsed.markdown or "").strip()

            if not effective_text:
                return UpdateResult(
                    success=False,
                    doc_id=doc_id,
                    status="failed",
                    error="No extractable text for update",
                )

            meta = {
                "session_id": session_id,
                "source": self._extract_source(session_id),
            }
            if isinstance(metadata, dict):
                meta.update(metadata)

            response = await self._router_post_json(
                "/v1/documents/update",
                {
                    "agent_id": (agent_id or "daarwizz").lower(),
                    "doc_id": doc_id,
                    "file_name": file_name,
                    "text": effective_text,
                    "dao_id": dao_id,
                    "user_id": user_id,
                    "storage_ref": storage_ref,
                    "metadata": meta,
                },
                timeout=90.0,
            )

            if not response.get("ok"):
                return UpdateResult(
                    success=False,
                    doc_id=doc_id,
                    status="failed",
                    error=response.get("error", "Router update failed"),
                )

            await self.save_doc_context(
                session_id=session_id,
                doc_id=doc_id,
                doc_url=doc_url,
                file_name=file_name,
                dao_id=dao_id,
                user_id=user_id,
            )

            publish = PublishResult(success=False)
            if publish_artifact:
                publish = await self._publish_text_artifact(
                    text=effective_text,
                    doc_id=doc_id,
                    file_name=file_name,
                    dao_id=dao_id,
                    user_id=user_id,
                    artifact_id=artifact_id,
                    target_format=target_format,
                    label=artifact_label,
                    metadata=meta,
                )

            return UpdateResult(
                success=True,
                doc_id=response.get("doc_id") or doc_id,
                version_no=int(response.get("version_no", 0) or 0) or None,
                version_id=int(response.get("version_id", 0) or 0) or None,
                updated_chunks=int(response.get("chunks_stored", 0) or 0),
                status="updated_published" if publish_artifact and publish.success else ("updated_publish_failed" if publish_artifact else "updated"),
                publish_error=publish.error if publish_artifact and not publish.success else None,
                artifact_id=publish.artifact_id if publish_artifact else None,
                artifact_version_id=publish.version_id if publish_artifact else None,
                artifact_storage_key=publish.storage_key if publish_artifact else None,
                artifact_mime=publish.mime if publish_artifact else None,
                artifact_download_url=publish.download_url if publish_artifact else None,
            )
        except Exception as e:
            logger.error(f"Document update failed: {e}", exc_info=True)
            return UpdateResult(
                success=False,
                doc_id=doc_id,
                status="failed",
                error=str(e),
            )
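    # Illustrative usage sketch (hypothetical session and format values): update the
    # text and publish a .docx artifact in one call.
    #
    #   result = await doc_service.update_document(
    #       session_id="tg:12345",
    #       text="Revised body",
    #       publish_artifact=True,
    #       target_format="docx",
    #   )
    #   # result.status is "updated_published" on success, or "updated_publish_failed"
    #   # when the router update succeeded but the artifact write-back did not.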

    async def list_document_versions(
        self,
        agent_id: str,
        doc_id: str,
        limit: int = 20,
    ) -> Dict[str, Any]:
        aid = (agent_id or "daarwizz").lower()
        did = (doc_id or "").strip()
        if not did:
            return {"ok": False, "error": "doc_id is required", "items": []}
        try:
            response = await self._router_get_json(
                f"/v1/documents/{did}/versions?agent_id={aid}&limit={max(1, min(int(limit or 20), 200))}",
                timeout=30.0,
            )
            return response if isinstance(response, dict) else {"ok": False, "error": "invalid_response", "items": []}
        except Exception as e:
            logger.error(f"list_document_versions failed: {e}")
            return {"ok": False, "error": str(e), "items": []}

    async def publish_document_artifact(
        self,
        session_id: str,
        doc_id: Optional[str] = None,
        doc_url: Optional[str] = None,
        file_name: Optional[str] = None,
        text: Optional[str] = None,
        dao_id: Optional[str] = None,
        user_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
        target_format: Optional[str] = None,
        artifact_label: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> PublishResult:
        """
        Publish text as a physical artifact version (.docx/.xlsx/.txt/...) without changing RAG index.
        """
        try:
            context = await self.get_doc_context(session_id)
            if context:
                if not doc_id:
                    doc_id = context.doc_id
                if not doc_url:
                    doc_url = context.doc_url
                if not file_name:
                    file_name = context.file_name
                if not dao_id:
                    dao_id = context.dao_id
                if not user_id:
                    user_id = context.user_id

            if not doc_id:
                return PublishResult(success=False, error="doc_id is required")

            body = (text or "").strip()
            if not body:
                if not doc_url:
                    return PublishResult(success=False, error="text or doc_url is required")
                parsed = await self.parse_document(
                    session_id=session_id,
                    doc_url=doc_url,
                    file_name=file_name or "document",
                    dao_id=dao_id or "",
                    user_id=user_id or "",
                    output_mode="markdown",
                    metadata={"source": self._extract_source(session_id), "mode": "publish"},
                )
                if not parsed.success:
                    return PublishResult(success=False, error=parsed.error or "Document parse failed")
                body = (parsed.markdown or "").strip()

            if not body:
                return PublishResult(success=False, error="No text available for publish")

            return await self._publish_text_artifact(
                text=body,
                doc_id=doc_id,
                file_name=file_name,
                dao_id=dao_id,
                user_id=user_id,
                artifact_id=artifact_id,
                target_format=target_format,
                label=artifact_label,
                metadata=metadata,
            )
        except Exception as e:
            logger.error(f"publish_document_artifact failed: {e}", exc_info=True)
            return PublishResult(success=False, error=str(e))

    async def ask_about_document(
        self,
@@ -625,38 +1155,30 @@ class DocumentService:
                }],
            )

            # Build RAG query request
            router_request = {
                "mode": "rag_query",
                "agent": agent_id,
                "metadata": {
                    "source": self._extract_source(session_id),
                    "dao_id": dao_id,
                    "user_id": user_id,
                    "session_id": session_id,
                },
                "payload": {
                    "question": question,
                    "dao_id": dao_id,
                    "user_id": user_id,
                    "doc_id": doc_id,
                },
            }

            logger.info(
                f"RAG query: agent={agent_id}, session={session_id}, question={question[:50]}, doc_id={doc_id}"
            )

            # Send to Router
            response = await send_to_router(router_request)

            if not isinstance(response, dict):

            response = await self._router_post_json(
                "/v1/documents/query",
                {
                    "agent_id": (agent_id or "daarwizz").lower(),
                    "question": question,
                    "doc_id": doc_id,
                    "dao_id": dao_id,
                    "user_id": user_id,
                    "limit": 5,
                },
                timeout=60.0,
            )

            if isinstance(response, dict) and not response.get("ok", False):
                return QAResult(
                    success=False,
                    error="Invalid response from router"
                    error=response.get("error", "Document query failed"),
                )

            data = response.get("data", {})

            data = response.get("data", {}) if isinstance(response, dict) else {}
            answer = data.get("answer") or data.get("text")
            sources = data.get("citations", []) or data.get("sources", [])

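The document Q&A path now posts a flat body straight to the router; an illustrative request (values are hypothetical, fields follow the code above):

    # Hypothetical body POSTed to /v1/documents/query.
    query = {
        "agent_id": "daarwizz",
        "question": "What does section 2 commit to?",
        "doc_id": "abc123def456",
        "dao_id": "greenfood",
        "user_id": "tg:12345",
        "limit": 5,
    }
    # Based on the parsing above, the reply is expected as
    # {"ok": true, "data": {"answer": "...", "citations": [...]}}.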
@@ -717,7 +1239,8 @@ async def ingest_document(
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None
    user_id: Optional[str] = None,
    agent_id: str = "daarwizz",
) -> IngestResult:
    """Ingest document chunks into RAG/Memory"""
    return await doc_service.ingest_document(
@@ -726,7 +1249,8 @@ async def ingest_document(
        doc_url=doc_url,
        file_name=file_name,
        dao_id=dao_id,
        user_id=user_id
        user_id=user_id,
        agent_id=agent_id,
    )

@@ -749,6 +1273,79 @@ async def ask_about_document(
    )


async def update_document(
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    agent_id: str = "daarwizz",
    storage_ref: Optional[str] = None,
    publish_artifact: bool = False,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
    """Update document chunks and bump version."""
    return await doc_service.update_document(
        session_id=session_id,
        doc_id=doc_id,
        doc_url=doc_url,
        file_name=file_name,
        text=text,
        dao_id=dao_id,
        user_id=user_id,
        agent_id=agent_id,
        storage_ref=storage_ref,
        publish_artifact=publish_artifact,
        artifact_id=artifact_id,
        target_format=target_format,
        artifact_label=artifact_label,
        metadata=metadata,
    )


async def list_document_versions(agent_id: str, doc_id: str, limit: int = 20) -> Dict[str, Any]:
    """List document versions from router."""
    return await doc_service.list_document_versions(
        agent_id=agent_id,
        doc_id=doc_id,
        limit=limit,
    )


async def publish_document_artifact(
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """Publish physical artifact version for document text."""
    return await doc_service.publish_document_artifact(
        session_id=session_id,
        doc_id=doc_id,
        doc_url=doc_url,
        file_name=file_name,
        text=text,
        dao_id=dao_id,
        user_id=user_id,
        artifact_id=artifact_id,
        target_format=target_format,
        artifact_label=artifact_label,
        metadata=metadata,
    )


async def save_doc_context(
    session_id: str,
    doc_id: str,