merge: integrate remote codex/sync-node1-runtime with fabric layer changes

Resolve conflicts in docker-compose.node1.yml, services/router/main.py,
and gateway-bot/services/doc_service.py — keeping both fabric layer
(NCS, node-worker, Prometheus) and document ingest/query endpoints.

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 03:09:12 -08:00
76 changed files with 7495 additions and 295 deletions

View File

@@ -3,7 +3,7 @@ FROM python:3.11-slim
LABEL maintainer="DAARION.city Team"
LABEL description="Bot Gateway - Telegram/Discord webhook handler with DAARWIZZ"
LABEL version="0.2.0"
LABEL version="0.2.1"
WORKDIR /app/gateway-bot
@@ -15,7 +15,15 @@ RUN pip install --no-cache-dir \
uvicorn==0.27.0 \
httpx==0.26.0 \
pydantic==2.5.3 \
python-multipart==0.0.6 prometheus-client>=0.20.0 PyPDF2>=3.0.0 crewai nats-py pandas openpyxl
python-multipart==0.0.6 \
prometheus-client==0.22.1 \
PyPDF2>=3.0.0 \
crewai \
nats-py \
pandas \
openpyxl \
python-docx \
redis==5.0.1
# Copy gateway code and DAARWIZZ prompt
COPY . .

View File

@@ -19,7 +19,8 @@
"onboarding",
"ecosystem"
],
"mentor": null
"mentor": null,
"district_id": "city-core"
},
"helion": {
"display_name": "Helion",
@@ -35,7 +36,8 @@
"market_analysis",
"biominer"
],
"mentor": null
"mentor": null,
"district_id": "helion"
},
"alateya": {
"display_name": "Aletheia",
@@ -58,7 +60,8 @@
"email": "alverjob@gmail.com",
"site": "https://alverjob.xyz",
"youtube": "https://www.youtube.com/@alverjob72"
}
},
"district_id": "alateya"
},
"druid": {
"display_name": "DRUID",
@@ -76,7 +79,8 @@
"inci",
"safety_basics"
],
"mentor": null
"mentor": null,
"district_id": "druid"
},
"nutra": {
"display_name": "NUTRA",
@@ -93,7 +97,8 @@
"vitamins",
"microbiome"
],
"mentor": null
"mentor": null,
"district_id": "nutra"
},
"agromatrix": {
"display_name": "Степан Матрікс",
@@ -110,7 +115,8 @@
"logistics",
"farm_economics"
],
"mentor": null
"mentor": null,
"district_id": "agromatrix"
},
"greenfood": {
"display_name": "GREENFOOD",
@@ -127,7 +133,8 @@
"food_production",
"sales"
],
"mentor": null
"mentor": null,
"district_id": "greenfood"
},
"clan": {
"display_name": "CLAN",
@@ -143,7 +150,8 @@
"culture",
"facilitation"
],
"mentor": null
"mentor": null,
"district_id": "clan"
},
"eonarch": {
"display_name": "EONARCH",
@@ -159,7 +167,8 @@
"transformation",
"spirituality"
],
"mentor": null
"mentor": null,
"district_id": "eonarch"
},
"yaromir": {
"display_name": "YAROMIR",
@@ -175,7 +184,8 @@
"code_review",
"strategy"
],
"mentor": null
"mentor": null,
"district_id": "city-core"
},
"soul": {
"display_name": "SOUL",
@@ -191,7 +201,8 @@
"values",
"wellbeing"
],
"mentor": null
"mentor": null,
"district_id": "soul"
},
"senpai": {
"display_name": "SENPAI",
@@ -207,7 +218,8 @@
"defi",
"portfolio"
],
"mentor": null
"mentor": null,
"district_id": "senpai"
},
"oneok": {
"display_name": "1OK",
@@ -227,7 +239,8 @@
"mentor": {
"name": "Ілля Титар",
"telegram": "@Titar240581"
}
},
"district_id": "city-core"
},
"sofiia": {
"display_name": "Sophia",
@@ -242,7 +255,24 @@
"platform_evolution",
"technical_leadership"
],
"mentor": null
"mentor": null,
"district_id": "city-core"
},
"dario": {
"display_name": "DARIO",
"canonical_role": "Future DAARION Agent (planned, not launched)",
"prompt_file": "dario_prompt.txt",
"telegram_mode": "disabled",
"visibility": "private",
"status": "planned",
"district_id": "city-core",
"domains": [
"city_ops",
"coordination",
"support"
],
"mentor": null,
"launch_state": "planned"
}
}
}
}

View File

@@ -32,7 +32,9 @@
## B. SHORT-FIRST
**За замовчуванням: структурована відповідь з 3-5 пунктів.**
**За замовчуванням: жива коротка відповідь 1-3 речення природною мовою.**
**Маркерні списки/шаблони 3-5 пунктів використовуй тільки коли користувач просить детально, план, чеклист або розрахунок.**
**Перше повідомлення в новій темі — розмовне, без канцеляриту та без "робото-тону".**
ЗАБОРОНЕНО:
- "Радий допомогти", "Готовий до співпраці"
@@ -55,7 +57,9 @@
**ВАЖЛИВО:**
- Ніколи не кажи "я не можу слухати аудіо" — голосові повідомлення вже перетворені на текст!
- Ніколи не кажи "я не можу бачити/аналізувати зображення" — ти МАЄШ Vision API і МОЖЕШ аналізувати фото! Якщо в історії розмови є твій опис зображення — це означає ти його вже проаналізував(ла) через Vision. Не заперечуй це.
- Фото аналізуй по доступному поточному контексту: якщо зображення є у запиті або щойно надіслане — коментуй по суті.
- Якщо для точного висновку бракує самого файлу чи чіткості, поясни це простою людською мовою і попроси надіслати фото повторно з уточненням, що саме перевірити.
- Не використовуй службові фрази типу "text-only", "vision unavailable", "технічне обмеження моделі".
Початковий режим: учень. Спочатку став уточнювальні питання і вчися у ментора.
Публічна група: @agromatrix.
@@ -94,7 +98,8 @@
- Мислиш далекоглядно: пропонуєш архітектуру рішення, а не латання симптомів.
- Будь креативним, але не фантазуй дані: якщо фактів нема — позначай як припущення і пропонуй, що зібрати.
- Спілкуйся українською (якщо користувач не перейшов на іншу мову).
- Форматуй відповіді структуровано: заголовки, списки, короткі блоки, пріоритети.
- Тримай розмовний тон: короткі природні фрази, без надмірної шаблонності.
- Структурований формат (заголовки/списки/таблиці) вмикай лише для складних задач або коли це прямо запитали.
### 4) Принципи роботи з користувачем
1. Спочатку контекст → потім рішення. Якщо контексту бракує — зроби мінімальний набір припущень і паралельно запропонуй, які дані уточнити.
@@ -113,6 +118,8 @@
- “Підготуй текст/структуру сторінки/презентації для продукту AgroMatrix”
### 6) Як ти формуєш відповіді (стандартний шаблон)
Використовуй цей шаблон ТІЛЬКИ для комплексних запитів (планування сезону, економіка, SOP, інтеграції, ТЗ).
Для звичайних коротких питань відповідай в 1-3 речення органічно, без обов'язкових секцій.
1. Ціль (1–2 речення)
2. Вхідні дані (що відомо / які припущення)
3. Рішення (план/алгоритм/кроки)

View File

@@ -1,12 +1,13 @@
"""
FastAPI app instance for Gateway Bot
"""
"""FastAPI app instance for Gateway Bot."""
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from http_api import router as gateway_router
from http_api_doc import router as doc_router
from daarion_facade.invoke_api import router as invoke_router
from daarion_facade.registry_api import router as registry_router
logging.basicConfig(
level=logging.INFO,
@@ -15,36 +16,47 @@ logging.basicConfig(
app = FastAPI(
title="Bot Gateway with DAARWIZZ",
version="1.0.0",
description="Gateway service for Telegram/Discord bots DAGI Router"
version="1.1.0",
description="Gateway service for Telegram/Discord bots + DAARION public facade"
)
# CORS middleware
# CORS for web UI clients (gateway only).
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_origins=[
"https://daarion.city",
"https://www.daarion.city",
"http://localhost:3000",
],
allow_origin_regex=r"https://.*\.lovable\.app",
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
allow_methods=["GET", "POST", "OPTIONS"],
allow_headers=["Authorization", "Content-Type"],
)
# Include gateway routes
# Existing gateway routes.
app.include_router(gateway_router, prefix="", tags=["gateway"])
app.include_router(doc_router, prefix="", tags=["docs"])
# Public facade routes for DAARION.city UI.
app.include_router(registry_router)
app.include_router(invoke_router)
@app.get("/")
async def root():
return {
"service": "bot-gateway",
"version": "1.0.0",
"version": "1.1.0",
"agent": "DAARWIZZ",
"endpoints": [
"POST /telegram/webhook",
"POST /discord/webhook",
"POST /api/doc/parse",
"POST /api/doc/ingest",
"POST /api/doc/ask",
"GET /api/doc/context/{session_id}",
"GET /health"
"GET /v1/registry/agents",
"GET /v1/registry/districts",
"GET /v1/metrics",
"POST /v1/invoke",
"GET /v1/jobs/{job_id}",
"GET /health",
]
}

View File

@@ -0,0 +1 @@
"""DAARION public facade package."""

View File

@@ -0,0 +1,212 @@
import asyncio
from datetime import datetime, timezone
import hmac
import json
import os
import uuid
from typing import Any, Dict, List
import httpx
from fastapi import APIRouter, HTTPException, Request, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from .redis_jobs import create_job, enqueue_job, get_job
from .registry_api import _load_registry
router = APIRouter(prefix="/v1", tags=["daarion-facade"])
# Job lifecycle statuses; terminal ones end the SSE event stream.
EVENT_TERMINAL_STATUSES = {"done", "failed"}
EVENT_KNOWN_STATUSES = {"queued", "running", "done", "failed"}
EVENT_POLL_SECONDS = float(os.getenv("DAARION_JOB_EVENTS_POLL_SECONDS", "0.5"))

# Internal router used to proxy shared-memory review calls.
ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000").rstrip("/")
ROUTER_REVIEW_TIMEOUT = float(os.getenv("DAARION_ROUTER_REVIEW_TIMEOUT_SECONDS", "20"))

# Mentor-review auth: mode plus a comma/semicolon separated token list.
AGROMATRIX_REVIEW_AUTH_MODE = os.getenv("AGROMATRIX_REVIEW_AUTH_MODE", "bearer").strip().lower()
AGROMATRIX_REVIEW_BEARER_TOKENS = [
    token.strip()
    for token in os.getenv("AGROMATRIX_REVIEW_BEARER_TOKENS", "").replace(";", ",").split(",")
    if token.strip()
]
class InvokeInput(BaseModel):
    """User-supplied payload for an agent invocation."""

    prompt: str = Field(min_length=1)
    images: List[str] = Field(default_factory=list)


class InvokeRequest(BaseModel):
    """Envelope for POST /v1/invoke."""

    agent_id: str
    input: InvokeInput
    metadata: Dict[str, Any] = Field(default_factory=dict)


class InvokeResponse(BaseModel):
    """202 response: the queued job handle plus its polling URL."""

    job_id: str
    status: str
    status_url: str


class SharedMemoryReviewRequest(BaseModel):
    """Mentor verdict for a pending shared-memory point."""

    point_id: str
    approve: bool
    reviewer: str | None = None
    note: str | None = None
def _extract_bearer_token(request: Request) -> str:
    """Pull the raw token out of an `Authorization: Bearer ...` header.

    Raises 401 when the header is absent, malformed, or empty.
    """
    header = request.headers.get("Authorization", "")
    prefix = "Bearer "
    if not header.startswith(prefix):
        raise HTTPException(status_code=401, detail="Missing Bearer token")
    token = header[len(prefix):].strip()
    if not token:
        raise HTTPException(status_code=401, detail="Empty Bearer token")
    return token


def _require_mentor_auth(request: Request) -> str:
    """Enforce the configured mentor-review auth policy.

    Returns the accepted token ("" when auth is disabled).  Raises
    401/403 for bad credentials, 500 for an unknown mode, and 503 when
    bearer mode is enabled but no tokens are configured.
    """
    mode = AGROMATRIX_REVIEW_AUTH_MODE
    if mode in {"off", "none", "disabled"}:
        return ""
    if mode != "bearer":
        raise HTTPException(status_code=500, detail=f"Unsupported AGROMATRIX_REVIEW_AUTH_MODE={mode}")
    if not AGROMATRIX_REVIEW_BEARER_TOKENS:
        raise HTTPException(status_code=503, detail="Review auth is not configured")
    token = _extract_bearer_token(request)
    # Constant-time comparison against every configured token.
    if not any(hmac.compare_digest(token, candidate) for candidate in AGROMATRIX_REVIEW_BEARER_TOKENS):
        raise HTTPException(status_code=403, detail="Invalid mentor token")
    return token
async def _router_json(
method: str,
path: str,
*,
payload: Dict[str, Any] | None = None,
params: Dict[str, Any] | None = None,
authorization: str | None = None,
) -> Dict[str, Any]:
headers: Dict[str, str] = {}
if authorization:
headers["Authorization"] = authorization
url = f"{ROUTER_URL}{path}"
try:
async with httpx.AsyncClient(timeout=ROUTER_REVIEW_TIMEOUT) as client:
resp = await client.request(method, url, json=payload, params=params, headers=headers)
except httpx.TimeoutException:
raise HTTPException(status_code=504, detail="Router timeout")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Router unavailable: {e}")
try:
body = resp.json()
except Exception:
body = {"raw": resp.text}
if resp.status_code >= 400:
detail = body.get("detail") if isinstance(body, dict) else body
raise HTTPException(status_code=resp.status_code, detail=detail or f"Router error {resp.status_code}")
return body if isinstance(body, dict) else {"data": body}
def _sse_message(event: str, payload: Dict[str, Any]) -> str:
return f"event: {event}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"
@router.post("/invoke", status_code=status.HTTP_202_ACCEPTED, response_model=InvokeResponse)
async def invoke(payload: InvokeRequest) -> InvokeResponse:
    """Accept an agent invocation and queue it as an async job (202)."""
    agents = _load_registry().get("agents", {})
    if payload.agent_id not in agents:
        raise HTTPException(status_code=404, detail=f"Unknown agent_id: {payload.agent_id}")
    job_id = f"job_{uuid.uuid4().hex}"
    created = datetime.now(timezone.utc).isoformat()
    # Full job document; the worker fills result/error/timestamps later.
    job_doc = {
        "job_id": job_id,
        "status": "queued",
        "agent_id": payload.agent_id,
        "input": payload.input.model_dump(),
        "metadata": payload.metadata,
        "result": None,
        "error": None,
        "created_at": created,
        "updated_at": created,
        "started_at": None,
        "finished_at": None,
    }
    await create_job(job_id, job_doc)
    await enqueue_job(job_id)
    return InvokeResponse(job_id=job_id, status="queued", status_url=f"/v1/jobs/{job_id}")


@router.get("/jobs/{job_id}")
async def job_status(job_id: str) -> Dict[str, Any]:
    """Return the stored job document, or 404 when unknown/expired."""
    job = await get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    return job
@router.get("/jobs/{job_id}/events")
async def job_events(job_id: str, request: Request) -> StreamingResponse:
    """Stream job status changes as Server-Sent Events.

    Emits one frame per observed (status, updated_at) change and closes
    once the job reaches a terminal status or the client disconnects.
    """
    if not await get_job(job_id):
        raise HTTPException(status_code=404, detail="Job not found")

    async def event_stream():
        previous = None
        # Ask clients to wait 1s before reconnecting.
        yield "retry: 1000\n\n"
        while not await request.is_disconnected():
            job = await get_job(job_id)
            if not job:
                # Job expired (TTL) mid-stream: report and stop.
                yield _sse_message("failed", {"job_id": job_id, "status": "failed", "error": {"message": "Job not found"}})
                break
            status_value = str(job.get("status", "unknown"))
            state = (status_value, str(job.get("updated_at", "")))
            if state != previous:
                event_name = status_value if status_value in EVENT_KNOWN_STATUSES else "status"
                yield _sse_message(event_name, job)
                previous = state
            if status_value in EVENT_TERMINAL_STATUSES:
                break
            await asyncio.sleep(EVENT_POLL_SECONDS)

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable nginx buffering so frames reach the client promptly.
            "X-Accel-Buffering": "no",
        },
    )
@router.get("/agromatrix/shared-memory/pending")
async def agromatrix_shared_pending(limit: int = 50) -> Dict[str, Any]:
    """List pending shared-memory points, proxied from the router.

    `limit` is clamped to the 1..200 range before forwarding.
    """
    clamped = max(1, min(limit, 200))
    return await _router_json(
        "GET",
        "/v1/agromatrix/shared-memory/pending",
        params={"limit": clamped},
    )


@router.post("/agromatrix/shared-memory/review")
async def agromatrix_shared_review(req: SharedMemoryReviewRequest, request: Request) -> Dict[str, Any]:
    """Submit a mentor verdict for a shared-memory point via the router."""
    token = _require_mentor_auth(request)
    return await _router_json(
        "POST",
        "/v1/agromatrix/shared-memory/review",
        payload=req.model_dump(),
        authorization=f"Bearer {token}" if token else None,
    )

View File

@@ -0,0 +1,287 @@
import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
import httpx
from redis.asyncio import Redis
from .registry_api import _load_crewai_roles, _load_district_registry, _load_registry
# Worker-wide logging setup (runs at import time, like the sibling workers).
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger("daarion-metrics-poller")

# Poller configuration, all overridable via environment variables.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
POLL_INTERVAL_SECONDS = int(os.getenv("DAARION_METRICS_POLL_INTERVAL_SECONDS", "10"))
METRICS_TTL_SECONDS = int(os.getenv("DAARION_METRICS_TTL_SECONDS", "60"))
HTTP_CONNECT_TIMEOUT_SECONDS = float(os.getenv("DAARION_METRICS_HTTP_CONNECT_TIMEOUT_SECONDS", "2"))
HTTP_TOTAL_TIMEOUT_SECONDS = float(os.getenv("DAARION_METRICS_HTTP_TOTAL_TIMEOUT_SECONDS", "5"))
NODES_TOTAL = int(os.getenv("DAARION_NODE_COUNT", "1"))
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")

# Redis keys that the registry API reads back.
DASHBOARD_KEY = "daarion:metrics:dashboard"
DISTRICT_KEY_PREFIX = "daarion:metrics:district"

# Lazily-created shared Redis connection (see redis_client()).
_redis: Optional[Redis] = None
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _ensure_url(value: str) -> str:
value = (value or "").strip()
if not value:
return ""
if value.startswith("http://") or value.startswith("https://"):
return value
return f"https://{value}"
def _health_candidates(district: Dict[str, Any]) -> List[str]:
base = _ensure_url(str(district.get("domain") or ""))
candidates: List[str] = []
explicit = str(district.get("health_url") or "").strip()
if explicit:
candidates.append(_ensure_url(explicit))
if base:
candidates.extend(
[
f"{base}/.well-known/daarion-health.json",
f"{base}/health",
f"{base}/v1/health",
]
)
dedup: List[str] = []
seen = set()
for url in candidates:
if url and url not in seen:
dedup.append(url)
seen.add(url)
return dedup
def _extract_agents_online(payload: Dict[str, Any], agents_total: int) -> Optional[int]:
raw = payload.get("agents_online")
if isinstance(raw, bool):
return agents_total if raw else 0
if isinstance(raw, int):
return max(0, min(raw, agents_total))
agents = payload.get("agents")
if isinstance(agents, list):
count = 0
for agent in agents:
if not isinstance(agent, dict):
continue
status = str(agent.get("status", "")).lower()
if status in {"online", "active", "ok"}:
count += 1
return min(count, agents_total)
return None
async def redis_client() -> Redis:
    """Return the module-wide Redis connection, creating it on first use."""
    global _redis
    if _redis is None:
        _redis = Redis.from_url(REDIS_URL, decode_responses=True)
    return _redis


async def close_redis() -> None:
    """Close and forget the shared Redis connection, if one exists."""
    global _redis
    if _redis is not None:
        await _redis.close()
        _redis = None
async def _fetch_json_with_latency(
    client: httpx.AsyncClient,
    url: str,
) -> Tuple[bool, Optional[Dict[str, Any]], Optional[float], Optional[str]]:
    """GET `url` and return (ok, json_dict_or_None, latency_ms, error).

    `ok` is False for transport errors and HTTP >= 400.  A successful
    response whose body is not a JSON object yields (True, None, ..., None).
    """
    started = time.perf_counter()
    try:
        response = await client.get(url)
        latency_ms = round((time.perf_counter() - started) * 1000, 2)
        if response.status_code >= 400:
            return False, None, latency_ms, f"HTTP {response.status_code}"
        data: Optional[Dict[str, Any]] = None
        try:
            parsed = response.json()
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            data = parsed
        return True, data, latency_ms, None
    except Exception as e:
        return False, None, round((time.perf_counter() - started) * 1000, 2), str(e)


async def _read_memory_vectors(client: httpx.AsyncClient) -> int:
    """Read the memory-service vector count; 0 on any failure."""
    try:
        ok, payload, _, _ = await _fetch_json_with_latency(client, f"{MEMORY_SERVICE_URL}/health")
        if not ok or not payload:
            return 0
        counts = payload.get("vector_store", {}).get("memories", {})
        return int(counts.get("vectors_count", 0) or 0)
    except Exception:
        return 0
async def _registry_snapshot() -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]], int, int]:
    """Snapshot the registries for one poll cycle.

    Returns (districts, agents_by_district, agents_total, subagents_total).
    Agents without a district fall back to "city-core".
    """
    raw_districts = _load_district_registry().get("districts", [])
    districts = [d for d in raw_districts if isinstance(d, dict) and d.get("district_id")]
    agents_map = _load_registry().get("agents", {})
    role_counts = await _load_crewai_roles()
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    subagents_total = 0
    for agent_id, cfg in agents_map.items():
        if not isinstance(cfg, dict):
            continue
        key = str(agent_id)
        subagents_total += int(role_counts.get(key, 0))
        district_id = str(cfg.get("district_id") or "city-core")
        grouped.setdefault(district_id, []).append(
            {"agent_id": key, "status": str(cfg.get("status", "active"))}
        )
    return districts, grouped, len(agents_map), subagents_total
async def build_dashboard() -> Dict[str, Any]:
    """Probe every district's health endpoint and assemble the dashboard doc.

    Returns {"global": {...}, "by_district": [...], "updated_at": ...}.
    A district counts as online when any of its candidate health URLs
    answers below HTTP 400.
    """
    districts, agents_by_district, agents_total, subagents_total = await _registry_snapshot()
    timeout = httpx.Timeout(timeout=HTTP_TOTAL_TIMEOUT_SECONDS, connect=HTTP_CONNECT_TIMEOUT_SECONDS)
    rows: List[Dict[str, Any]] = []
    districts_online = 0
    agents_online_total = 0
    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
        memory_vectors = await _read_memory_vectors(client)
        for district in districts:
            district_id = str(district.get("district_id"))
            members = agents_by_district.get(district_id, [])
            row = {
                "district_id": district_id,
                "title": district.get("title") or district_id,
                "domain": str(district.get("domain") or ""),
                "status": district.get("status") or "active",
                "ok": False,
                "agents_total": len(members),
                "agents_online": 0,
                "latency_ms": None,
                "last_check_ts": _now_iso(),
                "error": None,
            }
            last_error = "No health endpoint configured"
            for candidate in _health_candidates(district):
                ok, payload, latency_ms, error_message = await _fetch_json_with_latency(client, candidate)
                # Keep the latest attempt's latency whether it succeeded or not.
                row["latency_ms"] = latency_ms
                if ok:
                    row["ok"] = True
                    row["error"] = None
                    inferred = _extract_agents_online(payload or {}, len(members))
                    row["agents_online"] = inferred if inferred is not None else len(members)
                    break
                last_error = error_message or "health check failed"
            if row["ok"]:
                districts_online += 1
                agents_online_total += int(row.get("agents_online") or 0)
            else:
                row["error"] = {"message": last_error}
            rows.append(row)
    return {
        "global": {
            "nodes": NODES_TOTAL,
            "districts": len(districts),
            "agents": agents_total,
            "subagents": subagents_total,
            "memory_vectors": memory_vectors,
            "districts_online": districts_online,
            "agents_online": agents_online_total,
        },
        "by_district": rows,
        "updated_at": _now_iso(),
    }
async def publish_dashboard(dashboard: Dict[str, Any]) -> None:
    """Write the dashboard (plus one key per district) to Redis with a TTL."""
    redis = await redis_client()
    await redis.set(DASHBOARD_KEY, json.dumps(dashboard, ensure_ascii=False), ex=METRICS_TTL_SECONDS)
    for row in dashboard.get("by_district", []):
        district_id = row.get("district_id")
        if not district_id:
            continue
        await redis.set(
            f"{DISTRICT_KEY_PREFIX}:{district_id}",
            json.dumps(row, ensure_ascii=False),
            ex=METRICS_TTL_SECONDS,
        )


async def run_once() -> None:
    """Build and publish one dashboard snapshot, logging a summary line."""
    dashboard = await build_dashboard()
    await publish_dashboard(dashboard)
    totals = dashboard["global"]
    logger.info(
        "dashboard_updated districts=%s districts_online=%s agents=%s agents_online=%s",
        totals.get("districts"),
        totals.get("districts_online"),
        totals.get("agents"),
        totals.get("agents_online"),
    )
async def worker_loop() -> None:
    """Poll forever: one dashboard refresh every POLL_INTERVAL_SECONDS.

    Cycle failures are logged and the loop keeps going; the time a cycle
    took is deducted from the sleep (with a 1-second floor).
    """
    logger.info(
        "metrics_poller_started interval=%ss ttl=%ss redis=%s",
        POLL_INTERVAL_SECONDS,
        METRICS_TTL_SECONDS,
        REDIS_URL,
    )
    while True:
        cycle_start = time.perf_counter()
        try:
            await run_once()
        except asyncio.CancelledError:
            raise  # let shutdown/cancellation propagate
        except Exception:
            logger.exception("metrics_poller_cycle_failed")
        elapsed = time.perf_counter() - cycle_start
        await asyncio.sleep(max(1.0, POLL_INTERVAL_SECONDS - elapsed))
if __name__ == "__main__":
    # Run the poller until interrupted, then try to close Redis cleanly.
    try:
        asyncio.run(worker_loop())
    finally:
        try:
            asyncio.run(close_redis())
        except Exception:
            # Best-effort cleanup; the event loop may already be unusable.
            pass

View File

@@ -0,0 +1,84 @@
import asyncio
import json
import os
from typing import Any, Dict, Optional
from redis.asyncio import Redis
# Connection settings and key layout for the facade job store.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
JOB_KEY_PREFIX = "daarion:jobs"
QUEUE_KEY = "daarion:jobs:queue"
# Jobs expire after 72h unless refreshed by an update.
JOB_TTL_SECONDS = int(os.getenv("DAARION_JOB_TTL_SECONDS", str(72 * 3600)))

# Lazily-created shared Redis connection (see redis_client()).
_redis: Optional[Redis] = None


def _job_key(job_id: str) -> str:
    """Redis key under which a job document is stored."""
    return f"{JOB_KEY_PREFIX}:{job_id}"
async def redis_client() -> Redis:
    """Return the shared Redis connection, creating it on first use."""
    global _redis
    if _redis is None:
        _redis = Redis.from_url(REDIS_URL, decode_responses=True)
    return _redis


async def close_redis() -> None:
    """Close and forget the shared Redis connection, if one exists."""
    global _redis
    if _redis is not None:
        await _redis.close()
        _redis = None


async def create_job(job_id: str, payload: Dict[str, Any]) -> None:
    """Store (or overwrite) a job document, refreshing its TTL."""
    client = await redis_client()
    await client.set(_job_key(job_id), json.dumps(payload, ensure_ascii=False), ex=JOB_TTL_SECONDS)


async def get_job(job_id: str) -> Optional[Dict[str, Any]]:
    """Load a job document; None when missing, expired, or unparseable."""
    client = await redis_client()
    raw = await client.get(_job_key(job_id))
    if not raw:
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return None
async def update_job(job_id: str, patch: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Merge `patch` into a job document and persist it.

    Returns the updated document, or None when the job no longer exists.
    NOTE(review): this read-modify-write is not atomic — concurrent
    writers to the same job can lose fields; confirm single-worker use.
    """
    current = await get_job(job_id)
    if not current:
        return None
    current.update(patch)
    await create_job(job_id, current)
    return current


async def enqueue_job(job_id: str) -> None:
    """Push a job id onto the work queue (LPUSH; consumed via BRPOP)."""
    client = await redis_client()
    await client.lpush(QUEUE_KEY, job_id)


async def dequeue_job(block_seconds: int = 5) -> Optional[str]:
    """Pop the next job id, blocking up to `block_seconds`; None on timeout."""
    client = await redis_client()
    popped = await client.brpop(QUEUE_KEY, timeout=block_seconds)
    if not popped:
        return None
    # BRPOP returns (queue_name, value).
    return popped[1]


async def wait_for_redis(timeout_seconds: int = 30) -> None:
    """Block until Redis answers PING, retrying once per second.

    Re-raises the last connection error once `timeout_seconds` elapses.
    """
    deadline = asyncio.get_running_loop().time() + timeout_seconds
    while True:
        try:
            client = await redis_client()
            await client.ping()
            return
        except Exception:
            if asyncio.get_running_loop().time() >= deadline:
                raise
            await asyncio.sleep(1)

View File

@@ -0,0 +1,268 @@
import json
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
import httpx
from fastapi import APIRouter
from redis.asyncio import Redis
router = APIRouter(prefix="/v1", tags=["daarion-facade"])
# Cache TTL (seconds) shared by registry files and CrewAI role counts.
REGISTRY_CACHE_TTL = int(os.getenv("REGISTRY_CACHE_TTL", "30"))
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")
CREWAI_SERVICE_URL = os.getenv("CREWAI_SERVICE_URL", "http://dagi-staging-crewai-service:9010")
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
# Key written by the metrics poller and read back in /v1/metrics/dashboard.
METRICS_DASHBOARD_KEY = "daarion:metrics:dashboard"

# In-process caches, each shaped {"loaded_at": epoch_seconds, "data": payload}.
_REGISTRY_CACHE: Dict[str, Any] = {"loaded_at": 0.0, "data": None}
_DISTRICT_CACHE: Dict[str, Any] = {"loaded_at": 0.0, "data": None}
_CREWAI_CACHE: Dict[str, Any] = {"loaded_at": 0.0, "data": {}}
_REDIS: Optional[Redis] = None
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _registry_paths() -> List[Path]:
return [
Path("/app/gateway-bot/agent_registry.json"),
Path("/opt/microdao-daarion/config/agent_registry.json"),
Path(__file__).resolve().parents[1] / "agent_registry.json",
]
def _district_paths() -> List[Path]:
return [
Path("/app/gateway-bot/district_registry.json"),
Path(__file__).resolve().parents[1] / "district_registry.json",
]
def _load_registry() -> Dict[str, Any]:
    """Load the agent registry from the first existing candidate path.

    Results are cached in-process for REGISTRY_CACHE_TTL seconds; when no
    file is found an empty {"agents": {}} document is cached instead.
    """
    now = time.time()
    if _REGISTRY_CACHE.get("data") and (now - _REGISTRY_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL):
        return _REGISTRY_CACHE["data"]
    for path in _registry_paths():
        if path.exists():
            with path.open("r", encoding="utf-8") as handle:
                data = json.load(handle)
            _REGISTRY_CACHE.update({"loaded_at": now, "data": data})
            return data
    data = {"agents": {}}
    _REGISTRY_CACHE.update({"loaded_at": now, "data": data})
    return data


def _load_district_registry() -> Dict[str, Any]:
    """Load the district registry (same caching scheme as _load_registry)."""
    now = time.time()
    if _DISTRICT_CACHE.get("data") and (now - _DISTRICT_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL):
        return _DISTRICT_CACHE["data"]
    for path in _district_paths():
        if path.exists():
            with path.open("r", encoding="utf-8") as handle:
                data = json.load(handle)
            _DISTRICT_CACHE.update({"loaded_at": now, "data": data})
            return data
    data = {"districts": []}
    _DISTRICT_CACHE.update({"loaded_at": now, "data": data})
    return data
async def _redis_client() -> Redis:
    """Return the shared Redis connection, creating it on first use."""
    global _REDIS
    if _REDIS is None:
        _REDIS = Redis.from_url(REDIS_URL, decode_responses=True)
    return _REDIS


async def _load_cached_dashboard() -> Optional[Dict[str, Any]]:
    """Fetch the poller-published dashboard from Redis; None on any failure."""
    try:
        client = await _redis_client()
        raw = await client.get(METRICS_DASHBOARD_KEY)
        if not raw:
            return None
        return json.loads(raw)
    except Exception:
        return None
async def _load_crewai_roles() -> Dict[str, int]:
    """Map agent_id -> default subagent count from the CrewAI service.

    Cached for REGISTRY_CACHE_TTL seconds.  Failures are cached as {},
    so an unreachable CrewAI service is retried once per TTL window.
    """
    now = time.time()
    if now - _CREWAI_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL:
        return _CREWAI_CACHE.get("data", {})
    roles: Dict[str, int] = {}
    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            resp = await client.get(f"{CREWAI_SERVICE_URL}/crew/agents")
        if resp.status_code == 200:
            for agent_id, info in resp.json().items():
                default_roles = info.get("default_roles")
                roles[str(agent_id)] = int(default_roles) if isinstance(default_roles, int) else 0
    except Exception:
        roles = {}
    _CREWAI_CACHE.update({"loaded_at": now, "data": roles})
    return roles
@router.get("/registry/agents")
async def get_agents() -> Dict[str, Any]:
    """Public agent catalog: registry entries enriched with CrewAI team size."""
    reg = _load_registry()
    agents = reg.get("agents", {}) if isinstance(reg, dict) else {}
    role_counts = await _load_crewai_roles()
    items: List[Dict[str, Any]] = []
    for agent_id, cfg in agents.items():
        if not isinstance(cfg, dict):
            continue
        domains = cfg.get("domains") or []
        items.append(
            {
                "agent_id": agent_id,
                "title": cfg.get("display_name") or agent_id,
                "role": cfg.get("canonical_role") or "",
                # First domain is "primary"; the rest are aliases.
                "domain_primary": domains[0] if domains else "general",
                "domain_aliases": domains[1:] if len(domains) > 1 else [],
                "visibility": cfg.get("visibility", "public"),
                "status": cfg.get("status", "active"),
                "team": {"subagents_total": role_counts.get(agent_id, 0)},
                "district_id": cfg.get("district_id") or "city-core",
                "avatar_url": cfg.get("avatar_url"),
                "health_url": cfg.get("health_url"),
            }
        )
    return {"items": items, "total": len(items)}
@router.get("/registry/districts")
async def get_districts() -> Dict[str, Any]:
    """District catalog merged from the static registry and live agent list.

    Districts present in either source are returned; a lead agent is
    inferred when the catalog does not name one.
    """
    agents = (await get_agents()).get("items", [])
    members_by_district: Dict[str, List[Dict[str, Any]]] = {}
    for agent in agents:
        members_by_district.setdefault(agent.get("district_id", "city-core"), []).append(agent)
    catalog = _load_district_registry().get("districts", [])
    catalog_by_id: Dict[str, Dict[str, Any]] = {
        str(d.get("district_id")): d for d in catalog if isinstance(d, dict) and d.get("district_id")
    }
    items: List[Dict[str, Any]] = []
    for district_id in sorted(set(catalog_by_id) | set(members_by_district)):
        members = members_by_district.get(district_id, [])
        base = catalog_by_id.get(district_id, {})
        domain = base.get("domain") or ("daarion.city" if district_id == "city-core" else f"{district_id}.daarion.city")
        lead_agent_id = base.get("lead_agent_id")
        if not lead_agent_id:
            # Fallback lead: daarwizz for city-core, otherwise the first member.
            if district_id == "city-core" and any(m.get("agent_id") == "daarwizz" for m in members):
                lead_agent_id = "daarwizz"
            elif members:
                lead_agent_id = members[0].get("agent_id")
            else:
                lead_agent_id = None
        items.append(
            {
                "district_id": district_id,
                "title": base.get("title") or district_id.replace("-", " ").title(),
                "domain": domain,
                "status": base.get("status", "active"),
                "logo_url": base.get("logo_url"),
                "health_url": base.get("health_url"),
                "well_known": {
                    "manifest": f"https://{domain}/.well-known/daarion-district.json",
                    "health": f"https://{domain}/.well-known/daarion-health.json",
                    "capabilities": f"https://{domain}/.well-known/daarion-capabilities.json",
                },
                "lead_agent_id": lead_agent_id,
                "agents_total": len(members),
            }
        )
    return {"items": items, "total": len(items)}
@router.get("/metrics")
async def get_metrics() -> Dict[str, Any]:
    """Headline platform counters; the memory vector count is best-effort."""
    agents_payload = await get_agents()
    districts_payload = await get_districts()
    agents = agents_payload.get("items", [])
    memory_vectors = 0
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{MEMORY_SERVICE_URL}/health")
        if resp.status_code == 200:
            stats = resp.json()
            memory_vectors = int(stats.get("vector_store", {}).get("memories", {}).get("vectors_count", 0))
    except Exception:
        memory_vectors = 0
    return {
        "nodes": 1,
        "districts": districts_payload.get("total", 0),
        "agents": len(agents),
        "subagents": sum(int((a.get("team") or {}).get("subagents_total", 0)) for a in agents),
        "memory_vectors": memory_vectors,
    }
@router.get("/metrics/dashboard")
async def get_metrics_dashboard() -> Dict[str, Any]:
    """Serve the poller-published dashboard, or a registry-only fallback.

    The fallback carries no liveness data (ok/agents_online are None,
    online counters zero) and is tagged "source": "fallback_registry".
    """
    cached = await _load_cached_dashboard()
    if cached:
        return cached
    metrics = await get_metrics()
    districts = (await get_districts()).get("items", [])
    by_district = [
        {
            "district_id": d.get("district_id"),
            "title": d.get("title"),
            "domain": d.get("domain"),
            "status": d.get("status"),
            "ok": None,
            "agents_total": d.get("agents_total", 0),
            "agents_online": None,
            "latency_ms": None,
            "last_check_ts": None,
        }
        for d in districts
    ]
    return {
        "global": {
            "nodes": metrics.get("nodes", 1),
            "districts": metrics.get("districts", 0),
            "agents": metrics.get("agents", 0),
            "subagents": metrics.get("subagents", 0),
            "memory_vectors": metrics.get("memory_vectors", 0),
            "districts_online": 0,
            "agents_online": 0,
        },
        "by_district": by_district,
        "updated_at": _now_iso(),
        "source": "fallback_registry",
    }

View File

@@ -0,0 +1,100 @@
import asyncio
import logging
import os
from typing import Dict
import httpx
from .reminders import close_redis, pop_due_reminders
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger("daarion-reminder-worker")
POLL_SECONDS = float(os.getenv("DAARION_REMINDER_POLL_SECONDS", "2"))
TELEGRAM_TIMEOUT = float(os.getenv("DAARION_REMINDER_TELEGRAM_TIMEOUT", "20"))
AGENT_TOKEN_ENV: Dict[str, str] = {
"daarwizz": "DAARWIZZ_TELEGRAM_BOT_TOKEN",
"helion": "HELION_TELEGRAM_BOT_TOKEN",
"greenfood": "GREENFOOD_TELEGRAM_BOT_TOKEN",
"agromatrix": "AGROMATRIX_TELEGRAM_BOT_TOKEN",
"alateya": "ALATEYA_TELEGRAM_BOT_TOKEN",
"nutra": "NUTRA_TELEGRAM_BOT_TOKEN",
"druid": "DRUID_TELEGRAM_BOT_TOKEN",
"clan": "CLAN_TELEGRAM_BOT_TOKEN",
"eonarch": "EONARCH_TELEGRAM_BOT_TOKEN",
"senpai": "SENPAI_TELEGRAM_BOT_TOKEN",
"oneok": "ONEOK_TELEGRAM_BOT_TOKEN",
"soul": "SOUL_TELEGRAM_BOT_TOKEN",
"yaromir": "YAROMIR_TELEGRAM_BOT_TOKEN",
"sofiia": "SOFIIA_TELEGRAM_BOT_TOKEN",
}
def _token_for_agent(agent_id: str) -> str:
    """Resolve the Telegram bot token for an agent; returns "" when the
    agent is unknown or its env var is unset."""
    env_name = AGENT_TOKEN_ENV.get((agent_id or "").lower(), "")
    if not env_name:
        return ""
    return os.getenv(env_name, "")
async def _send_reminder(item: Dict[str, str]) -> bool:
    """Deliver one due reminder to its agent's Telegram chat.

    Returns True when Telegram accepted the message; False when the payload
    is unusable (missing bot token, chat id, or text) or the API call did
    not return HTTP 200. All failure modes are logged and swallowed so a
    single bad reminder never stalls the worker loop.
    """
    agent_id = str(item.get("agent_id", ""))
    chat_id = str(item.get("chat_id", ""))
    reminder_text = str(item.get("text", "")).strip()
    due_at = str(item.get("due_at", ""))
    token = _token_for_agent(agent_id)
    if not token:
        # No bot token configured for this agent — cannot deliver.
        logger.warning("reminder_skip_no_token agent=%s reminder_id=%s", agent_id, item.get("reminder_id"))
        return False
    if not chat_id or not reminder_text:
        logger.warning("reminder_skip_invalid_payload reminder_id=%s", item.get("reminder_id"))
        return False
    body = {
        "chat_id": chat_id,
        "text": f"⏰ Нагадування ({agent_id})\n\n{reminder_text}\n\n🕒 {due_at}",
    }
    url = f"https://api.telegram.org/bot{token}/sendMessage"
    async with httpx.AsyncClient(timeout=TELEGRAM_TIMEOUT) as client:
        resp = await client.post(url, json=body)
        if resp.status_code != 200:
            # Log a truncated body to keep log lines bounded.
            logger.warning(
                "reminder_send_failed reminder_id=%s status=%s body=%s",
                item.get("reminder_id"),
                resp.status_code,
                resp.text[:300],
            )
            return False
    logger.info("reminder_sent reminder_id=%s agent=%s chat=%s", item.get("reminder_id"), agent_id, chat_id)
    return True
async def worker_loop() -> None:
    """Poll Redis for due reminders forever and dispatch each to Telegram.

    Runs until cancelled; any per-cycle or per-item error is logged and the
    loop continues after sleeping POLL_SECONDS.
    """
    logger.info("reminder_worker_started poll_seconds=%s", POLL_SECONDS)
    while True:
        try:
            items = await pop_due_reminders(limit=20)
            if items:
                for item in items:
                    try:
                        await _send_reminder(item)
                    except Exception:
                        # One failed send must not drop the rest of the batch.
                        logger.exception("reminder_send_exception reminder_id=%s", item.get("reminder_id"))
        except asyncio.CancelledError:
            # Propagate cancellation so shutdown is clean.
            raise
        except Exception:
            logger.exception("reminder_worker_cycle_failed")
        await asyncio.sleep(POLL_SECONDS)
if __name__ == "__main__":
    # Run the polling loop; on exit close the shared Redis connection.
    try:
        asyncio.run(worker_loop())
    finally:
        # Best-effort cleanup in a fresh event loop (the original loop is
        # already closed by asyncio.run above); errors are ignored.
        try:
            asyncio.run(close_redis())
        except Exception:
            pass

View File

@@ -0,0 +1,154 @@
import json
import os
import time
import uuid
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from redis.asyncio import Redis
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
REMINDER_PREFIX = "daarion:reminders"
REMINDER_BY_ID = f"{REMINDER_PREFIX}:by_id"
REMINDER_SCHEDULE = f"{REMINDER_PREFIX}:schedule"
REMINDER_TTL_SECONDS = int(os.getenv("DAARION_REMINDER_TTL_SECONDS", str(30 * 24 * 3600)))
_redis: Optional[Redis] = None
@dataclass
class Reminder:
    """One scheduled reminder as stored in Redis."""

    reminder_id: str
    agent_id: str
    chat_id: str
    user_id: str
    text: str
    due_ts: int
    created_at: str

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict suitable for JSON storage."""
        field_names = (
            "reminder_id",
            "agent_id",
            "chat_id",
            "user_id",
            "text",
            "due_ts",
            "created_at",
        )
        return {name: getattr(self, name) for name in field_names}
async def redis_client() -> Redis:
    """Return the module-wide Redis connection, creating it on first use."""
    global _redis
    if _redis is not None:
        return _redis
    _redis = Redis.from_url(REDIS_URL, decode_responses=True)
    return _redis
async def close_redis() -> None:
    """Close and forget the shared Redis connection, if one exists."""
    global _redis
    if _redis is None:
        return
    await _redis.close()
    _redis = None
def _iso_now() -> str:
return datetime.now(timezone.utc).isoformat()
def _iso_from_ts(ts: int) -> str:
return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()
async def create_reminder(agent_id: str, chat_id: str, user_id: str, text: str, due_ts: int) -> Dict[str, Any]:
    """Persist a new reminder and schedule it for delivery.

    The payload is stored under a per-id key (with a TTL so abandoned
    reminders eventually expire) and the id is added to a sorted set scored
    by the due timestamp, which the worker polls. Returns the stored dict
    plus a human-readable "due_at" ISO timestamp.
    """
    reminder = Reminder(
        reminder_id=f"rem_{uuid.uuid4().hex[:16]}",
        agent_id=agent_id,
        chat_id=str(chat_id),
        user_id=str(user_id),
        text=text.strip(),
        due_ts=int(due_ts),
        created_at=_iso_now(),
    )
    r = await redis_client()
    key = f"{REMINDER_BY_ID}:{reminder.reminder_id}"
    payload = json.dumps(reminder.to_dict(), ensure_ascii=False)
    # TTL bounds storage; the schedule entry is the delivery trigger.
    await r.set(key, payload, ex=REMINDER_TTL_SECONDS)
    await r.zadd(REMINDER_SCHEDULE, {reminder.reminder_id: float(reminder.due_ts)})
    result = reminder.to_dict()
    result["due_at"] = _iso_from_ts(reminder.due_ts)
    return result
async def list_reminders(agent_id: str, chat_id: str, user_id: str, limit: int = 10) -> List[Dict[str, Any]]:
    """List upcoming reminders belonging to (agent_id, chat_id, user_id).

    Scans the schedule sorted set in pages and keeps only entries owned by
    the given agent/chat/user, up to `limit`. Entries whose per-id payload
    has expired or fails to parse are skipped silently.

    Fix vs. previous version: the old code fetched a single window of
    limit*5 ids and stopped, so when other users' reminders filled that
    window the caller could receive fewer than `limit` matches even though
    more existed. We now page through the schedule until `limit` matches
    are found or the set is exhausted.
    """
    r = await redis_client()
    now_ts = int(time.time())
    # Look back up to a year so recently-due (but unclaimed) items still show.
    min_score = now_ts - 365 * 24 * 3600
    page_size = max(1, limit * 5)
    out: List[Dict[str, Any]] = []
    offset = 0
    while len(out) < limit:
        ids = await r.zrangebyscore(REMINDER_SCHEDULE, min=min_score, max="+inf", start=offset, num=page_size)
        if not ids:
            break
        offset += len(ids)
        for reminder_id in ids:
            raw = await r.get(f"{REMINDER_BY_ID}:{reminder_id}")
            if not raw:
                # Payload expired (TTL) while still scheduled.
                continue
            try:
                item = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # Ownership filter: all three identifiers must match.
            if item.get("agent_id") != agent_id:
                continue
            if str(item.get("chat_id")) != str(chat_id):
                continue
            if str(item.get("user_id")) != str(user_id):
                continue
            item["due_at"] = _iso_from_ts(int(item.get("due_ts", 0)))
            out.append(item)
            if len(out) >= limit:
                break
    return out
async def cancel_reminder(reminder_id: str, agent_id: str, chat_id: str, user_id: str) -> bool:
    """Delete a reminder, but only when it belongs to the given
    (agent, chat, user) triple. Returns True when something was removed."""
    r = await redis_client()
    key = f"{REMINDER_BY_ID}:{reminder_id}"
    raw = await r.get(key)
    if not raw:
        return False
    try:
        item = json.loads(raw)
    except json.JSONDecodeError:
        return False
    owned = (
        item.get("agent_id") == agent_id
        and str(item.get("chat_id")) == str(chat_id)
        and str(item.get("user_id")) == str(user_id)
    )
    if not owned:
        return False
    await r.delete(key)
    await r.zrem(REMINDER_SCHEDULE, reminder_id)
    return True
async def pop_due_reminders(limit: int = 20) -> List[Dict[str, Any]]:
    """Atomically claim up to `limit` reminders whose due time has passed.

    ZREM on the schedule set acts as the claim: only the worker that
    successfully removes an id (removed == 1) delivers it, so concurrent
    workers never double-send the same reminder. Claimed payloads are
    deleted from Redis before being returned; entries whose payload has
    expired or fails to parse are dropped.
    """
    r = await redis_client()
    now_ts = int(time.time())
    ids = await r.zrangebyscore(REMINDER_SCHEDULE, min="-inf", max=now_ts, start=0, num=max(1, limit))
    out: List[Dict[str, Any]] = []
    for reminder_id in ids:
        removed = await r.zrem(REMINDER_SCHEDULE, reminder_id)
        if removed == 0:
            # Another worker claimed this id between the range read and now.
            continue
        raw = await r.get(f"{REMINDER_BY_ID}:{reminder_id}")
        if not raw:
            # Payload already expired via TTL; nothing to deliver.
            continue
        await r.delete(f"{REMINDER_BY_ID}:{reminder_id}")
        try:
            item = json.loads(raw)
            item["due_at"] = _iso_from_ts(int(item.get("due_ts", now_ts)))
            out.append(item)
        except json.JSONDecodeError:
            continue
    return out

View File

@@ -0,0 +1,107 @@
import asyncio
from datetime import datetime, timezone
import logging
import os
from typing import Any, Dict
import httpx
from .redis_jobs import close_redis, dequeue_job, get_job, update_job, wait_for_redis
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger("daarion-gateway-worker")
ROUTER_BASE_URL = os.getenv("ROUTER_BASE_URL", os.getenv("ROUTER_URL", "http://router:8000"))
ROUTER_TIMEOUT_SECONDS = float(os.getenv("ROUTER_WORKER_TIMEOUT", "60"))
def _now() -> str:
return datetime.now(timezone.utc).isoformat()
async def _call_router(agent_id: str, input_payload: Dict[str, Any], metadata: Dict[str, Any]) -> Dict[str, Any]:
    """POST a job's prompt (and optional images) to the router's infer endpoint.

    Raises on non-2xx responses (via raise_for_status), letting the caller
    record the failure on the job. Returns a trimmed result dict with the
    response text and model/backend/token bookkeeping fields.
    """
    body: Dict[str, Any] = {
        "prompt": input_payload.get("prompt", ""),
        "metadata": metadata or {},
    }
    # Images are optional; omit the key entirely when absent.
    images = input_payload.get("images") or []
    if images:
        body["images"] = images
    url = f"{ROUTER_BASE_URL}/v1/agents/{agent_id}/infer"
    async with httpx.AsyncClient(timeout=ROUTER_TIMEOUT_SECONDS) as client:
        resp = await client.post(url, json=body)
        resp.raise_for_status()
        data = resp.json()
    return {
        "response": data.get("response", ""),
        "model": data.get("model"),
        "backend": data.get("backend"),
        "tokens_used": data.get("tokens_used"),
    }
async def run_once(job_id: str) -> None:
    """Process one queued job end-to-end.

    Marks the job running, forwards its input to the router, then records
    either the result (status "done") or the error (status "failed") on the
    job record. Never raises: all failures are captured and logged.
    """
    job = await get_job(job_id)
    if not job:
        # Job id was dequeued but its record is gone — nothing to do.
        logger.warning("job_missing: %s", job_id)
        return
    await update_job(job_id, {"status": "running", "started_at": _now(), "updated_at": _now()})
    agent_id = job.get("agent_id")
    input_payload = job.get("input") or {}
    metadata = job.get("metadata") or {}
    try:
        result = await _call_router(agent_id, input_payload, metadata)
        await update_job(
            job_id,
            {
                "status": "done",
                "result": result,
                # Clear any stale error from a previous attempt.
                "error": None,
                "finished_at": _now(),
                "updated_at": _now(),
            },
        )
        logger.info("job_done: %s agent=%s", job_id, agent_id)
    except Exception as e:
        await update_job(
            job_id,
            {
                "status": "failed",
                "error": {"type": e.__class__.__name__, "message": str(e)},
                "finished_at": _now(),
                "updated_at": _now(),
            },
        )
        logger.exception("job_failed: %s agent=%s", job_id, agent_id)
async def worker_loop() -> None:
    """Dequeue job ids from Redis forever and process each via run_once.

    Waits for Redis to become reachable first, then blocks up to 10s per
    dequeue. Unexpected errors are logged and the loop backs off briefly.
    """
    await wait_for_redis(60)
    logger.info("worker_started router=%s", ROUTER_BASE_URL)
    while True:
        try:
            job_id = await dequeue_job(block_seconds=10)
            if not job_id:
                # Blocking pop timed out; poll again.
                continue
            await run_once(job_id)
        except asyncio.CancelledError:
            # Propagate cancellation so shutdown is clean.
            raise
        except Exception:
            logger.exception("worker_loop_error")
            await asyncio.sleep(1)
if __name__ == "__main__":
    # Run the dequeue loop; on exit close the shared Redis connection.
    try:
        asyncio.run(worker_loop())
    finally:
        # Best-effort cleanup in a fresh event loop (the original loop is
        # already closed by asyncio.run above); errors are ignored.
        try:
            asyncio.run(close_redis())
        except Exception:
            pass

View File

@@ -0,0 +1,92 @@
{
"districts": [
{
"district_id": "city-core",
"title": "City Core - DAARION.city",
"domain": "daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "daarwizz"
},
{
"district_id": "helion",
"title": "Helion District",
"domain": "helion.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "helion"
},
{
"district_id": "alateya",
"title": "Alateya District",
"domain": "alateya.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "alateya"
},
{
"district_id": "druid",
"title": "Druid District",
"domain": "druid.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "druid"
},
{
"district_id": "nutra",
"title": "Nutra District",
"domain": "nutra.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "nutra"
},
{
"district_id": "agromatrix",
"title": "AgroMatrix District",
"domain": "agromatrix.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "agromatrix"
},
{
"district_id": "greenfood",
"title": "GreenFood District",
"domain": "greenfood.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "greenfood"
},
{
"district_id": "clan",
"title": "Clan District",
"domain": "clan.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "clan"
},
{
"district_id": "eonarch",
"title": "Eonarch District",
"domain": "eonarch.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "eonarch"
},
{
"district_id": "soul",
"title": "Soul District",
"domain": "soul.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "soul"
},
{
"district_id": "senpai",
"title": "Senpai District",
"domain": "senpai.daarion.city",
"status": "active",
"logo_url": null,
"lead_agent_id": "senpai"
}
]
}

View File

@@ -1871,23 +1871,53 @@ async def process_document(
Dict з результатом обробки
"""
mime_type = document.get("mime_type", "")
mime_type_l = (mime_type or "").lower()
file_name = document.get("file_name", "")
file_id = document.get("file_id")
file_name_lower = file_name.lower()
allowed_exts = {".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx", ".zip"}
allowed_exts = {
".pdf", ".doc", ".docx", ".rtf", ".odt",
".txt", ".md", ".markdown",
".csv", ".tsv", ".xls", ".xlsx", ".xlsm", ".ods",
".ppt", ".pptx", ".odp",
".json", ".yaml", ".yml", ".xml", ".html", ".htm",
".zip",
".jpg", ".jpeg", ".png", ".webp", ".gif", ".bmp", ".tiff",
}
is_allowed = any(file_name_lower.endswith(ext) for ext in allowed_exts)
if mime_type == "application/pdf":
if mime_type_l == "application/pdf":
is_allowed = True
if mime_type in {
if mime_type_l in {
"application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/rtf",
"text/rtf",
"application/vnd.oasis.opendocument.text",
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.ms-excel.sheet.macroenabled.12",
"application/vnd.oasis.opendocument.spreadsheet",
"application/vnd.ms-powerpoint",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/vnd.oasis.opendocument.presentation",
"text/plain",
"text/markdown",
"text/csv",
"text/tab-separated-values",
"application/json",
"application/yaml",
"application/x-yaml",
"text/yaml",
"application/xml",
"text/xml",
"text/html",
"application/zip",
"application/x-zip-compressed",
}:
is_allowed = True
if mime_type_l.startswith("image/"):
is_allowed = True
if is_allowed and file_id:
logger.info(f"{agent_config.name}: Document from {username} (tg:{user_id}), file_id: {file_id}, file_name: {file_name}")
@@ -2027,7 +2057,7 @@ async def process_document(
telegram_token = agent_config.get_telegram_token()
await send_telegram_message(
chat_id,
"Наразі підтримуються формати: PDF, DOCX, TXT, MD, CSV, XLSX, ZIP.",
"Підтримуються формати: PDF/DOC/DOCX/RTF/ODT, TXT/MD/CSV/TSV, XLS/XLSX/XLSM/ODS, PPT/PPTX/ODP, JSON/YAML/XML/HTML, ZIP, зображення.",
telegram_token,
)
return {"ok": False, "error": "Unsupported document type"}
@@ -3681,7 +3711,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
doc_url=file_url,
file_name=file_name,
dao_id=dao_id,
user_id=f"tg:{user_id}"
user_id=f"tg:{user_id}",
agent_id=agent_config.agent_id,
)
if result.success:
@@ -3705,7 +3736,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
result = await ingest_document(
session_id=session_id,
dao_id=dao_id,
user_id=f"tg:{user_id}"
user_id=f"tg:{user_id}",
agent_id=agent_config.agent_id,
)
if result.success:

View File

@@ -6,20 +6,32 @@ Endpoints:
- POST /api/doc/parse - Parse a document
- POST /api/doc/ingest - Ingest document to RAG
- POST /api/doc/ask - Ask question about document
- POST /api/doc/update - Update existing document text (versioned)
- POST /api/doc/publish - Publish physical file version via artifact registry
- GET /api/doc/versions/{doc_id} - List document versions
- GET /api/doc/artifacts/{artifact_id}/versions/{version_id}/download - Download via gateway proxy
"""
import logging
import os
import re
from typing import Optional, Dict, Any
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from fastapi.responses import Response
from pydantic import BaseModel
import httpx
from services.doc_service import (
doc_service,
parse_document,
ingest_document,
ask_about_document,
update_document,
list_document_versions,
publish_document_artifact,
get_doc_context,
ParsedResult,
IngestResult,
UpdateResult,
QAResult,
DocContext
)
@@ -27,6 +39,8 @@ from services.doc_service import (
logger = logging.getLogger(__name__)
router = APIRouter()
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
DOC_DOWNLOAD_TIMEOUT_SECONDS = float(os.getenv("DOC_DOWNLOAD_TIMEOUT_SECONDS", "60"))
# ========================================
@@ -52,6 +66,7 @@ class IngestDocumentRequest(BaseModel):
file_name: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
agent_id: str = "daarwizz"
class AskDocumentRequest(BaseModel):
@@ -61,6 +76,40 @@ class AskDocumentRequest(BaseModel):
doc_id: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
agent_id: str = "daarwizz"
class UpdateDocumentRequest(BaseModel):
"""Request to update existing document content."""
session_id: str
doc_id: Optional[str] = None
doc_url: Optional[str] = None
file_name: Optional[str] = None
text: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
agent_id: str = "daarwizz"
storage_ref: Optional[str] = None
publish_artifact: bool = False
artifact_id: Optional[str] = None
target_format: Optional[str] = None
artifact_label: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
class PublishDocumentRequest(BaseModel):
"""Request to publish document as physical artifact version."""
session_id: str
doc_id: Optional[str] = None
doc_url: Optional[str] = None
file_name: Optional[str] = None
text: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
artifact_id: Optional[str] = None
target_format: Optional[str] = None
artifact_label: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
# ========================================
@@ -167,7 +216,8 @@ async def ingest_document_endpoint(request: IngestDocumentRequest):
doc_url=request.doc_url,
file_name=request.file_name,
dao_id=request.dao_id,
user_id=request.user_id
user_id=request.user_id,
agent_id=request.agent_id,
)
if not result.success:
@@ -209,7 +259,8 @@ async def ask_about_document_endpoint(request: AskDocumentRequest):
question=request.question,
doc_id=doc_id,
dao_id=request.dao_id,
user_id=request.user_id
user_id=request.user_id,
agent_id=request.agent_id,
)
if not result.success:
@@ -227,6 +278,107 @@ async def ask_about_document_endpoint(request: AskDocumentRequest):
raise HTTPException(status_code=500, detail=str(e))
@router.post("/api/doc/update")
async def update_document_endpoint(request: UpdateDocumentRequest):
    """
    Update a document and bump its version.

    If text is omitted and doc_url exists, text is re-parsed from the source
    document. When request.publish_artifact is set, a physical file version
    is also published; publish failures are reported via publish_error in
    the response rather than failing the whole update.

    Returns a summary dict with the new version identifiers and (when
    published) the artifact coordinates. Raises 400 when the service
    reports failure, 500 on unexpected errors.
    """
    try:
        result = await update_document(
            session_id=request.session_id,
            doc_id=request.doc_id,
            doc_url=request.doc_url,
            file_name=request.file_name,
            text=request.text,
            dao_id=request.dao_id,
            user_id=request.user_id,
            agent_id=request.agent_id,
            storage_ref=request.storage_ref,
            publish_artifact=request.publish_artifact,
            artifact_id=request.artifact_id,
            target_format=request.target_format,
            artifact_label=request.artifact_label,
            metadata=request.metadata,
        )
        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)
        response = {
            "ok": True,
            "doc_id": result.doc_id,
            "version_no": result.version_no,
            "version_id": result.version_id,
            "updated_chunks": result.updated_chunks,
            "status": result.status,
            # Publish outcome is advisory; a failed publish does not 400.
            "publish_error": result.publish_error,
            "artifact_id": result.artifact_id,
            "artifact_version_id": result.artifact_version_id,
            "artifact_storage_key": result.artifact_storage_key,
            "artifact_mime": result.artifact_mime,
            "artifact_download_url": result.artifact_download_url,
        }
        return response
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Update document error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/api/doc/publish")
async def publish_document_endpoint(request: PublishDocumentRequest):
    """
    Publish current document text as a physical file artifact version.

    Delegates to the doc service; returns the new artifact/version
    coordinates and a gateway-proxied download URL. Raises 400 when the
    service reports failure, 500 on unexpected errors.
    """
    try:
        result = await publish_document_artifact(
            session_id=request.session_id,
            doc_id=request.doc_id,
            doc_url=request.doc_url,
            file_name=request.file_name,
            text=request.text,
            dao_id=request.dao_id,
            user_id=request.user_id,
            artifact_id=request.artifact_id,
            target_format=request.target_format,
            artifact_label=request.artifact_label,
            metadata=request.metadata,
        )
        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)
        return {
            "ok": True,
            "artifact_id": result.artifact_id,
            "version_id": result.version_id,
            "storage_key": result.storage_key,
            "mime": result.mime,
            "file_name": result.file_name,
            "download_url": result.download_url,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Publish document error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/api/doc/versions/{doc_id}")
async def list_document_versions_endpoint(doc_id: str, agent_id: str = "daarwizz", limit: int = 20):
    """
    List document versions for agent/doc pair.
    """
    try:
        payload = await list_document_versions(agent_id=agent_id, doc_id=doc_id, limit=limit)
        if payload.get("ok"):
            return payload
        raise HTTPException(status_code=400, detail=payload.get("error", "Failed to load versions"))
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"List document versions error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/api/doc/context/{session_id}")
async def get_document_context(session_id: str):
"""
@@ -258,3 +410,56 @@ async def get_document_context(session_id: str):
logger.error(f"Get document context error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/api/doc/artifacts/{artifact_id}/versions/{version_id}/download")
async def download_artifact_version_via_gateway(
    artifact_id: str,
    version_id: str,
    filename: Optional[str] = None,
    inline: bool = False,
):
    """
    Proxy download for artifact version to avoid exposing internal MinIO host
    to browser clients.

    Flow: fetch download metadata (including a signed URL) from the artifact
    registry, then fetch the file bytes from storage and return them with a
    sanitized Content-Disposition filename. `inline=True` serves the file
    inline instead of as an attachment. Raises the registry's status on
    metadata failure, 502 on storage failures, 500 on unexpected errors.
    NOTE(review): the full file body is buffered in memory before being
    returned — fine for documents, worth revisiting for very large artifacts.
    """
    aid = (artifact_id or "").strip()
    vid = (version_id or "").strip()
    if not aid or not vid:
        raise HTTPException(status_code=400, detail="artifact_id and version_id are required")
    try:
        async with httpx.AsyncClient(timeout=DOC_DOWNLOAD_TIMEOUT_SECONDS) as client:
            meta_resp = await client.get(
                f"{ARTIFACT_REGISTRY_URL}/artifacts/{aid}/versions/{vid}/download"
            )
            if meta_resp.status_code >= 400:
                # Surface the registry's own detail message when available.
                detail = ""
                try:
                    detail = meta_resp.json().get("detail")  # type: ignore[assignment]
                except Exception:
                    detail = meta_resp.text[:200]
                raise HTTPException(status_code=meta_resp.status_code, detail=detail or "Version download info failed")
            meta = meta_resp.json()
            signed_url = (meta.get("url") or "").strip()
            if not signed_url:
                raise HTTPException(status_code=502, detail="artifact-registry returned empty download URL")
            file_resp = await client.get(signed_url)
            if file_resp.status_code >= 400:
                raise HTTPException(status_code=502, detail=f"Artifact storage download failed: {file_resp.status_code}")
        mime = (meta.get("mime") or file_resp.headers.get("content-type") or "application/octet-stream").strip()
        # Derive a filename from the storage key when the caller gave none.
        storage_key = str(meta.get("storage_key") or "")
        inferred_name = storage_key.rsplit("/", 1)[-1] if "/" in storage_key else storage_key
        out_name = (filename or inferred_name or f"{aid}_{vid}.bin").strip()
        # Sanitize: header injection / filesystem-unsafe characters become "_".
        out_name = re.sub(r"[^A-Za-z0-9._-]+", "_", out_name).strip("._") or f"{aid}_{vid}.bin"
        disposition = "inline" if inline else "attachment"
        headers = {
            "Content-Disposition": f'{disposition}; filename="{out_name}"',
            "Cache-Control": "private, max-age=60",
        }
        return Response(content=file_resp.content, media_type=mime, headers=headers)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Artifact version proxy download failed: aid={aid}, vid={vid}, err={e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Artifact proxy download failed")

View File

@@ -143,6 +143,10 @@ class MemoryClient:
"body_text": e.get("content", ""),
"kind": e.get("kind", "message"),
"type": "user" if e.get("role") == "user" else "agent",
"role": e.get("role", "unknown"),
"timestamp": e.get("timestamp"),
"user_id": e.get("user_id"),
"sender_name": e.get("sender_name"),
}
for e in events
if e.get("content")
@@ -445,4 +449,3 @@ class MemoryClient:
# Глобальний екземпляр клієнта
memory_client = MemoryClient()

View File

@@ -11,18 +11,23 @@ This service can be used by:
import os
import logging
import hashlib
import base64
import json
import re
from typing import Optional, Dict, Any, List
from pydantic import BaseModel
from datetime import datetime
from io import BytesIO
from router_client import send_to_router
from memory_client import memory_client
logger = logging.getLogger(__name__)
SHARED_EXCEL_POLICY_AGENTS = {"agromatrix", "helion", "nutra", "greenfood"}
ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000")
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
DOC_WRITEBACK_CREATED_BY = os.getenv("DOC_WRITEBACK_CREATED_BY", "gateway-doc-service")
GATEWAY_PUBLIC_BASE_URL = os.getenv("GATEWAY_PUBLIC_BASE_URL", "").rstrip("/")
class QAItem(BaseModel):
@@ -51,6 +56,35 @@ class IngestResult(BaseModel):
error: Optional[str] = None
class UpdateResult(BaseModel):
    """Result of document update with version bump.

    Publish-related fields are only populated when the update also
    published a physical artifact version; publish_error carries a
    non-fatal publish failure message.
    """
    success: bool
    doc_id: Optional[str] = None
    # New logical version number and its row id after the bump.
    version_no: Optional[int] = None
    version_id: Optional[int] = None
    # How many RAG chunks were re-ingested for the new text.
    updated_chunks: int = 0
    status: str = "unknown"
    # Non-fatal: update may succeed while artifact publish fails.
    publish_error: Optional[str] = None
    artifact_id: Optional[str] = None
    artifact_version_id: Optional[str] = None
    artifact_storage_key: Optional[str] = None
    artifact_mime: Optional[str] = None
    artifact_download_url: Optional[str] = None
    error: Optional[str] = None
class PublishResult(BaseModel):
    """Result of artifact write-back publish.

    On success carries the artifact/version coordinates and a download URL
    (gateway-proxied when GATEWAY_PUBLIC_BASE_URL is configured).
    """
    success: bool
    artifact_id: Optional[str] = None
    version_id: Optional[str] = None
    storage_key: Optional[str] = None
    mime: Optional[str] = None
    file_name: Optional[str] = None
    download_url: Optional[str] = None
    error: Optional[str] = None
class QAResult(BaseModel):
"""Result of RAG query about a document"""
success: bool
@@ -84,6 +118,266 @@ class DocumentService:
"""Initialize document service"""
self.memory_client = memory_client
async def _http_json(
    self,
    base_url: str,
    method: str,
    path: str,
    payload: Optional[Dict[str, Any]],
    timeout: float,
    json_source: str,
    type_source: str,
    error_prefix: str,
) -> Dict[str, Any]:
    """Shared HTTP→JSON helper for the router and artifact-registry backends.

    Previously this logic was duplicated four times across the
    *_post_json/*_get_json methods; it is consolidated here.

    Sends `method` ("post" with a JSON body, anything else is GET) to
    base_url + path, parses the JSON response, and raises RuntimeError with
    the backend's detail/error message on any status >= 400. Unparseable or
    non-dict bodies are normalized to an error dict so callers can rely on
    dict access.
    """
    import httpx
    url = f"{base_url.rstrip('/')}{path}"
    async with httpx.AsyncClient(timeout=timeout) as client:
        if method == "post":
            resp = await client.post(url, json=payload)
        else:
            resp = await client.get(url)
        body = {}
        try:
            body = resp.json()
        except Exception:
            body = {"ok": False, "error": f"Invalid JSON from {json_source} ({resp.status_code})"}
        if resp.status_code >= 400:
            err = body.get("detail") or body.get("error") or f"HTTP {resp.status_code}"
            raise RuntimeError(f"{error_prefix} error on {path}: {err}")
        return body if isinstance(body, dict) else {"ok": False, "error": f"Invalid {type_source} response type"}

async def _router_post_json(
    self,
    path: str,
    payload: Dict[str, Any],
    timeout: float = 45.0,
) -> Dict[str, Any]:
    """POST JSON to the router service; see _http_json for error semantics."""
    return await self._http_json(ROUTER_URL, "post", path, payload, timeout, "router", "router", "Router")

async def _router_get_json(
    self,
    path: str,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """GET JSON from the router service; see _http_json for error semantics."""
    return await self._http_json(ROUTER_URL, "get", path, None, timeout, "router", "router", "Router")

async def _artifact_post_json(
    self,
    path: str,
    payload: Dict[str, Any],
    timeout: float = 45.0,
) -> Dict[str, Any]:
    """POST JSON to the artifact registry; see _http_json for error semantics."""
    return await self._http_json(
        ARTIFACT_REGISTRY_URL, "post", path, payload, timeout, "artifact-registry", "artifact", "Artifact registry"
    )

async def _artifact_get_json(
    self,
    path: str,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """GET JSON from the artifact registry; see _http_json for error semantics."""
    return await self._http_json(
        ARTIFACT_REGISTRY_URL, "get", path, None, timeout, "artifact-registry", "artifact", "Artifact registry"
    )
def _resolve_format(self, file_name: Optional[str], target_format: Optional[str]) -> str:
fmt = (target_format or "").strip().lower().lstrip(".")
if fmt:
return fmt
if file_name and "." in file_name:
return file_name.rsplit(".", 1)[1].strip().lower()
return "txt"
def _compose_output_name(self, file_name: Optional[str], doc_id: str, fmt: str) -> str:
base = "document"
if file_name:
base = file_name.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
if "." in base:
base = base.rsplit(".", 1)[0]
elif doc_id:
base = doc_id
safe_base = re.sub(r"[^A-Za-z0-9._-]+", "_", base).strip("._") or "document"
return f"{safe_base}.{fmt}"
def _gateway_artifact_download_path(self, artifact_id: str, version_id: str) -> str:
aid = (artifact_id or "").strip()
vid = (version_id or "").strip()
return f"/api/doc/artifacts/{aid}/versions/{vid}/download"
def _gateway_artifact_download_url(self, artifact_id: str, version_id: str) -> str:
    """Absolute download URL when GATEWAY_PUBLIC_BASE_URL is configured,
    otherwise the gateway-relative path."""
    rel = self._gateway_artifact_download_path(artifact_id, version_id)
    return f"{GATEWAY_PUBLIC_BASE_URL}{rel}" if GATEWAY_PUBLIC_BASE_URL else rel
def _render_document_bytes(
    self,
    text: str,
    file_name: Optional[str],
    doc_id: str,
    target_format: Optional[str] = None,
) -> Dict[str, Any]:
    """Render plain document text into file bytes for the chosen format.

    Returns {"bytes", "mime", "file_name"}. The format comes from
    target_format or the file-name extension (see _resolve_format);
    unrecognized formats fall back to UTF-8 plain text with a .txt name.
    Spreadsheet output writes one line per row (column A only); docx output
    writes one paragraph per line.

    Raises ValueError on empty text and RuntimeError when an optional
    renderer (openpyxl / python-docx) is unavailable.
    """
    body = (text or "").strip()
    if not body:
        raise ValueError("Cannot render empty document text")
    fmt = self._resolve_format(file_name=file_name, target_format=target_format)
    output_name = self._compose_output_name(file_name=file_name, doc_id=doc_id, fmt=fmt)
    if fmt in {"txt"}:
        payload = body.encode("utf-8")
        return {"bytes": payload, "mime": "text/plain; charset=utf-8", "file_name": output_name}
    if fmt in {"md", "markdown"}:
        payload = body.encode("utf-8")
        return {"bytes": payload, "mime": "text/markdown; charset=utf-8", "file_name": output_name}
    if fmt in {"json"}:
        parsed: Any
        # Invalid JSON text is wrapped rather than rejected.
        try:
            parsed = json.loads(body)
        except Exception:
            parsed = {"text": body}
        payload = json.dumps(parsed, ensure_ascii=False, indent=2).encode("utf-8")
        return {"bytes": payload, "mime": "application/json", "file_name": output_name}
    if fmt in {"csv"}:
        payload = body.encode("utf-8")
        return {"bytes": payload, "mime": "text/csv; charset=utf-8", "file_name": output_name}
    if fmt in {"xlsx", "xlsm", "xls"}:
        # Optional dependency: imported lazily so other formats work without it.
        try:
            from openpyxl import Workbook
        except Exception as e:
            raise RuntimeError(f"openpyxl is required for {fmt} rendering: {e}")
        wb = Workbook()
        ws = wb.active
        ws.title = "Document"
        lines = [ln for ln in body.splitlines()] or [body]
        for idx, line in enumerate(lines, start=1):
            ws.cell(row=idx, column=1, value=line)
        buf = BytesIO()
        wb.save(buf)
        # Legacy xls/xlsm requests are emitted as xlsx (note the forced name).
        mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        return {"bytes": buf.getvalue(), "mime": mime, "file_name": self._compose_output_name(file_name, doc_id, "xlsx")}
    if fmt in {"docx"}:
        try:
            from docx import Document
        except Exception as e:
            raise RuntimeError(f"python-docx is required for docx rendering: {e}")
        doc = Document()
        for line in body.splitlines():
            # Empty lines become single-space paragraphs to preserve spacing.
            doc.add_paragraph(line if line else " ")
        buf = BytesIO()
        doc.save(buf)
        mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        return {"bytes": buf.getvalue(), "mime": mime, "file_name": self._compose_output_name(file_name, doc_id, "docx")}
    # Unknown format: fall back to plain text with a .txt file name.
    payload = body.encode("utf-8")
    fallback_name = self._compose_output_name(file_name=file_name, doc_id=doc_id, fmt="txt")
    return {"bytes": payload, "mime": "text/plain; charset=utf-8", "file_name": fallback_name}
async def _publish_text_artifact(
    self,
    text: str,
    doc_id: str,
    file_name: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """Render document text to file bytes and publish them as an artifact version.

    Flow: render bytes (see _render_document_bytes), create a new artifact
    when no artifact_id was given, then upload the base64-encoded content as
    a new version. Never raises — all failures are returned as
    PublishResult(success=False, error=...).
    """
    try:
        rendered = self._render_document_bytes(
            text=text,
            file_name=file_name,
            doc_id=doc_id,
            target_format=target_format,
        )
        content_bytes = rendered["bytes"]
        # Registry API accepts content as base64 text.
        content_b64 = base64.b64encode(content_bytes).decode("ascii")
        effective_artifact_id = (artifact_id or "").strip()
        if not effective_artifact_id:
            # First publish for this document: create the artifact shell.
            create_resp = await self._artifact_post_json(
                "/artifacts",
                {
                    "type": "doc",
                    "title": file_name or doc_id,
                    "project_id": dao_id,
                    "acl_ref": dao_id,
                    "created_by": user_id or DOC_WRITEBACK_CREATED_BY,
                },
                timeout=30.0,
            )
            effective_artifact_id = str(create_resp.get("artifact_id") or "").strip()
            if not effective_artifact_id:
                return PublishResult(success=False, error="Artifact create failed: empty artifact_id")
        # Caller metadata may extend but not be clobbered by the defaults.
        meta = {"doc_id": doc_id, "source": "doc_update_publish"}
        if isinstance(metadata, dict):
            meta.update(metadata)
        version_resp = await self._artifact_post_json(
            f"/artifacts/{effective_artifact_id}/versions/from_base64",
            {
                "content_base64": content_b64,
                "mime": rendered["mime"],
                "filename": rendered["file_name"],
                "label": label or "edited",
                "meta_json": meta,
            },
            timeout=45.0,
        )
        version_id = str(version_resp.get("version_id") or "").strip()
        storage_key = version_resp.get("storage_key")
        if not version_id:
            return PublishResult(
                success=False,
                artifact_id=effective_artifact_id,
                error="Artifact version create failed: empty version_id",
            )
        download_url = self._gateway_artifact_download_url(
            artifact_id=effective_artifact_id,
            version_id=version_id,
        )
        return PublishResult(
            success=True,
            artifact_id=effective_artifact_id,
            version_id=version_id,
            storage_key=storage_key,
            mime=rendered["mime"],
            file_name=rendered["file_name"],
            download_url=download_url,
        )
    except Exception as e:
        logger.error(f"publish_text_artifact failed: {e}", exc_info=True)
        return PublishResult(success=False, error=str(e))
def _is_excel_filename(self, file_name: Optional[str]) -> bool:
if not file_name:
return False
@@ -462,7 +756,8 @@ class DocumentService:
doc_url: Optional[str] = None,
file_name: Optional[str] = None,
dao_id: str = None,
user_id: str = None
user_id: str = None,
agent_id: str = "daarwizz",
) -> IngestResult:
"""
Ingest document chunks into RAG/Memory.
@@ -488,64 +783,60 @@ class DocumentService:
file_name = file_name or doc_context.file_name
dao_id = dao_id or doc_context.dao_id
if not doc_id and not doc_url:
if not doc_url:
return IngestResult(
success=False,
error="No document ID or URL provided"
error="No document URL available for ingest"
)
# Build request to Router with ingest flag
router_request = {
"mode": "doc_parse",
"agent": "parser",
parsed = await self.parse_document(
session_id=session_id,
doc_url=doc_url,
file_name=file_name or "document",
dao_id=dao_id or "",
user_id=user_id or "",
output_mode="markdown",
metadata={"source": self._extract_source(session_id), "mode": "ingest"},
)
if not parsed.success:
return IngestResult(success=False, error=parsed.error or "Document parse failed")
effective_doc_id = doc_id or parsed.doc_id
if not effective_doc_id:
effective_doc_id = hashlib.md5(f"{session_id}:{file_name}:{datetime.utcnow().isoformat()}".encode()).hexdigest()[:12]
doc_text = (parsed.markdown or "").strip()
if not doc_text:
return IngestResult(success=False, error="No extractable text for ingestion")
payload = {
"agent_id": (agent_id or "daarwizz").lower(),
"doc_id": effective_doc_id,
"file_name": file_name or "document",
"text": doc_text,
"dao_id": dao_id,
"user_id": user_id,
"metadata": {
"source": self._extract_source(session_id),
"dao_id": dao_id,
"user_id": user_id,
"session_id": session_id,
},
"payload": {
"output_mode": "chunks", # Use chunks for RAG ingestion
"dao_id": dao_id,
"user_id": user_id,
"ingest": True, # Flag for ingestion
"source": self._extract_source(session_id),
},
}
if doc_url:
router_request["payload"]["doc_url"] = doc_url
router_request["payload"]["file_name"] = file_name or "document.pdf"
if doc_id:
router_request["payload"]["doc_id"] = doc_id
logger.info(f"Ingesting document: session={session_id}, doc_id={doc_id}")
# Send to Router
response = await send_to_router(router_request)
if not isinstance(response, dict):
return IngestResult(
success=False,
error="Invalid response from router"
)
data = response.get("data", {})
chunks = data.get("chunks", [])
if chunks:
response = await self._router_post_json("/v1/documents/ingest", payload, timeout=90.0)
if response.get("ok"):
return IngestResult(
success=True,
doc_id=doc_id or data.get("doc_id"),
ingested_chunks=len(chunks),
status="ingested"
)
else:
return IngestResult(
success=False,
status="failed",
error="No chunks to ingest"
doc_id=response.get("doc_id") or effective_doc_id,
ingested_chunks=int(response.get("chunks_stored", 0) or 0),
status="ingested",
)
return IngestResult(
success=False,
doc_id=effective_doc_id,
status="failed",
error=response.get("error", "Router ingest failed"),
)
except Exception as e:
logger.error(f"Document ingestion failed: {e}", exc_info=True)
@@ -553,6 +844,245 @@ class DocumentService:
success=False,
error=str(e)
)
async def update_document(
    self,
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    agent_id: str = "daarwizz",
    storage_ref: Optional[str] = None,
    publish_artifact: bool = False,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
    """
    Update existing document content and bump version in router memory.

    Missing identifiers (doc_id, doc_url, file_name, dao_id, user_id) are
    backfilled from the session's cached document context when available.
    When ``text`` is empty, the document at ``doc_url`` is re-parsed to
    markdown and that text is used instead.

    Args:
        session_id: Session key used to look up the cached doc context.
        doc_id: Target document id; required directly or via context.
        doc_url: Source URL, used only when ``text`` is empty.
        file_name: Original file name, forwarded to parse/publish steps.
        text: New document text; parsed from ``doc_url`` when empty.
        dao_id: Ownership scope forwarded to the router.
        user_id: Acting user forwarded to the router.
        agent_id: Agent namespace for the router index (lowercased).
        storage_ref: Optional storage reference forwarded to the router.
        publish_artifact: When True, also publish a physical artifact version.
        artifact_id: Existing artifact to version (publish step).
        target_format: Desired artifact format (publish step).
        artifact_label: Version label for the artifact (publish step).
        metadata: Extra metadata merged into the router update payload.

    Returns:
        UpdateResult describing the router update and, when requested, the
        artifact publish outcome. Never raises; failures are reported in
        the result.
    """
    try:
        # Backfill missing fields from the cached per-session doc context.
        context = await self.get_doc_context(session_id)
        if context:
            doc_id = doc_id or context.doc_id
            doc_url = doc_url or context.doc_url
            file_name = file_name or context.file_name
            dao_id = dao_id or context.dao_id
            # Consistency fix: publish_document_artifact also backfills
            # user_id from context; do the same here so router scoping and
            # artifact attribution match.
            user_id = user_id or context.user_id

        if not doc_id:
            return UpdateResult(
                success=False,
                status="failed",
                error="No document context found. Provide doc_id or parse/ingest first.",
            )

        effective_text = (text or "").strip()
        if not effective_text:
            # No inline text supplied: re-parse the source document.
            if not doc_url:
                return UpdateResult(
                    success=False,
                    doc_id=doc_id,
                    status="failed",
                    error="No text or doc_url provided for update",
                )
            parsed = await self.parse_document(
                session_id=session_id,
                doc_url=doc_url,
                file_name=file_name or "document",
                dao_id=dao_id or "",
                user_id=user_id or "",
                output_mode="markdown",
                metadata={"source": self._extract_source(session_id), "mode": "update"},
            )
            if not parsed.success:
                return UpdateResult(
                    success=False,
                    doc_id=doc_id,
                    status="failed",
                    error=parsed.error or "Document parse failed",
                )
            effective_text = (parsed.markdown or "").strip()
            if not effective_text:
                return UpdateResult(
                    success=False,
                    doc_id=doc_id,
                    status="failed",
                    error="No extractable text for update",
                )

        meta = {
            "session_id": session_id,
            "source": self._extract_source(session_id),
        }
        if isinstance(metadata, dict):
            meta.update(metadata)

        response = await self._router_post_json(
            "/v1/documents/update",
            {
                "agent_id": (agent_id or "daarwizz").lower(),
                "doc_id": doc_id,
                "file_name": file_name,
                "text": effective_text,
                "dao_id": dao_id,
                "user_id": user_id,
                "storage_ref": storage_ref,
                "metadata": meta,
            },
            timeout=90.0,
        )
        if not response.get("ok"):
            return UpdateResult(
                success=False,
                doc_id=doc_id,
                status="failed",
                error=response.get("error", "Router update failed"),
            )

        # Refresh session context so follow-up commands target this doc.
        await self.save_doc_context(
            session_id=session_id,
            doc_id=doc_id,
            doc_url=doc_url,
            file_name=file_name,
            dao_id=dao_id,
            user_id=user_id,
        )

        publish = PublishResult(success=False)
        if publish_artifact:
            publish = await self._publish_text_artifact(
                text=effective_text,
                doc_id=doc_id,
                file_name=file_name,
                dao_id=dao_id,
                user_id=user_id,
                artifact_id=artifact_id,
                target_format=target_format,
                label=artifact_label,
                metadata=meta,
            )

        # Status reflects the router update plus optional artifact publish.
        if publish_artifact:
            status = "updated_published" if publish.success else "updated_publish_failed"
        else:
            status = "updated"

        return UpdateResult(
            success=True,
            doc_id=response.get("doc_id") or doc_id,
            version_no=int(response.get("version_no", 0) or 0) or None,
            version_id=int(response.get("version_id", 0) or 0) or None,
            updated_chunks=int(response.get("chunks_stored", 0) or 0),
            status=status,
            publish_error=publish.error if publish_artifact and not publish.success else None,
            artifact_id=publish.artifact_id if publish_artifact else None,
            artifact_version_id=publish.version_id if publish_artifact else None,
            artifact_storage_key=publish.storage_key if publish_artifact else None,
            artifact_mime=publish.mime if publish_artifact else None,
            artifact_download_url=publish.download_url if publish_artifact else None,
        )
    except Exception as e:
        logger.error(f"Document update failed: {e}", exc_info=True)
        return UpdateResult(
            success=False,
            doc_id=doc_id,
            status="failed",
            error=str(e),
        )
async def list_document_versions(
    self,
    agent_id: str,
    doc_id: str,
    limit: int = 20,
) -> Dict[str, Any]:
    """
    List stored versions of a document from the router.

    Args:
        agent_id: Agent namespace; defaults to "daarwizz" and is lowercased.
        doc_id: Document id to inspect; required (whitespace-stripped).
        limit: Max versions to return; clamped to the range [1, 200].

    Returns:
        The router's JSON response dict, or ``{"ok": False, "error": ...,
        "items": []}`` on bad input, invalid response, or any exception.
    """
    from urllib.parse import quote

    aid = (agent_id or "daarwizz").lower()
    did = (doc_id or "").strip()
    if not did:
        return {"ok": False, "error": "doc_id is required", "items": []}
    try:
        # Percent-encode user-supplied ids so they cannot break the URL
        # path/query (e.g. a doc_id containing '/', '?', or '&').
        safe_limit = max(1, min(int(limit or 20), 200))
        response = await self._router_get_json(
            f"/v1/documents/{quote(did, safe='')}/versions"
            f"?agent_id={quote(aid, safe='')}&limit={safe_limit}",
            timeout=30.0,
        )
        if isinstance(response, dict):
            return response
        return {"ok": False, "error": "invalid_response", "items": []}
    except Exception as e:
        logger.error(f"list_document_versions failed: {e}")
        return {"ok": False, "error": str(e), "items": []}
async def publish_document_artifact(
    self,
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """
    Publish text as a physical artifact version (.docx/.xlsx/.txt/...)
    without touching the RAG index.

    Missing identifiers are backfilled from the session's cached doc
    context; when no inline text is given, the document at doc_url is
    re-parsed to markdown first. Never raises; failures are reported
    through PublishResult.
    """
    try:
        # Fill in any missing fields from the cached session context.
        ctx = await self.get_doc_context(session_id)
        if ctx:
            doc_id = doc_id or ctx.doc_id
            doc_url = doc_url or ctx.doc_url
            file_name = file_name or ctx.file_name
            dao_id = dao_id or ctx.dao_id
            user_id = user_id or ctx.user_id

        if not doc_id:
            return PublishResult(success=False, error="doc_id is required")

        content = (text or "").strip()
        if not content:
            # No inline text: fall back to re-parsing the source document.
            if not doc_url:
                return PublishResult(success=False, error="text or doc_url is required")
            parse_result = await self.parse_document(
                session_id=session_id,
                doc_url=doc_url,
                file_name=file_name or "document",
                dao_id=dao_id or "",
                user_id=user_id or "",
                output_mode="markdown",
                metadata={"source": self._extract_source(session_id), "mode": "publish"},
            )
            if not parse_result.success:
                return PublishResult(success=False, error=parse_result.error or "Document parse failed")
            content = (parse_result.markdown or "").strip()
            if not content:
                return PublishResult(success=False, error="No text available for publish")

        return await self._publish_text_artifact(
            text=content,
            doc_id=doc_id,
            file_name=file_name,
            dao_id=dao_id,
            user_id=user_id,
            artifact_id=artifact_id,
            target_format=target_format,
            label=artifact_label,
            metadata=metadata,
        )
    except Exception as e:
        logger.error(f"publish_document_artifact failed: {e}", exc_info=True)
        return PublishResult(success=False, error=str(e))
async def ask_about_document(
self,
@@ -625,38 +1155,30 @@ class DocumentService:
}],
)
# Build RAG query request
router_request = {
"mode": "rag_query",
"agent": agent_id,
"metadata": {
"source": self._extract_source(session_id),
"dao_id": dao_id,
"user_id": user_id,
"session_id": session_id,
},
"payload": {
"question": question,
"dao_id": dao_id,
"user_id": user_id,
"doc_id": doc_id,
},
}
logger.info(
f"RAG query: agent={agent_id}, session={session_id}, question={question[:50]}, doc_id={doc_id}"
)
# Send to Router
response = await send_to_router(router_request)
if not isinstance(response, dict):
response = await self._router_post_json(
"/v1/documents/query",
{
"agent_id": (agent_id or "daarwizz").lower(),
"question": question,
"doc_id": doc_id,
"dao_id": dao_id,
"user_id": user_id,
"limit": 5,
},
timeout=60.0,
)
if isinstance(response, dict) and not response.get("ok", False):
return QAResult(
success=False,
error="Invalid response from router"
error=response.get("error", "Document query failed"),
)
data = response.get("data", {})
data = response.get("data", {}) if isinstance(response, dict) else {}
answer = data.get("answer") or data.get("text")
sources = data.get("citations", []) or data.get("sources", [])
@@ -717,7 +1239,8 @@ async def ingest_document(
doc_url: Optional[str] = None,
file_name: Optional[str] = None,
dao_id: Optional[str] = None,
user_id: Optional[str] = None
user_id: Optional[str] = None,
agent_id: str = "daarwizz",
) -> IngestResult:
"""Ingest document chunks into RAG/Memory"""
return await doc_service.ingest_document(
@@ -726,7 +1249,8 @@ async def ingest_document(
doc_url=doc_url,
file_name=file_name,
dao_id=dao_id,
user_id=user_id
user_id=user_id,
agent_id=agent_id,
)
@@ -749,6 +1273,79 @@ async def ask_about_document(
)
async def update_document(
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    agent_id: str = "daarwizz",
    storage_ref: Optional[str] = None,
    publish_artifact: bool = False,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
    """Module-level convenience wrapper: delegate a document update (and
    optional artifact publish) to the shared DocumentService instance."""
    return await doc_service.update_document(
        session_id=session_id, doc_id=doc_id, doc_url=doc_url,
        file_name=file_name, text=text, dao_id=dao_id, user_id=user_id,
        agent_id=agent_id, storage_ref=storage_ref,
        publish_artifact=publish_artifact, artifact_id=artifact_id,
        target_format=target_format, artifact_label=artifact_label,
        metadata=metadata,
    )
async def list_document_versions(agent_id: str, doc_id: str, limit: int = 20) -> Dict[str, Any]:
    """Module-level wrapper: fetch a document's version history via the
    shared DocumentService instance."""
    return await doc_service.list_document_versions(agent_id=agent_id, doc_id=doc_id, limit=limit)
async def publish_document_artifact(
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """Module-level wrapper: publish a physical artifact version for
    document text via the shared DocumentService instance."""
    return await doc_service.publish_document_artifact(
        session_id=session_id, doc_id=doc_id, doc_url=doc_url,
        file_name=file_name, text=text, dao_id=dao_id, user_id=user_id,
        artifact_id=artifact_id, target_format=target_format,
        artifact_label=artifact_label, metadata=metadata,
    )
async def save_doc_context(
session_id: str,
doc_id: str,