feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection

## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
Apple
2026-01-28 06:40:34 -08:00
parent 4aeb69e7ae
commit 0c8bef82f4
120 changed files with 21905 additions and 425 deletions

View File

@@ -14,7 +14,7 @@ RUN apt-get update && apt-get install -y \
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir -r requirements.txt && pip check
# Copy application code
COPY app/ ./app/

View File

@@ -4,7 +4,7 @@ Configuration for RAG Service
import os
from typing import Literal
from pydantic_settings import BaseSettings
from pydantic import BaseSettings
class Settings(BaseSettings):
@@ -15,10 +15,8 @@ class Settings(BaseSettings):
API_PORT: int = 9500
# PostgreSQL + pgvector
PG_DSN: str = os.getenv(
"PG_DSN",
"postgresql+psycopg2://postgres:postgres@city-db:5432/daarion_city"
)
_default_dsn = "postgresql+psycopg2://postgres:postgres@city-db:5432/daarion_city"
PG_DSN: str = os.getenv("PG_DSN", os.getenv("DATABASE_URL", _default_dsn))
# Embedding model
EMBED_MODEL_NAME: str = os.getenv("EMBED_MODEL_NAME", "BAAI/bge-m3")

View File

@@ -4,9 +4,33 @@ Uses PostgreSQL + pgvector via Haystack
"""
import logging
from typing import Optional
import json
import uuid
from dataclasses import dataclass
from typing import Optional, List, Dict, Any
from haystack.document_stores import PGVectorDocumentStore
import psycopg2
try:
from haystack.document_stores import PGVectorDocumentStore # type: ignore
from haystack.schema import Document as HaystackDocument # type: ignore
except Exception:
PGVectorDocumentStore = None # type: ignore
HaystackDocument = None # type: ignore
@dataclass
class Document:
    """Lightweight stand-in for haystack.schema.Document used when the
    haystack import fails (see the try/except above)."""
    # Raw chunk text.
    content: str
    # Arbitrary chunk metadata (e.g. dao_id, artifact_id, page) — stored as jsonb.
    meta: Dict[str, Any]
    # Dense vector for the chunk; may be None when not yet embedded.
    embedding: Optional[List[float]] = None
    # Primary-key id; generated by the store on write when absent.
    id: Optional[str] = None
def _make_document(content: str, meta: Dict[str, Any], embedding: Optional[List[float]] = None, doc_id: Optional[str] = None):
    """Build a document object, preferring the real Haystack class when it
    imported successfully and falling back to the local ``Document`` dataclass."""
    doc_cls = HaystackDocument if HaystackDocument else Document
    return doc_cls(content=content, meta=meta, embedding=embedding, id=doc_id)
from app.core.config import settings
@@ -16,7 +40,7 @@ logger = logging.getLogger(__name__)
_document_store: Optional[PGVectorDocumentStore] = None
def get_document_store() -> PGVectorDocumentStore:
def get_document_store():
"""
Get or create PGVectorDocumentStore instance
@@ -32,24 +56,156 @@ def get_document_store() -> PGVectorDocumentStore:
logger.info(f"Connection: {settings.PG_DSN.split('@')[1] if '@' in settings.PG_DSN else 'hidden'}")
try:
_document_store = PGVectorDocumentStore(
connection_string=settings.PG_DSN,
embedding_dim=settings.EMBED_DIM,
if PGVectorDocumentStore:
_document_store = PGVectorDocumentStore(
connection_string=settings.PG_DSN,
embedding_dim=settings.EMBED_DIM,
table_name=settings.RAG_TABLE_NAME,
search_strategy=settings.SEARCH_STRATEGY,
recreate_table=False,
similarity="cosine",
)
logger.info("PGVectorDocumentStore initialized successfully")
return _document_store
_document_store = SimplePGVectorStore(
dsn=settings.PG_DSN,
table_name=settings.RAG_TABLE_NAME,
search_strategy=settings.SEARCH_STRATEGY,
# Additional options
recreate_table=False, # Don't drop existing table
similarity="cosine", # Cosine similarity for embeddings
embedding_dim=settings.EMBED_DIM,
)
logger.info("PGVectorDocumentStore initialized successfully")
logger.info("SimplePGVectorStore initialized successfully")
return _document_store
except Exception as e:
logger.error(f"Failed to initialize DocumentStore: {e}", exc_info=True)
raise RuntimeError(f"DocumentStore initialization failed: {e}") from e
class SimplePGVectorStore:
    """Minimal pgvector-backed store exposing the subset of the Haystack
    document-store API this service uses: ``write_documents``,
    ``delete_documents``, ``search`` and ``filter_documents``.

    NOTE: ``table_name`` is interpolated directly into SQL, so it must come
    from trusted configuration (settings.RAG_TABLE_NAME), never user input.
    """

    def __init__(
        self,
        dsn: str,
        table_name: str,
        embedding_dim: int,
        search_strategy: Optional[str] = None,
        recreate_table: bool = False,
        similarity: str = "cosine",
    ) -> None:
        """Create the store and ensure the backing table/indexes exist.

        ``search_strategy``, ``recreate_table`` and ``similarity`` are accepted
        for call-site compatibility with PGVectorDocumentStore (the factory
        passes them); cosine distance (<=>) is the only strategy implemented.
        """
        # psycopg2 wants a plain postgresql:// scheme, not SQLAlchemy's
        # postgresql+psycopg2:// variant used elsewhere in settings.
        self.dsn = dsn.replace("postgresql+psycopg2", "postgresql")
        self.table_name = table_name
        self.embedding_dim = embedding_dim
        self.search_strategy = search_strategy
        self.similarity = similarity
        self._ensure_table()

    def _connect(self):
        """Open a fresh psycopg2 connection; callers are responsible for closing it."""
        return psycopg2.connect(self.dsn)

    def _ensure_table(self) -> None:
        """Create the documents table plus meta (GIN) and embedding (ivfflat) indexes."""
        conn = self._connect()
        try:
            # "with conn" commits on success / rolls back on error, but does
            # NOT close the connection — hence the explicit close in finally
            # (the previous version leaked one connection per call).
            with conn, conn.cursor() as cur:
                cur.execute(
                    f"""
                    create table if not exists {self.table_name} (
                        id text primary key,
                        content text,
                        embedding vector({self.embedding_dim}),
                        meta jsonb
                    );
                    """
                )
                cur.execute(
                    f"create index if not exists {self.table_name}_meta_gin on {self.table_name} using gin (meta);"
                )
                cur.execute(
                    f"create index if not exists {self.table_name}_embedding_idx on {self.table_name} using ivfflat (embedding vector_cosine_ops);"
                )
        finally:
            conn.close()

    def _vec(self, embedding: List[float]) -> str:
        """Render an embedding as a pgvector literal, e.g. ``[0.1,0.2]``."""
        return "[" + ",".join(str(x) for x in embedding) + "]"

    def write_documents(self, documents: List[Any]) -> None:
        """Upsert documents keyed by id (a uuid4 is generated when absent).

        Documents without an embedding are stored with a NULL embedding
        instead of crashing (``_vec(None)`` raised TypeError before).
        """
        conn = self._connect()
        try:
            with conn, conn.cursor() as cur:
                for doc in documents:
                    doc_id = getattr(doc, "id", None) or str(uuid.uuid4())
                    meta = getattr(doc, "meta", None) or {}
                    embedding = getattr(doc, "embedding", None)
                    vec = self._vec(embedding) if embedding is not None else None
                    cur.execute(
                        f"""
                        insert into {self.table_name} (id, content, embedding, meta)
                        values (%s, %s, %s, %s)
                        on conflict (id) do update set
                            content = excluded.content,
                            embedding = excluded.embedding,
                            meta = excluded.meta
                        """,
                        (doc_id, doc.content, vec, json.dumps(meta)),
                    )
        finally:
            conn.close()

    def delete_documents(self, filters: Optional[Dict[str, Any]] = None) -> None:
        """Delete rows whose meta ``index_fingerprint`` matches the filter.

        Only the ``index_fingerprint`` filter is supported, either as a plain
        value or a Haystack-style ``{"$eq": value}``; anything else is a
        deliberate silent no-op.
        """
        if not filters:
            return
        fingerprint = None
        if "index_fingerprint" in filters:
            value = filters["index_fingerprint"]
            fingerprint = value.get("$eq") if isinstance(value, dict) else value
        if not fingerprint:
            return
        conn = self._connect()
        try:
            with conn, conn.cursor() as cur:
                cur.execute(
                    f"delete from {self.table_name} where meta->>'index_fingerprint' = %s",
                    (fingerprint,),
                )
        finally:
            conn.close()

    def search(self, query_embedding: List[float], top_k: int = 5, filters: Optional[Dict[str, Any]] = None, return_embedding: bool = False):
        """Return the top_k documents nearest to ``query_embedding`` by cosine
        distance (<=>).

        ``return_embedding`` is accepted for API compatibility but ignored —
        embeddings are never fetched back.
        """
        where_clause, params = self._build_where(filters)
        params.append(self._vec(query_embedding))
        params.append(top_k)
        conn = self._connect()
        try:
            with conn, conn.cursor() as cur:
                cur.execute(
                    f"""
                    select content, meta
                    from {self.table_name}
                    {where_clause}
                    order by embedding <=> %s
                    limit %s
                    """,
                    params,
                )
                rows = cur.fetchall()
        finally:
            conn.close()
        return [_make_document(content=r[0], meta=r[1]) for r in rows]

    def filter_documents(self, filters: Optional[Dict[str, Any]] = None, top_k: int = 5, return_embedding: bool = False):
        """Return up to top_k documents matching the meta filters, unranked."""
        where_clause, params = self._build_where(filters)
        params.append(top_k)
        conn = self._connect()
        try:
            with conn, conn.cursor() as cur:
                cur.execute(
                    f"""
                    select content, meta
                    from {self.table_name}
                    {where_clause}
                    limit %s
                    """,
                    params,
                )
                rows = cur.fetchall()
        finally:
            conn.close()
        return [_make_document(content=r[0], meta=r[1]) for r in rows]

    def _build_where(self, filters: Optional[Dict[str, Any]]) -> tuple[str, List[Any]]:
        """Build a parameterized WHERE clause from whitelisted meta keys.

        List values collapse to their first element; keys outside the
        whitelist are ignored, so arbitrary filter keys cannot inject SQL.
        """
        where_parts: List[str] = []
        params: List[Any] = []
        if filters:
            for key in ["dao_id", "artifact_id", "brand_id", "project_id", "acl_ref"]:
                if key in filters and filters[key] is not None:
                    value = filters[key]
                    if isinstance(value, list):
                        value = value[0] if value else None
                    if value is not None:
                        where_parts.append(f"meta->>'{key}' = %s")
                        params.append(value)
        where_clause = f"where {' and '.join(where_parts)}" if where_parts else ""
        return where_clause, params
def reset_document_store():
"""Reset global document store instance (for testing)"""
global _document_store

View File

@@ -4,49 +4,45 @@ Uses SentenceTransformers via Haystack
"""
import logging
from typing import Optional
from typing import Optional, List, Dict, Any
from haystack.components.embedders import SentenceTransformersTextEmbedder
from sentence_transformers import SentenceTransformer
from app.core.config import settings
logger = logging.getLogger(__name__)
class SimpleEmbedder:
    """Thin SentenceTransformer wrapper mirroring the old Haystack embedder's
    ``run()`` interface (batch via ``texts``, single string via ``text``)."""

    def __init__(self, model_name: str, device: str) -> None:
        # Eagerly loads the model; heavy, so instances are cached by
        # get_text_embedder().
        self.model = SentenceTransformer(model_name, device=device)

    def run(self, texts: Optional[List[str]] = None, text: Optional[str] = None) -> Dict[str, Any]:
        """Embed input text.

        Returns ``{"embeddings": [...]}`` for a batch, ``{"embedding": [...]}``
        (a one-element list of vectors) for a single string, and an empty
        batch result when neither argument is given. ``texts`` wins if both
        are passed.
        """
        if texts is None and text is None:
            return {"embeddings": []}
        if texts is not None:
            return {"embeddings": self.model.encode(texts, convert_to_numpy=True).tolist()}
        return {"embedding": self.model.encode([text], convert_to_numpy=True).tolist()}
# Global embedder instance
_text_embedder: Optional[SentenceTransformersTextEmbedder] = None
_text_embedder: Optional[SimpleEmbedder] = None
def get_text_embedder() -> SentenceTransformersTextEmbedder:
"""
Get or create SentenceTransformersTextEmbedder instance
Returns:
SentenceTransformersTextEmbedder configured with embedding model
"""
def get_text_embedder() -> SimpleEmbedder:
global _text_embedder
if _text_embedder is not None:
return _text_embedder
logger.info(f"Loading embedding model: {settings.EMBED_MODEL_NAME}")
logger.info(f"Device: {settings.EMBED_DEVICE}")
try:
_text_embedder = SentenceTransformersTextEmbedder(
model=settings.EMBED_MODEL_NAME,
device=settings.EMBED_DEVICE,
)
logger.info("Text embedder initialized successfully")
return _text_embedder
except Exception as e:
logger.error(f"Failed to initialize TextEmbedder: {e}", exc_info=True)
raise RuntimeError(f"TextEmbedder initialization failed: {e}") from e
_text_embedder = SimpleEmbedder(settings.EMBED_MODEL_NAME, settings.EMBED_DEVICE)
logger.info("Text embedder initialized successfully")
return _text_embedder
def reset_embedder():
    """Reset global embedder instance (for testing)"""
    global _text_embedder
    # Next get_text_embedder() call will reload the model from scratch.
    _text_embedder = None

View File

@@ -10,7 +10,6 @@ from typing import Dict, Any, Optional
from app.core.config import settings
from app.ingest_pipeline import ingest_parsed_document
from app.document_store import DocumentStore
import nats
from nats.js.errors import NotFoundError

View File

@@ -93,7 +93,9 @@ async def publish_event(
# Publish to JetStream
js = conn.jetstream()
ack = await js.publish(subject, json.dumps(event_envelope))
logger.info(f"Event published to {subject}: {seq={ack.sequence}, stream_seq={ack.stream_seq}")
logger.info(
f"Event published to {subject}: seq={ack.sequence}, stream_seq={ack.stream_seq}"
)
return ack
except Exception as e:

View File

@@ -6,11 +6,7 @@ Converts ParsedDocument to Haystack Documents and indexes them
import logging
from typing import List, Dict, Any, Optional
from haystack import Pipeline, Document
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from app.document_store import get_document_store
from app.document_store import get_document_store, _make_document
from app.embedding import get_text_embedder
from app.core.config import settings
from app.events import publish_document_ingested, publish_document_indexed
@@ -53,18 +49,25 @@ async def ingest_parsed_document(
"doc_count": 0
}
logger.info(f"Converted {len(documents)} blocks to Haystack Documents")
logger.info(f"Converted {len(documents)} blocks to document chunks")
# Create ingest pipeline
pipeline = _create_ingest_pipeline()
embedder = get_text_embedder()
texts = [doc["content"] for doc in documents]
embedding_result = embedder.run(texts=texts)
embeddings = embedding_result.get("embeddings", [])
doc_objects = []
for idx, doc in enumerate(documents):
embedding = embeddings[idx] if idx < len(embeddings) else None
doc_objects.append(
_make_document(content=doc["content"], meta=doc["meta"], embedding=embedding)
)
# Run pipeline
pipeline_start = time.time()
result = pipeline.run({"documents": documents})
document_store = get_document_store()
document_store.write_documents(doc_objects)
pipeline_time = time.time() - pipeline_start
# Extract results
written_docs = result.get("documents_writer", {}).get("documents_written", 0)
written_docs = len(doc_objects)
# Calculate metrics
total_time = time.time() - ingest_start
@@ -124,7 +127,7 @@ def _parsed_json_to_documents(
dao_id: str,
doc_id: str,
user_id: Optional[str] = None
) -> List[Document]:
) -> List[Dict[str, Any]]:
"""
Convert ParsedDocument JSON to plain chunk dicts for indexing
@@ -137,7 +140,7 @@ def _parsed_json_to_documents(
Returns:
List of dicts with ``content`` and ``meta`` keys
"""
documents = []
documents: List[Dict[str, Any]] = []
# Extract pages from parsed_json
pages = parsed_json.get("pages", [])
@@ -186,13 +189,7 @@ def _parsed_json_to_documents(
if k not in ["dao_id"] # Already added
})
# Create Haystack Document
doc = Document(
content=text,
meta=meta
)
documents.append(doc)
documents.append({"content": text, "meta": meta})
return documents
@@ -242,35 +239,7 @@ async def _publish_events_async(
logger.error(f"Failed to publish RAG events for doc_id={doc_id}: {e}")
def _create_ingest_pipeline() -> Pipeline:
"""
Create Haystack ingest pipeline
Pipeline: DocumentSplitter → Embedder → DocumentWriter
"""
# Get components
embedder = get_text_embedder()
document_store = get_document_store()
# Create splitter (optional, if chunks are too large)
splitter = DocumentSplitter(
split_by="sentence",
split_length=settings.CHUNK_SIZE,
split_overlap=settings.CHUNK_OVERLAP
)
# Create writer
writer = DocumentWriter(document_store)
# Build pipeline
pipeline = Pipeline()
pipeline.add_component("splitter", splitter)
pipeline.add_component("embedder", embedder)
pipeline.add_component("documents_writer", writer)
# Connect components
pipeline.connect("splitter", "embedder")
pipeline.connect("embedder", "documents_writer")
return pipeline
def _create_ingest_pipeline():
# Deprecated: no haystack pipeline in minimal PGVector mode.
return None

View File

@@ -4,11 +4,26 @@ Retrieval-Augmented Generation for MicroDAO
"""
import logging
import os
from typing import Any, Dict
from contextlib import asynccontextmanager
import psycopg2
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from app.models import IngestRequest, IngestResponse, QueryRequest, QueryResponse
from app.core.config import settings
from app.document_store import get_document_store, _make_document
from app.embedding import get_text_embedder
from app.models import (
IngestRequest,
IngestResponse,
QueryRequest,
QueryResponse,
UpsertRequest,
UpsertResponse,
DeleteByFingerprintRequest,
DeleteResponse,
)
from app.ingest_pipeline import ingest_parsed_document
from app.query_pipeline import answer_query
from app.event_worker import event_worker
@@ -23,6 +38,16 @@ async def lifespan(app: FastAPI):
# Startup
logger.info("Starting RAG Service...")
try:
dsn = settings.PG_DSN.replace("postgresql+psycopg2", "postgresql")
with psycopg2.connect(dsn) as conn:
with conn.cursor() as cur:
cur.execute("create extension if not exists vector;")
conn.commit()
logger.info("pgvector extension ensured")
except Exception as e:
logger.error(f"Failed to ensure pgvector extension: {e}", exc_info=True)
raise
# Start event worker in a background thread
def run_event_worker():
@@ -55,6 +80,9 @@ app = FastAPI(
lifespan=lifespan
)
NODE_ENV = os.getenv("NODE_ENV", "production").lower()
DEBUG_ENDPOINTS = os.getenv("DEBUG_ENDPOINTS", "false").lower() == "true"
# CORS middleware
app.add_middleware(
CORSMiddleware,
@@ -127,6 +155,62 @@ async def query_endpoint(request: QueryRequest):
raise HTTPException(status_code=500, detail=str(e))
@app.post("/index/upsert", response_model=UpsertResponse)
async def index_upsert(request: UpsertRequest):
    """Embed and upsert a batch of pre-chunked documents into the vector store.

    An empty chunk list yields an error-status 200 response (not an HTTP
    error); unexpected failures surface as HTTP 500.
    """
    try:
        if not request.chunks:
            return UpsertResponse(status="error", indexed_count=0, message="No chunks provided")
        embedder = get_text_embedder()
        document_store = get_document_store()
        texts = [chunk.content for chunk in request.chunks]
        embeddings_result = embedder.run(texts=texts)
        embeddings = embeddings_result.get("embeddings") or []
        documents = []
        for idx, chunk in enumerate(request.chunks):
            # Guard against a short embeddings list; missing entries become None.
            embedding = embeddings[idx] if idx < len(embeddings) else None
            documents.append(_make_document(content=chunk.content, meta=chunk.meta, embedding=embedding))
        document_store.write_documents(documents)
        return UpsertResponse(status="success", indexed_count=len(documents))
    except Exception as e:
        logger.error(f"Index upsert failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/index/delete_by_fingerprint", response_model=DeleteResponse)
async def delete_by_fingerprint(request: DeleteByFingerprintRequest):
    """Request deletion of all chunks whose meta carries the given fingerprint.

    NOTE(review): deleted_count is hard-coded to 0 — the store does not report
    how many rows were removed; confirm whether callers rely on an accurate count.
    """
    try:
        document_store = get_document_store()
        # Filter uses the Haystack-style {"$eq": ...} comparison envelope.
        document_store.delete_documents(filters={"index_fingerprint": {"$eq": request.fingerprint}})
        return DeleteResponse(status="success", deleted_count=0, message="Delete requested")
    except Exception as e:
        logger.error(f"Delete by fingerprint failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/debug/chunks")
async def debug_chunks(artifact_id: str, limit: int = 3) -> Dict[str, Any]:
    """Inspect stored chunks for an artifact (content previews only).

    Hidden (404) when NODE_ENV is production unless DEBUG_ENDPOINTS=true, so
    indexed content is not exposed outside development.
    """
    if NODE_ENV == "production" and not DEBUG_ENDPOINTS:
        raise HTTPException(status_code=404, detail="Not Found")
    try:
        document_store = get_document_store()
        docs = document_store.filter_documents(
            filters={"artifact_id": artifact_id},
            top_k=limit,
            return_embedding=False,
        )
        items = []
        for doc in docs:
            # First 200 chars keeps the payload small for manual inspection.
            items.append({"content_preview": doc.content[:200], "meta": doc.meta})
        return {"items": items, "count": len(items)}
    except Exception as e:
        logger.error(f"Debug chunks failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
import uvicorn
from app.core.config import settings

View File

@@ -45,3 +45,28 @@ class QueryResponse(BaseModel):
citations: List[Citation] = Field(..., description="List of citations")
documents: List[Dict[str, Any]] = Field(..., description="Retrieved documents (for debugging)")
class UpsertChunk(BaseModel):
    """One pre-chunked piece of text plus its metadata, ready for embedding."""
    content: str = Field(..., description="Chunk content")
    meta: Dict[str, Any] = Field(default_factory=dict, description="Chunk metadata")
class UpsertRequest(BaseModel):
    """Request body for POST /index/upsert."""
    chunks: List[UpsertChunk] = Field(..., description="Chunks to index")
class UpsertResponse(BaseModel):
    """Response body for POST /index/upsert."""
    status: str = Field(..., description="Status: success or error")
    indexed_count: int = Field(..., description="Number of chunks indexed")
    message: Optional[str] = Field(None, description="Optional message")
class DeleteByFingerprintRequest(BaseModel):
    """Request body for POST /index/delete_by_fingerprint."""
    fingerprint: str = Field(..., description="Index fingerprint to delete")
class DeleteResponse(BaseModel):
    """Response body for POST /index/delete_by_fingerprint."""
    status: str = Field(..., description="Status: success or error")
    deleted_count: int = Field(..., description="Number of chunks deleted")
    message: Optional[str] = Field(None, description="Optional message")

View File

@@ -6,12 +6,6 @@ Retrieves relevant documents and generates answers
import logging
from typing import List, Dict, Any, Optional
import httpx
from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.document_stores import PGVectorDocumentStore
from app.document_store import get_document_store
from app.embedding import get_text_embedder
from app.core.config import settings
@@ -151,7 +145,7 @@ def _retrieve_documents(
document_store = get_document_store()
# Embed query
embedding_result = embedder.run(question)
embedding_result = embedder.run(text=question)
query_embedding = embedding_result["embedding"][0] if isinstance(embedding_result["embedding"], list) else embedding_result["embedding"]
# Retrieve with filters using vector similarity search

View File

@@ -0,0 +1,61 @@
anyio==3.7.1
certifi==2026.1.4
charset-normalizer==3.4.4
click==8.3.1
fastapi==0.103.2
filelock==3.20.3
fsspec==2026.1.0
h11==0.16.0
httpcore==1.0.9
httpx==0.27.0
huggingface-hub==0.19.4
idna==3.11
Jinja2==3.1.6
joblib==1.5.3
MarkupSafe==3.0.3
mpmath==1.3.0
nats-py==2.8.0
networkx==3.6.1
nltk==3.9.2
numpy==2.4.1
nvidia-cublas-cu12==12.8.4.1
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cudnn-cu12==9.10.2.21
nvidia-cufft-cu12==11.3.3.83
nvidia-cufile-cu12==1.13.1.3
nvidia-curand-cu12==10.3.9.90
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparselt-cu12==0.7.1
nvidia-nccl-cu12==2.27.5
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvshmem-cu12==3.3.20
nvidia-nvtx-cu12==12.8.90
packaging==25.0
pillow==12.1.0
psycopg2-binary==2.9.11
pydantic==1.10.13
python-dotenv==1.2.1
PyYAML==6.0.3
regex==2026.1.15
requests==2.32.5
safetensors==0.7.0
scikit-learn==1.8.0
scipy==1.17.0
sentence-transformers==2.2.2
sentencepiece==0.2.1
sniffio==1.3.1
starlette==0.27.0
sympy==1.14.0
threadpoolctl==3.6.0
tokenizers==0.19.1
torch==2.9.1
torchvision==0.24.1
tqdm==4.67.1
transformers==4.40.2
triton==3.5.1
typing_extensions==4.15.0
urllib3==2.6.3
uvicorn==0.23.2

View File

@@ -1,11 +1,10 @@
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
pydantic>=2.0.0
pydantic-settings>=2.0.0
farm-haystack[postgresql]>=1.25.3
sentence-transformers>=2.2.0
fastapi==0.103.2
uvicorn==0.23.2
pydantic==1.10.13
sentence-transformers==2.2.2
huggingface_hub==0.19.4
psycopg2-binary>=2.9.0
httpx>=0.27.0
httpx==0.27.0
python-dotenv>=1.0.0
nats-py>=2.7.0
nats-py==2.8.0

View File

@@ -26,20 +26,11 @@ class TestQueryPipeline:
@pytest.mark.asyncio
async def test_build_citations(self):
"""Test citation building"""
from haystack.schema import Document
documents = [
Document(
content="Test content 1",
meta={"doc_id": "doc1", "page": 1, "section": "Section 1"}
),
Document(
content="Test content 2",
meta={"doc_id": "doc2", "page": 2}
)
{"content": "Test content 1", "meta": {"doc_id": "doc1", "page": 1, "section": "Section 1"}},
{"content": "Test content 2", "meta": {"doc_id": "doc2", "page": 2}},
]
citations = _build_citations(documents)
citations = _build_citations([MagicMock(**d) for d in documents])
assert len(citations) == 2
assert citations[0]["doc_id"] == "doc1"