feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection

## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
2026-01-28 06:40:34 -08:00
parent 4aeb69e7ae
commit 0c8bef82f4
120 changed files with 21905 additions and 425 deletions
--- a/services/rag-service/app/ingest_pipeline.py
+++ b/services/rag-service/app/ingest_pipeline.py
@@ -6,11 +6,7 @@ Converts ParsedDocument to Haystack Documents and indexes them
 import logging
 from typing import List, Dict, Any, Optional

-from haystack import Pipeline, Document
-from haystack.components.preprocessors import DocumentSplitter
-from haystack.components.writers import DocumentWriter
-
-from app.document_store import get_document_store
+from app.document_store import get_document_store, _make_document
 from app.embedding import get_text_embedder
 from app.core.config import settings
 from app.events import publish_document_ingested, publish_document_indexed
@@ -53,18 +49,25 @@ async def ingest_parsed_document(
                "doc_count": 0
            }
        
-        logger.info(f"Converted {len(documents)} blocks to Haystack Documents")
+        logger.info(f"Converted {len(documents)} blocks to document chunks")
        
-        # Create ingest pipeline
-        pipeline = _create_ingest_pipeline()
+        embedder = get_text_embedder()
+        texts = [doc["content"] for doc in documents]
+        embedding_result = embedder.run(texts=texts)
+        embeddings = embedding_result.get("embeddings", [])
+        
+        doc_objects = []
+        for idx, doc in enumerate(documents):
+            embedding = embeddings[idx] if idx < len(embeddings) else None
+            doc_objects.append(
+                _make_document(content=doc["content"], meta=doc["meta"], embedding=embedding)
+            )
        
-        # Run pipeline
        pipeline_start = time.time()
-        result = pipeline.run({"documents": documents})
+        document_store = get_document_store()
+        document_store.write_documents(doc_objects)
        pipeline_time = time.time() - pipeline_start
-        
-        # Extract results
-        written_docs = result.get("documents_writer", {}).get("documents_written", 0)
+        written_docs = len(doc_objects)
        
        # Calculate metrics
        total_time = time.time() - ingest_start
@@ -124,7 +127,7 @@ def _parsed_json_to_documents(
    dao_id: str,
    doc_id: str,
    user_id: Optional[str] = None
-) -> List[Document]:
+) -> List[Dict[str, Any]]:
    """
    Convert ParsedDocument JSON to Haystack Documents
    
@@ -137,7 +140,7 @@ def _parsed_json_to_documents(
    Returns:
        List of Haystack Document objects
    """
-    documents = []
+    documents: List[Dict[str, Any]] = []
    
    # Extract pages from parsed_json
    pages = parsed_json.get("pages", [])
@@ -186,13 +189,7 @@ def _parsed_json_to_documents(
                    if k not in ["dao_id"]  # Already added
                })
            
-            # Create Haystack Document
-            doc = Document(
-                content=text,
-                meta=meta
-            )
-            
-            documents.append(doc)
+            documents.append({"content": text, "meta": meta})
    
    return documents

@@ -242,35 +239,7 @@ async def _publish_events_async(
        logger.error(f"Failed to publish RAG events for doc_id={doc_id}: {e}")


-def _create_ingest_pipeline() -> Pipeline:
-    """
-    Create Haystack ingest pipeline
-    
-    Pipeline: DocumentSplitter → Embedder → DocumentWriter
-    """
-    # Get components
-    embedder = get_text_embedder()
-    document_store = get_document_store()
-    
-    # Create splitter (optional, if chunks are too large)
-    splitter = DocumentSplitter(
-        split_by="sentence",
-        split_length=settings.CHUNK_SIZE,
-        split_overlap=settings.CHUNK_OVERLAP
-    )
-    
-    # Create writer
-    writer = DocumentWriter(document_store)
-    
-    # Build pipeline
-    pipeline = Pipeline()
-    pipeline.add_component("splitter", splitter)
-    pipeline.add_component("embedder", embedder)
-    pipeline.add_component("documents_writer", writer)
-    
-    # Connect components
-    pipeline.connect("splitter", "embedder")
-    pipeline.connect("embedder", "documents_writer")
-    
-    return pipeline
+def _create_ingest_pipeline():
+    # Deprecated: no haystack pipeline in minimal PGVector mode.
+    return None