feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection
## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
@@ -6,11 +6,7 @@ Converts ParsedDocument to Haystack Documents and indexes them
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from haystack import Pipeline, Document
|
||||
from haystack.components.preprocessors import DocumentSplitter
|
||||
from haystack.components.writers import DocumentWriter
|
||||
|
||||
from app.document_store import get_document_store
|
||||
from app.document_store import get_document_store, _make_document
|
||||
from app.embedding import get_text_embedder
|
||||
from app.core.config import settings
|
||||
from app.events import publish_document_ingested, publish_document_indexed
|
||||
@@ -53,18 +49,25 @@ async def ingest_parsed_document(
|
||||
"doc_count": 0
|
||||
}
|
||||
|
||||
logger.info(f"Converted {len(documents)} blocks to Haystack Documents")
|
||||
logger.info(f"Converted {len(documents)} blocks to document chunks")
|
||||
|
||||
# Create ingest pipeline
|
||||
pipeline = _create_ingest_pipeline()
|
||||
embedder = get_text_embedder()
|
||||
texts = [doc["content"] for doc in documents]
|
||||
embedding_result = embedder.run(texts=texts)
|
||||
embeddings = embedding_result.get("embeddings", [])
|
||||
|
||||
doc_objects = []
|
||||
for idx, doc in enumerate(documents):
|
||||
embedding = embeddings[idx] if idx < len(embeddings) else None
|
||||
doc_objects.append(
|
||||
_make_document(content=doc["content"], meta=doc["meta"], embedding=embedding)
|
||||
)
|
||||
|
||||
# Run pipeline
|
||||
pipeline_start = time.time()
|
||||
result = pipeline.run({"documents": documents})
|
||||
document_store = get_document_store()
|
||||
document_store.write_documents(doc_objects)
|
||||
pipeline_time = time.time() - pipeline_start
|
||||
|
||||
# Extract results
|
||||
written_docs = result.get("documents_writer", {}).get("documents_written", 0)
|
||||
written_docs = len(doc_objects)
|
||||
|
||||
# Calculate metrics
|
||||
total_time = time.time() - ingest_start
|
||||
@@ -124,7 +127,7 @@ def _parsed_json_to_documents(
|
||||
dao_id: str,
|
||||
doc_id: str,
|
||||
user_id: Optional[str] = None
|
||||
) -> List[Document]:
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Convert ParsedDocument JSON to Haystack Documents
|
||||
|
||||
@@ -137,7 +140,7 @@ def _parsed_json_to_documents(
|
||||
Returns:
|
||||
List of Haystack Document objects
|
||||
"""
|
||||
documents = []
|
||||
documents: List[Dict[str, Any]] = []
|
||||
|
||||
# Extract pages from parsed_json
|
||||
pages = parsed_json.get("pages", [])
|
||||
@@ -186,13 +189,7 @@ def _parsed_json_to_documents(
|
||||
if k not in ["dao_id"] # Already added
|
||||
})
|
||||
|
||||
# Create Haystack Document
|
||||
doc = Document(
|
||||
content=text,
|
||||
meta=meta
|
||||
)
|
||||
|
||||
documents.append(doc)
|
||||
documents.append({"content": text, "meta": meta})
|
||||
|
||||
return documents
|
||||
|
||||
@@ -242,35 +239,7 @@ async def _publish_events_async(
|
||||
logger.error(f"Failed to publish RAG events for doc_id={doc_id}: {e}")
|
||||
|
||||
|
||||
def _create_ingest_pipeline() -> Pipeline:
|
||||
"""
|
||||
Create Haystack ingest pipeline
|
||||
|
||||
Pipeline: DocumentSplitter → Embedder → DocumentWriter
|
||||
"""
|
||||
# Get components
|
||||
embedder = get_text_embedder()
|
||||
document_store = get_document_store()
|
||||
|
||||
# Create splitter (optional, if chunks are too large)
|
||||
splitter = DocumentSplitter(
|
||||
split_by="sentence",
|
||||
split_length=settings.CHUNK_SIZE,
|
||||
split_overlap=settings.CHUNK_OVERLAP
|
||||
)
|
||||
|
||||
# Create writer
|
||||
writer = DocumentWriter(document_store)
|
||||
|
||||
# Build pipeline
|
||||
pipeline = Pipeline()
|
||||
pipeline.add_component("splitter", splitter)
|
||||
pipeline.add_component("embedder", embedder)
|
||||
pipeline.add_component("documents_writer", writer)
|
||||
|
||||
# Connect components
|
||||
pipeline.connect("splitter", "embedder")
|
||||
pipeline.connect("embedder", "documents_writer")
|
||||
|
||||
return pipeline
|
||||
def _create_ingest_pipeline():
|
||||
# Deprecated: no haystack pipeline in minimal PGVector mode.
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user