feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection
## Agents Added

- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes

- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes

- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation

- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
441
scripts/qdrant_migrate_rest.py
Normal file
441
scripts/qdrant_migrate_rest.py
Normal file
@@ -0,0 +1,441 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Qdrant Migration Script (REST API version)
|
||||
|
||||
No qdrant-client dependency - uses pure REST API.
|
||||
|
||||
Usage:
|
||||
python3 qdrant_migrate_rest.py --dry-run
|
||||
python3 qdrant_migrate_rest.py --all
|
||||
"""
|
||||
|
||||
import argparse
import hashlib
import json
import logging
import os
import re
import sys
import urllib.error
import urllib.request
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# Module-wide logging: INFO level with timestamps; all progress/error
# reporting in this script goes through this logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Configuration
# Defaults for a migration run; --url and --dim override qdrant_url/text_dim.
DEFAULT_CONFIG = {
    "qdrant_url": "http://localhost:6333",
    "tenant_id": "t_daarion",      # stamped on every migrated payload
    "team_id": "team_core",        # fallback owner when no agent is mapped
    "text_dim": 1024,              # required embedding dimension of the target
    "text_metric": "Cosine",  # Qdrant uses capitalized
    "default_visibility": "confidential",
    "default_owner_kind": "agent",
    "batch_size": 100,             # scroll page size / upsert batch size
}

# Collection patterns
# Each entry: (regex, capture-group index of the agent slug or None,
# canonical scope, extra tags). parse_collection_name() checks them in order,
# so specific names (druid_legal_kb, ...) precede the generic catch-alls.
COLLECTION_PATTERNS = [
    (r"^([a-z]+)_docs$", 1, "docs", []),
    (r"^([a-z]+)_messages$", 1, "messages", []),
    (r"^([a-z]+)_memory_items$", 1, "memory", []),
    (r"^([a-z]+)_artifacts$", 1, "artifacts", []),
    (r"^druid_legal_kb$", None, "docs", ["legal_kb"]),
    (r"^nutra_food_knowledge$", None, "docs", ["food_kb"]),
    (r"^memories$", None, "memory", []),
    (r"^messages$", None, "messages", []),
]

# Known agent slug -> canonical agent id. Unknown slugs fall back to
# "agt_<slug>" in parse_collection_name().
AGENT_SLUGS = {
    "helion": "agt_helion",
    "nutra": "agt_nutra",
    "druid": "agt_druid",
    "greenfood": "agt_greenfood",
    "agromatrix": "agt_agromatrix",
    "daarwizz": "agt_daarwizz",
    "alateya": "agt_alateya",
}
|
||||
|
||||
|
||||
def qdrant_request(url: str, method: str = "GET", data: Optional[Dict] = None) -> Dict:
    """Issue one HTTP request to the Qdrant REST API and return parsed JSON.

    Args:
        url: Full endpoint URL.
        method: HTTP verb (GET, POST, PUT, ...).
        data: Optional JSON-serializable request body.

    Raises:
        Exception: on a non-2xx response, carrying Qdrant's error body.
    """
    request = urllib.request.Request(url, method=method)
    request.add_header("Content-Type", "application/json")

    # Only truthy data produces a body; GETs send none.
    payload = json.dumps(data).encode("utf-8") if data else None

    try:
        with urllib.request.urlopen(request, payload, timeout=30) as response:
            return json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        error_body = e.read().decode("utf-8") if e.fp else ""
        raise Exception(f"Qdrant error {e.code}: {error_body}")
|
||||
|
||||
|
||||
def get_collections(base_url: str) -> List[str]:
    """Return the names of all collections on the Qdrant server."""
    response = qdrant_request(f"{base_url}/collections")
    entries = response.get("result", {}).get("collections", [])
    return [entry["name"] for entry in entries]
|
||||
|
||||
|
||||
def get_collection_info(base_url: str, name: str) -> Dict:
    """Fetch a collection's point count and vector configuration.

    NOTE(review): assumes a single unnamed vector config; for named-vector
    collections "dim"/"metric" would come back as None — confirm if those
    exist in this deployment.
    """
    response = qdrant_request(f"{base_url}/collections/{name}")
    result = response.get("result", {})
    vectors = result.get("config", {}).get("params", {}).get("vectors", {})
    info = {
        "name": name,
        "points_count": result.get("points_count", 0),
        "dim": vectors.get("size"),
        "metric": vectors.get("distance"),
    }
    return info
|
||||
|
||||
|
||||
def create_collection(base_url: str, name: str, dim: int, metric: str) -> bool:
    """Create a collection; True if created, False if it already existed.

    Any other Qdrant error is re-raised unchanged.
    """
    spec = {
        "vectors": {
            "size": dim,
            "distance": metric,
        }
    }
    try:
        qdrant_request(f"{base_url}/collections/{name}", "PUT", spec)
    except Exception as e:
        # qdrant_request wraps HTTP errors in Exception; detect the
        # already-exists case by message.
        if "already exists" in str(e).lower():
            return False
        raise
    return True
|
||||
|
||||
|
||||
def scroll_points(base_url: str, collection: str, limit: int = 100, offset: Optional[str] = None) -> Tuple[List, Optional[str]]:
    """Fetch one page of points (payloads and vectors) from a collection.

    Returns:
        (points, next_page_offset); next_page_offset is None on the last page.
    """
    request_body: Dict[str, Any] = {
        "limit": limit,
        "with_payload": True,
        "with_vector": True,
    }
    if offset:
        request_body["offset"] = offset

    response = qdrant_request(f"{base_url}/collections/{collection}/points/scroll", "POST", request_body)
    page = response.get("result", {})
    return page.get("points", []), page.get("next_page_offset")
|
||||
|
||||
|
||||
def upsert_points(base_url: str, collection: str, points: List[Dict]) -> None:
    """Write a batch of points into a collection (insert or overwrite by id)."""
    qdrant_request(f"{base_url}/collections/{collection}/points", "PUT", {"points": points})
|
||||
|
||||
|
||||
def compute_deterministic_id(source_collection: str, legacy_id: str, source_id: str, chunk_idx: int) -> str:
    """Derive a stable 32-hex-char point id from the migration coordinates.

    The same (collection, legacy id, source id, chunk index) tuple always
    yields the same id, so re-running the migration upserts in place.
    """
    key = "|".join((source_collection, legacy_id, source_id, str(chunk_idx)))
    return hashlib.sha256(key.encode()).hexdigest()[:32]
|
||||
|
||||
|
||||
def compute_fingerprint(source_id: str, chunk_idx: int, text: str = "") -> str:
    """Build a stable content fingerprint of the form 'sha256:<32 hex chars>'."""
    digest = hashlib.sha256(f"{source_id}:{chunk_idx}:{text}".encode()).hexdigest()
    return f"sha256:{digest[:32]}"
|
||||
|
||||
|
||||
def parse_collection_name(name: str) -> Optional[Dict]:
    """Map a legacy collection name onto {"agent_id", "scope", "tags"}.

    Patterns are tried in COLLECTION_PATTERNS order; returns None when the
    name matches none of them.
    """
    lowered = name.lower()
    for pattern, agent_group, scope, tags in COLLECTION_PATTERNS:
        match = re.match(pattern, lowered)
        if not match:
            continue

        if agent_group is not None:
            # Agent slug captured from the name; unknown slugs get agt_<slug>.
            slug = match.group(agent_group).lower()
            agent_id = AGENT_SLUGS.get(slug, f"agt_{slug}")
        elif "druid" in lowered:
            agent_id = "agt_druid"
        elif "nutra" in lowered:
            agent_id = "agt_nutra"
        else:
            agent_id = None

        # Copy tags so callers cannot mutate the shared pattern table.
        return {"agent_id": agent_id, "scope": scope, "tags": tags.copy()}
    return None
|
||||
|
||||
|
||||
def build_canonical_payload(legacy_payload: Dict, collection_info: Dict, config: Dict, point_id: str) -> Dict:
    """Build the canonical "cm_payload_v1" payload from a legacy point payload.

    Args:
        legacy_payload: Raw payload from the source collection (schemas vary).
        collection_info: Output of parse_collection_name() plus "_collection".
        config: Migration config (tenant_id, team_id, defaults).
        point_id: Legacy point id, preserved for traceability.

    Returns:
        The canonical payload dict.
    """
    agent_id = collection_info.get("agent_id")
    scope = collection_info.get("scope", "docs")
    tags = collection_info.get("tags", [])

    # Legacy collections used several field names for the source document id.
    # str() guards against numeric ids, which would otherwise crash re.match.
    source_id = str(
        legacy_payload.get("source_id") or
        legacy_payload.get("document_id") or
        legacy_payload.get("doc_id") or
        f"doc_{point_id}"
    )

    # Normalize to a typed prefix (doc_/msg_/...) when missing.
    if not re.match(r"^(doc|msg|art|web|code)_", source_id):
        prefix = "msg" if scope == "messages" else "doc"
        source_id = f"{prefix}_{source_id}"

    chunk_idx = legacy_payload.get("chunk_idx", legacy_payload.get("chunk_index", 0))
    # str() for the same reason as source_id: legacy chunk ids may be ints.
    chunk_id = str(legacy_payload.get("chunk_id") or f"chk_{point_id}")
    if not chunk_id.startswith("chk_"):
        chunk_id = f"chk_{chunk_id}"

    text = legacy_payload.get("text", legacy_payload.get("content", ""))
    fingerprint = legacy_payload.get("fingerprint") or compute_fingerprint(source_id, chunk_idx, text)

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # timestamp but keep the original trailing-"Z" string format.
    created_at = (
        legacy_payload.get("created_at") or
        legacy_payload.get("timestamp") or
        datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    )

    source_kind = {"docs": "document", "messages": "message", "memory": "document", "artifacts": "artifact"}.get(scope, "document")

    payload = {
        "schema_version": "cm_payload_v1",
        "tenant_id": config["tenant_id"],
        "team_id": config.get("team_id"),
        "project_id": legacy_payload.get("project_id"),
        "agent_id": agent_id,
        "owner_kind": config["default_owner_kind"],
        "owner_id": agent_id or config["team_id"],
        "scope": scope,
        "visibility": legacy_payload.get("visibility", config["default_visibility"]),
        "indexed": legacy_payload.get("indexed", True),
        "source_kind": source_kind,
        "source_id": source_id,
        "chunk": {"chunk_id": chunk_id, "chunk_idx": chunk_idx},
        "fingerprint": fingerprint,
        "created_at": created_at,
        "_legacy_collection": collection_info.get("_collection"),
        "_legacy_point_id": point_id,
    }

    if tags:
        payload["tags"] = tags
    if legacy_payload.get("tags"):
        # Merge pattern tags with per-point tags; set() dedupes (order is lost).
        payload["tags"] = list(set(payload.get("tags", []) + legacy_payload["tags"]))

    # Optional passthrough fields kept verbatim when present.
    for field in ["lang", "importance", "channel_id"]:
        if field in legacy_payload:
            payload[field] = legacy_payload[field]

    return payload
|
||||
|
||||
|
||||
class MigrationStats:
    """Running counters for a migration pass.

    Attributes are mutated directly by migrate_collection() and main().
    """

    def __init__(self):
        self.collections_processed = 0
        self.points_read = 0
        self.points_migrated = 0
        self.points_skipped = 0
        self.errors = []          # per-point / per-collection error records
        self.dim_mismatches = []  # collections rejected for wrong vector dim

    def summary(self) -> Dict:
        """Return a plain-dict snapshot suitable for printing."""
        snapshot = {
            "collections_processed": self.collections_processed,
            "points_read": self.points_read,
            "points_migrated": self.points_migrated,
            "points_skipped": self.points_skipped,
        }
        snapshot["errors_count"] = len(self.errors)
        snapshot["dim_mismatches"] = self.dim_mismatches
        return snapshot
|
||||
|
||||
|
||||
def migrate_collection(
    base_url: str,
    source_collection: str,
    target_collection: str,
    config: Dict,
    stats: MigrationStats,
    dry_run: bool = True,
) -> None:
    """Migrate every point of one legacy collection into the canonical target.

    Scrolls the source in batches, rewrites each payload via
    build_canonical_payload(), assigns a deterministic id, and upserts the
    batch into ``target_collection``. Counters and errors accumulate in
    ``stats`` (mutated in place).

    A vector-dimension mismatch aborts this collection (recorded in
    stats.dim_mismatches); a metric mismatch only logs a warning.
    """
    logger.info(f"Migrating: {source_collection}")

    # Get source info
    source_info = get_collection_info(base_url, source_collection)
    logger.info(f"  Source: dim={source_info['dim']}, metric={source_info['metric']}, points={source_info['points_count']}")

    # Check dim/metric — wrong-dimension vectors cannot be upserted into the
    # target collection, so this is a hard stop for this collection.
    if source_info["dim"] != config["text_dim"]:
        msg = f"Dim mismatch: {source_collection} has {source_info['dim']}, target expects {config['text_dim']}"
        logger.error(f"  ❌ {msg}")
        stats.dim_mismatches.append(source_collection)
        stats.errors.append({"collection": source_collection, "error": msg})
        return

    # A differing distance metric is only warned about, not blocking.
    if source_info["metric"] and source_info["metric"] != config["text_metric"]:
        msg = f"Metric mismatch: {source_collection} has {source_info['metric']}, target expects {config['text_metric']}"
        logger.warning(f"  ⚠️ {msg}")

    # Parse collection name into agent/scope/tags; unknown names fall back
    # to an agent-less "docs" mapping.
    collection_info = parse_collection_name(source_collection) or {"agent_id": None, "scope": "docs", "tags": []}
    collection_info["_collection"] = source_collection
    logger.info(f"  Mapped: agent={collection_info['agent_id']}, scope={collection_info['scope']}")

    if dry_run:
        # Count the whole collection as migrated without touching the target.
        logger.info(f"  [DRY RUN] Would migrate {source_info['points_count']} points")
        stats.points_read += source_info['points_count']
        stats.points_migrated += source_info['points_count']
        stats.collections_processed += 1
        return

    # Scroll and migrate
    offset = None  # scroll cursor; None starts from the beginning
    batch_count = 0
    collection_migrated = 0

    while True:
        points, next_offset = scroll_points(base_url, source_collection, config["batch_size"], offset)

        if not points:
            break

        batch_count += 1
        stats.points_read += len(points)

        canonical_points = []
        for point in points:
            try:
                legacy_payload = point.get("payload", {})
                vector = point.get("vector", [])
                point_id = str(point.get("id", ""))

                canonical_payload = build_canonical_payload(legacy_payload, collection_info, config, point_id)

                # Deterministic id makes re-runs idempotent (upsert overwrites).
                source_id = canonical_payload.get("source_id", "")
                chunk_idx = canonical_payload.get("chunk", {}).get("chunk_idx", 0)
                det_id = compute_deterministic_id(source_collection, point_id, source_id, chunk_idx)

                canonical_points.append({
                    "id": det_id,
                    "vector": vector,
                    "payload": canonical_payload,
                })
            except Exception as e:
                # Per-point failures are recorded and skipped so one bad
                # payload does not abort the whole collection.
                stats.errors.append({"collection": source_collection, "point": point_id, "error": str(e)})
                stats.points_skipped += 1

        if canonical_points:
            upsert_points(base_url, target_collection, canonical_points)
            collection_migrated += len(canonical_points)
            stats.points_migrated += len(canonical_points)

        # Periodic progress log, every 10 batches.
        if batch_count % 10 == 0:
            logger.info(f"  Progress: {stats.points_read} read, {collection_migrated} migrated")

        offset = next_offset
        if not offset:
            break

    stats.collections_processed += 1
    logger.info(f"  Completed: {collection_migrated} points migrated")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: discover, select, and migrate legacy collections.

    Modes:
      (no selection)    list legacy collections with their inferred mapping
      --collections X   migrate an explicit comma-separated list
      --all             migrate every collection not already canonical (cm_*)
      --dry-run         report what would happen without writing

    Exits with status 1 when any dimension mismatch blocked a collection.
    """
    parser = argparse.ArgumentParser(description="Qdrant Migration (REST API)")
    parser.add_argument("--url", default=DEFAULT_CONFIG["qdrant_url"], help="Qdrant URL")
    parser.add_argument("--collections", help="Comma-separated collections")
    parser.add_argument("--all", action="store_true", help="Migrate all legacy collections")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be migrated")
    parser.add_argument("--dim", type=int, default=DEFAULT_CONFIG["text_dim"], help="Target dimension")
    parser.add_argument("--continue-on-error", action="store_true", help="Continue on errors")
    args = parser.parse_args()

    # CLI flags override the baked-in defaults.
    config = DEFAULT_CONFIG.copy()
    config["qdrant_url"] = args.url
    config["text_dim"] = args.dim

    base_url = config["qdrant_url"]

    # Get collections
    logger.info(f"Connecting to Qdrant at {base_url}")
    all_collections = get_collections(base_url)
    logger.info(f"Found {len(all_collections)} collections")

    # Determine which to migrate
    if args.collections:
        collections = [c.strip() for c in args.collections.split(",")]
    elif args.all:
        # "cm_" collections are already canonical — never re-migrate them.
        collections = [c for c in all_collections if not c.startswith("cm_")]
        logger.info(f"Legacy collections to migrate: {len(collections)}")
    else:
        # No selection given: print a discovery report and exit.
        print("\nLegacy collections:")
        for col in all_collections:
            if not col.startswith("cm_"):
                info = parse_collection_name(col)
                info_str = f"→ agent={info['agent_id']}, scope={info['scope']}" if info else "→ (unknown)"
                print(f"  - {col} {info_str}")
        print("\nUse --collections <names> or --all to migrate")
        return

    # Target collection — name encodes embedding dimension and schema version.
    target_collection = f"cm_text_{config['text_dim']}_v1"
    logger.info(f"Target collection: {target_collection}")

    # Create target if needed
    if not args.dry_run:
        if target_collection not in all_collections:
            logger.info(f"Creating target collection: {target_collection}")
            create_collection(base_url, target_collection, config["text_dim"], config["text_metric"])
        else:
            logger.info(f"Target collection exists: {target_collection}")

    # Migrate
    stats = MigrationStats()
    logger.info(f"Starting migration ({'DRY RUN' if args.dry_run else 'LIVE'})")

    for collection in collections:
        try:
            migrate_collection(base_url, collection, target_collection, config, stats, args.dry_run)
        except Exception as e:
            # Collection-level failure: record it and either continue or stop
            # depending on --continue-on-error.
            logger.error(f"Failed: {collection}: {e}")
            stats.errors.append({"collection": collection, "error": str(e)})
            if not args.continue_on_error:
                break

    # Summary
    print("\n" + "=" * 60)
    print("MIGRATION SUMMARY")
    print("=" * 60)
    summary = stats.summary()
    print(f"Target collection: {target_collection}")
    print(f"Collections processed: {summary['collections_processed']}/{len(collections)}")
    print(f"Points read: {summary['points_read']}")
    print(f"Points migrated: {summary['points_migrated']}")
    print(f"Points skipped: {summary['points_skipped']}")
    print(f"Errors: {summary['errors_count']}")

    if summary['dim_mismatches']:
        print(f"\n❌ Dim mismatches ({len(summary['dim_mismatches'])}):")
        for col in summary['dim_mismatches']:
            print(f"  - {col}")

    if stats.errors[:5]:
        print("\nFirst 5 errors:")
        for err in stats.errors[:5]:
            print(f"  - {err}")

    if args.dry_run:
        print("\n[DRY RUN] No changes were made")

    if summary['dim_mismatches']:
        # Non-zero exit so CI/wrapper scripts can detect a blocked migration.
        print("\n❌ MIGRATION BLOCKED due to dim mismatches")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user