feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection
## Agents Added

- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes

- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes

- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation

- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
441
scripts/qdrant_migrate_rest.py
Normal file
441
scripts/qdrant_migrate_rest.py
Normal file
@@ -0,0 +1,441 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Qdrant Migration Script (REST API version)
|
||||
|
||||
No qdrant-client dependency - uses pure REST API.
|
||||
|
||||
Usage:
|
||||
python3 qdrant_migrate_rest.py --dry-run
|
||||
python3 qdrant_migrate_rest.py --all
|
||||
"""
|
||||
|
||||
import argparse
import hashlib
import json
import logging
import os
import re
import sys
import urllib.error
import urllib.request
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# Module-wide logging: INFO level with timestamps; all progress/error
# reporting in this script goes through this logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Configuration
# Defaults for a migration run; --url and --dim override qdrant_url/text_dim.
DEFAULT_CONFIG = {
    "qdrant_url": "http://localhost:6333",
    "tenant_id": "t_daarion",      # stamped on every migrated payload
    "team_id": "team_core",        # fallback owner when no agent is mapped
    "text_dim": 1024,              # required embedding dimension of the target
    "text_metric": "Cosine",  # Qdrant uses capitalized
    "default_visibility": "confidential",
    "default_owner_kind": "agent",
    "batch_size": 100,             # scroll page size / upsert batch size
}

# Collection patterns
# Each entry: (regex, capture-group index of the agent slug or None,
# canonical scope, extra tags). parse_collection_name() checks them in order,
# so specific names (druid_legal_kb, ...) precede the generic catch-alls.
COLLECTION_PATTERNS = [
    (r"^([a-z]+)_docs$", 1, "docs", []),
    (r"^([a-z]+)_messages$", 1, "messages", []),
    (r"^([a-z]+)_memory_items$", 1, "memory", []),
    (r"^([a-z]+)_artifacts$", 1, "artifacts", []),
    (r"^druid_legal_kb$", None, "docs", ["legal_kb"]),
    (r"^nutra_food_knowledge$", None, "docs", ["food_kb"]),
    (r"^memories$", None, "memory", []),
    (r"^messages$", None, "messages", []),
]

# Known agent slug -> canonical agent id. Unknown slugs fall back to
# "agt_<slug>" in parse_collection_name().
AGENT_SLUGS = {
    "helion": "agt_helion",
    "nutra": "agt_nutra",
    "druid": "agt_druid",
    "greenfood": "agt_greenfood",
    "agromatrix": "agt_agromatrix",
    "daarwizz": "agt_daarwizz",
    "alateya": "agt_alateya",
}
|
||||
|
||||
|
||||
def qdrant_request(url: str, method: str = "GET", data: Optional[Dict] = None) -> Dict:
    """Issue one HTTP request to the Qdrant REST API and return parsed JSON.

    Args:
        url: Full endpoint URL.
        method: HTTP verb (GET, POST, PUT, ...).
        data: Optional JSON-serializable request body.

    Raises:
        Exception: on a non-2xx response, carrying Qdrant's error body.
    """
    request = urllib.request.Request(url, method=method)
    request.add_header("Content-Type", "application/json")

    # Only truthy data produces a body; GETs send none.
    payload = json.dumps(data).encode("utf-8") if data else None

    try:
        with urllib.request.urlopen(request, payload, timeout=30) as response:
            return json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        error_body = e.read().decode("utf-8") if e.fp else ""
        raise Exception(f"Qdrant error {e.code}: {error_body}")
|
||||
|
||||
|
||||
def get_collections(base_url: str) -> List[str]:
    """Return the names of all collections on the Qdrant server."""
    response = qdrant_request(f"{base_url}/collections")
    entries = response.get("result", {}).get("collections", [])
    return [entry["name"] for entry in entries]
|
||||
|
||||
|
||||
def get_collection_info(base_url: str, name: str) -> Dict:
    """Fetch a collection's point count and vector configuration.

    NOTE(review): assumes a single unnamed vector config; for named-vector
    collections "dim"/"metric" would come back as None — confirm if those
    exist in this deployment.
    """
    response = qdrant_request(f"{base_url}/collections/{name}")
    result = response.get("result", {})
    vectors = result.get("config", {}).get("params", {}).get("vectors", {})
    info = {
        "name": name,
        "points_count": result.get("points_count", 0),
        "dim": vectors.get("size"),
        "metric": vectors.get("distance"),
    }
    return info
|
||||
|
||||
|
||||
def create_collection(base_url: str, name: str, dim: int, metric: str) -> bool:
    """Create a collection; True if created, False if it already existed.

    Any other Qdrant error is re-raised unchanged.
    """
    spec = {
        "vectors": {
            "size": dim,
            "distance": metric,
        }
    }
    try:
        qdrant_request(f"{base_url}/collections/{name}", "PUT", spec)
    except Exception as e:
        # qdrant_request wraps HTTP errors in Exception; detect the
        # already-exists case by message.
        if "already exists" in str(e).lower():
            return False
        raise
    return True
|
||||
|
||||
|
||||
def scroll_points(base_url: str, collection: str, limit: int = 100, offset: Optional[str] = None) -> Tuple[List, Optional[str]]:
    """Fetch one page of points (payloads and vectors) from a collection.

    Returns:
        (points, next_page_offset); next_page_offset is None on the last page.
    """
    request_body: Dict[str, Any] = {
        "limit": limit,
        "with_payload": True,
        "with_vector": True,
    }
    if offset:
        request_body["offset"] = offset

    response = qdrant_request(f"{base_url}/collections/{collection}/points/scroll", "POST", request_body)
    page = response.get("result", {})
    return page.get("points", []), page.get("next_page_offset")
|
||||
|
||||
|
||||
def upsert_points(base_url: str, collection: str, points: List[Dict]) -> None:
    """Write a batch of points into a collection (insert or overwrite by id)."""
    qdrant_request(f"{base_url}/collections/{collection}/points", "PUT", {"points": points})
|
||||
|
||||
|
||||
def compute_deterministic_id(source_collection: str, legacy_id: str, source_id: str, chunk_idx: int) -> str:
    """Derive a stable 32-hex-char point id from the migration coordinates.

    The same (collection, legacy id, source id, chunk index) tuple always
    yields the same id, so re-running the migration upserts in place.
    """
    key = "|".join((source_collection, legacy_id, source_id, str(chunk_idx)))
    return hashlib.sha256(key.encode()).hexdigest()[:32]
|
||||
|
||||
|
||||
def compute_fingerprint(source_id: str, chunk_idx: int, text: str = "") -> str:
    """Build a stable content fingerprint of the form 'sha256:<32 hex chars>'."""
    digest = hashlib.sha256(f"{source_id}:{chunk_idx}:{text}".encode()).hexdigest()
    return f"sha256:{digest[:32]}"
|
||||
|
||||
|
||||
def parse_collection_name(name: str) -> Optional[Dict]:
    """Map a legacy collection name onto {"agent_id", "scope", "tags"}.

    Patterns are tried in COLLECTION_PATTERNS order; returns None when the
    name matches none of them.
    """
    lowered = name.lower()
    for pattern, agent_group, scope, tags in COLLECTION_PATTERNS:
        match = re.match(pattern, lowered)
        if not match:
            continue

        if agent_group is not None:
            # Agent slug captured from the name; unknown slugs get agt_<slug>.
            slug = match.group(agent_group).lower()
            agent_id = AGENT_SLUGS.get(slug, f"agt_{slug}")
        elif "druid" in lowered:
            agent_id = "agt_druid"
        elif "nutra" in lowered:
            agent_id = "agt_nutra"
        else:
            agent_id = None

        # Copy tags so callers cannot mutate the shared pattern table.
        return {"agent_id": agent_id, "scope": scope, "tags": tags.copy()}
    return None
|
||||
|
||||
|
||||
def build_canonical_payload(legacy_payload: Dict, collection_info: Dict, config: Dict, point_id: str) -> Dict:
    """Build the canonical "cm_payload_v1" payload from a legacy point payload.

    Args:
        legacy_payload: Raw payload from the source collection (schemas vary).
        collection_info: Output of parse_collection_name() plus "_collection".
        config: Migration config (tenant_id, team_id, defaults).
        point_id: Legacy point id, preserved for traceability.

    Returns:
        The canonical payload dict.
    """
    agent_id = collection_info.get("agent_id")
    scope = collection_info.get("scope", "docs")
    tags = collection_info.get("tags", [])

    # Legacy collections used several field names for the source document id.
    # str() guards against numeric ids, which would otherwise crash re.match.
    source_id = str(
        legacy_payload.get("source_id") or
        legacy_payload.get("document_id") or
        legacy_payload.get("doc_id") or
        f"doc_{point_id}"
    )

    # Normalize to a typed prefix (doc_/msg_/...) when missing.
    if not re.match(r"^(doc|msg|art|web|code)_", source_id):
        prefix = "msg" if scope == "messages" else "doc"
        source_id = f"{prefix}_{source_id}"

    chunk_idx = legacy_payload.get("chunk_idx", legacy_payload.get("chunk_index", 0))
    # str() for the same reason as source_id: legacy chunk ids may be ints.
    chunk_id = str(legacy_payload.get("chunk_id") or f"chk_{point_id}")
    if not chunk_id.startswith("chk_"):
        chunk_id = f"chk_{chunk_id}"

    text = legacy_payload.get("text", legacy_payload.get("content", ""))
    fingerprint = legacy_payload.get("fingerprint") or compute_fingerprint(source_id, chunk_idx, text)

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # timestamp but keep the original trailing-"Z" string format.
    created_at = (
        legacy_payload.get("created_at") or
        legacy_payload.get("timestamp") or
        datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    )

    source_kind = {"docs": "document", "messages": "message", "memory": "document", "artifacts": "artifact"}.get(scope, "document")

    payload = {
        "schema_version": "cm_payload_v1",
        "tenant_id": config["tenant_id"],
        "team_id": config.get("team_id"),
        "project_id": legacy_payload.get("project_id"),
        "agent_id": agent_id,
        "owner_kind": config["default_owner_kind"],
        "owner_id": agent_id or config["team_id"],
        "scope": scope,
        "visibility": legacy_payload.get("visibility", config["default_visibility"]),
        "indexed": legacy_payload.get("indexed", True),
        "source_kind": source_kind,
        "source_id": source_id,
        "chunk": {"chunk_id": chunk_id, "chunk_idx": chunk_idx},
        "fingerprint": fingerprint,
        "created_at": created_at,
        "_legacy_collection": collection_info.get("_collection"),
        "_legacy_point_id": point_id,
    }

    if tags:
        payload["tags"] = tags
    if legacy_payload.get("tags"):
        # Merge pattern tags with per-point tags; set() dedupes (order is lost).
        payload["tags"] = list(set(payload.get("tags", []) + legacy_payload["tags"]))

    # Optional passthrough fields kept verbatim when present.
    for field in ["lang", "importance", "channel_id"]:
        if field in legacy_payload:
            payload[field] = legacy_payload[field]

    return payload
|
||||
|
||||
|
||||
class MigrationStats:
    """Running counters for a migration pass.

    Attributes are mutated directly by migrate_collection() and main().
    """

    def __init__(self):
        self.collections_processed = 0
        self.points_read = 0
        self.points_migrated = 0
        self.points_skipped = 0
        self.errors = []          # per-point / per-collection error records
        self.dim_mismatches = []  # collections rejected for wrong vector dim

    def summary(self) -> Dict:
        """Return a plain-dict snapshot suitable for printing."""
        snapshot = {
            "collections_processed": self.collections_processed,
            "points_read": self.points_read,
            "points_migrated": self.points_migrated,
            "points_skipped": self.points_skipped,
        }
        snapshot["errors_count"] = len(self.errors)
        snapshot["dim_mismatches"] = self.dim_mismatches
        return snapshot
|
||||
|
||||
|
||||
def migrate_collection(
    base_url: str,
    source_collection: str,
    target_collection: str,
    config: Dict,
    stats: MigrationStats,
    dry_run: bool = True,
) -> None:
    """Migrate every point of one legacy collection into the canonical target.

    Scrolls the source in batches, rewrites each payload via
    build_canonical_payload(), assigns a deterministic id, and upserts the
    batch into ``target_collection``. Counters and errors accumulate in
    ``stats`` (mutated in place).

    A vector-dimension mismatch aborts this collection (recorded in
    stats.dim_mismatches); a metric mismatch only logs a warning.
    """
    logger.info(f"Migrating: {source_collection}")

    # Get source info
    source_info = get_collection_info(base_url, source_collection)
    logger.info(f"  Source: dim={source_info['dim']}, metric={source_info['metric']}, points={source_info['points_count']}")

    # Check dim/metric — wrong-dimension vectors cannot be upserted into the
    # target collection, so this is a hard stop for this collection.
    if source_info["dim"] != config["text_dim"]:
        msg = f"Dim mismatch: {source_collection} has {source_info['dim']}, target expects {config['text_dim']}"
        logger.error(f"  ❌ {msg}")
        stats.dim_mismatches.append(source_collection)
        stats.errors.append({"collection": source_collection, "error": msg})
        return

    # A differing distance metric is only warned about, not blocking.
    if source_info["metric"] and source_info["metric"] != config["text_metric"]:
        msg = f"Metric mismatch: {source_collection} has {source_info['metric']}, target expects {config['text_metric']}"
        logger.warning(f"  ⚠️ {msg}")

    # Parse collection name into agent/scope/tags; unknown names fall back
    # to an agent-less "docs" mapping.
    collection_info = parse_collection_name(source_collection) or {"agent_id": None, "scope": "docs", "tags": []}
    collection_info["_collection"] = source_collection
    logger.info(f"  Mapped: agent={collection_info['agent_id']}, scope={collection_info['scope']}")

    if dry_run:
        # Count the whole collection as migrated without touching the target.
        logger.info(f"  [DRY RUN] Would migrate {source_info['points_count']} points")
        stats.points_read += source_info['points_count']
        stats.points_migrated += source_info['points_count']
        stats.collections_processed += 1
        return

    # Scroll and migrate
    offset = None  # scroll cursor; None starts from the beginning
    batch_count = 0
    collection_migrated = 0

    while True:
        points, next_offset = scroll_points(base_url, source_collection, config["batch_size"], offset)

        if not points:
            break

        batch_count += 1
        stats.points_read += len(points)

        canonical_points = []
        for point in points:
            try:
                legacy_payload = point.get("payload", {})
                vector = point.get("vector", [])
                point_id = str(point.get("id", ""))

                canonical_payload = build_canonical_payload(legacy_payload, collection_info, config, point_id)

                # Deterministic id makes re-runs idempotent (upsert overwrites).
                source_id = canonical_payload.get("source_id", "")
                chunk_idx = canonical_payload.get("chunk", {}).get("chunk_idx", 0)
                det_id = compute_deterministic_id(source_collection, point_id, source_id, chunk_idx)

                canonical_points.append({
                    "id": det_id,
                    "vector": vector,
                    "payload": canonical_payload,
                })
            except Exception as e:
                # Per-point failures are recorded and skipped so one bad
                # payload does not abort the whole collection.
                stats.errors.append({"collection": source_collection, "point": point_id, "error": str(e)})
                stats.points_skipped += 1

        if canonical_points:
            upsert_points(base_url, target_collection, canonical_points)
            collection_migrated += len(canonical_points)
            stats.points_migrated += len(canonical_points)

        # Periodic progress log, every 10 batches.
        if batch_count % 10 == 0:
            logger.info(f"  Progress: {stats.points_read} read, {collection_migrated} migrated")

        offset = next_offset
        if not offset:
            break

    stats.collections_processed += 1
    logger.info(f"  Completed: {collection_migrated} points migrated")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: discover, select, and migrate legacy collections.

    Modes:
      (no selection)    list legacy collections with their inferred mapping
      --collections X   migrate an explicit comma-separated list
      --all             migrate every collection not already canonical (cm_*)
      --dry-run         report what would happen without writing

    Exits with status 1 when any dimension mismatch blocked a collection.
    """
    parser = argparse.ArgumentParser(description="Qdrant Migration (REST API)")
    parser.add_argument("--url", default=DEFAULT_CONFIG["qdrant_url"], help="Qdrant URL")
    parser.add_argument("--collections", help="Comma-separated collections")
    parser.add_argument("--all", action="store_true", help="Migrate all legacy collections")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be migrated")
    parser.add_argument("--dim", type=int, default=DEFAULT_CONFIG["text_dim"], help="Target dimension")
    parser.add_argument("--continue-on-error", action="store_true", help="Continue on errors")
    args = parser.parse_args()

    # CLI flags override the baked-in defaults.
    config = DEFAULT_CONFIG.copy()
    config["qdrant_url"] = args.url
    config["text_dim"] = args.dim

    base_url = config["qdrant_url"]

    # Get collections
    logger.info(f"Connecting to Qdrant at {base_url}")
    all_collections = get_collections(base_url)
    logger.info(f"Found {len(all_collections)} collections")

    # Determine which to migrate
    if args.collections:
        collections = [c.strip() for c in args.collections.split(",")]
    elif args.all:
        # "cm_" collections are already canonical — never re-migrate them.
        collections = [c for c in all_collections if not c.startswith("cm_")]
        logger.info(f"Legacy collections to migrate: {len(collections)}")
    else:
        # No selection given: print a discovery report and exit.
        print("\nLegacy collections:")
        for col in all_collections:
            if not col.startswith("cm_"):
                info = parse_collection_name(col)
                info_str = f"→ agent={info['agent_id']}, scope={info['scope']}" if info else "→ (unknown)"
                print(f"  - {col} {info_str}")
        print("\nUse --collections <names> or --all to migrate")
        return

    # Target collection — name encodes embedding dimension and schema version.
    target_collection = f"cm_text_{config['text_dim']}_v1"
    logger.info(f"Target collection: {target_collection}")

    # Create target if needed
    if not args.dry_run:
        if target_collection not in all_collections:
            logger.info(f"Creating target collection: {target_collection}")
            create_collection(base_url, target_collection, config["text_dim"], config["text_metric"])
        else:
            logger.info(f"Target collection exists: {target_collection}")

    # Migrate
    stats = MigrationStats()
    logger.info(f"Starting migration ({'DRY RUN' if args.dry_run else 'LIVE'})")

    for collection in collections:
        try:
            migrate_collection(base_url, collection, target_collection, config, stats, args.dry_run)
        except Exception as e:
            # Collection-level failure: record it and either continue or stop
            # depending on --continue-on-error.
            logger.error(f"Failed: {collection}: {e}")
            stats.errors.append({"collection": collection, "error": str(e)})
            if not args.continue_on_error:
                break

    # Summary
    print("\n" + "=" * 60)
    print("MIGRATION SUMMARY")
    print("=" * 60)
    summary = stats.summary()
    print(f"Target collection: {target_collection}")
    print(f"Collections processed: {summary['collections_processed']}/{len(collections)}")
    print(f"Points read: {summary['points_read']}")
    print(f"Points migrated: {summary['points_migrated']}")
    print(f"Points skipped: {summary['points_skipped']}")
    print(f"Errors: {summary['errors_count']}")

    if summary['dim_mismatches']:
        print(f"\n❌ Dim mismatches ({len(summary['dim_mismatches'])}):")
        for col in summary['dim_mismatches']:
            print(f"  - {col}")

    if stats.errors[:5]:
        print("\nFirst 5 errors:")
        for err in stats.errors[:5]:
            print(f"  - {err}")

    if args.dry_run:
        print("\n[DRY RUN] No changes were made")

    if summary['dim_mismatches']:
        # Non-zero exit so CI/wrapper scripts can detect a blocked migration.
        print("\n❌ MIGRATION BLOCKED due to dim mismatches")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user