#!/usr/bin/env python3
"""
Qdrant Vector Collections Setup for Helion Memory v3.0

Collections:
1. helion_memory_items - Long-term memory facts (preferences, decisions, lessons)
2. helion_artifacts - Documents, specs, whitepaper embeddings
3. helion_messages - Recent message embeddings for context retrieval

Run:
    python setup_qdrant_collections.py [--host HOST] [--port PORT]
"""

import argparse
import sys
from typing import Optional

from qdrant_client import QdrantClient
from qdrant_client.http import models as qmodels

# Cohere embed-multilingual-v3.0 produces 1024-dimensional vectors
EMBEDDING_DIMENSIONS = 1024


def _ensure_collection(
    client: QdrantClient,
    collection_name: str,
    indexing_threshold: Optional[int] = None,
) -> None:
    """Create *collection_name* if it does not exist yet and report the outcome.

    Args:
        client: Connected Qdrant client.
        collection_name: Name of the collection to create.
        indexing_threshold: When given, passed to Qdrant as
            ``OptimizersConfigDiff(indexing_threshold=...)``; when ``None`` no
            optimizer override is sent at all.
    """
    print(f"\n📦 Setting up collection: {collection_name}")

    if client.collection_exists(collection_name):
        print(f" ℹ️ Collection already exists: {collection_name}")
        return

    create_kwargs = {
        "collection_name": collection_name,
        "vectors_config": qmodels.VectorParams(
            size=EMBEDDING_DIMENSIONS,
            distance=qmodels.Distance.COSINE,
        ),
        # On-disk payload storage for large collections
        "on_disk_payload": True,
    }
    # Only send an optimizer override when the collection asks for one, so
    # collections without a threshold keep the server defaults.
    if indexing_threshold is not None:
        create_kwargs["optimizers_config"] = qmodels.OptimizersConfigDiff(
            indexing_threshold=indexing_threshold
        )

    client.create_collection(**create_kwargs)
    print(f" ✅ Created collection: {collection_name}")


def _create_payload_indexes(
    client: QdrantClient,
    collection_name: str,
    indexes: list,
) -> None:
    """Request a payload index for every ``(field_name, schema_type)`` pair.

    Index creation is best-effort: a failure for one field is reported on
    stdout and the remaining fields are still processed. Errors whose message
    contains "already exists" are reported as informational.

    Args:
        client: Connected Qdrant client.
        collection_name: Collection that receives the indexes.
        indexes: Sequence of ``(field_name, qmodels.PayloadSchemaType)`` tuples.
    """
    print(" 📇 Creating payload indexes...")

    for field_name, field_type in indexes:
        try:
            # wait=False: the request is submitted without blocking until the
            # index has actually been built.
            client.create_payload_index(
                collection_name=collection_name,
                field_name=field_name,
                field_schema=field_type,
                wait=False,
            )
        except Exception as e:  # best-effort: report and move on
            if "already exists" in str(e).lower():
                print(f" ℹ️ Index exists: {field_name}")
            else:
                print(f" ⚠️ Index {field_name}: {e}")
        else:
            print(f" ✅ Index: {field_name} ({field_type.value})")


def _print_summary(client: QdrantClient) -> None:
    """Print point/vector counts and status for every collection on the server."""
    print("\n" + "=" * 60)
    print("📊 Qdrant Collections Summary")
    print("=" * 60)

    for coll in client.get_collections().collections:
        info = client.get_collection(coll.name)
        # `vectors_count` is deprecated in Qdrant and may be missing from the
        # collection-info model in newer qdrant-client releases; do not let
        # the final report crash on it.
        vectors_count = getattr(info, "vectors_count", "n/a")
        print(f"\n{coll.name}:")
        print(f" Points: {info.points_count}")
        print(f" Vectors: {vectors_count}")
        print(f" Status: {info.status}")


def setup_collections(host: str = "localhost", port: int = 6333) -> bool:
    """Create and configure Qdrant collections for Helion Memory.

    Connects to Qdrant, makes sure the three Helion collections exist
    (``helion_memory_items``, ``helion_artifacts``, ``helion_messages``),
    requests their payload indexes, and prints a summary of all collections
    found on the server.

    Args:
        host: Qdrant host name.
        port: Qdrant port.

    Returns:
        ``True`` once setup has run to completion.

    Raises:
        SystemExit: With exit code 1 when the initial connection check fails.
    """
    print(f"🔌 Connecting to Qdrant at {host}:{port}...")
    client = QdrantClient(host=host, port=port)

    # Check connection
    try:
        collections = client.get_collections()
    except Exception as e:
        print(f"❌ Failed to connect: {e}")
        sys.exit(1)
    print(f"✅ Connected. Existing collections: {[c.name for c in collections.collections]}")

    keyword = qmodels.PayloadSchemaType.KEYWORD
    datetime_type = qmodels.PayloadSchemaType.DATETIME

    # (collection name, indexing_threshold override or None, payload indexes)
    collection_specs = [
        # Collection 1: long-term memory items, filtered by user and type
        (
            "helion_memory_items",
            10000,
            [
                ("platform_user_id", keyword),
                ("type", keyword),
                ("category", keyword),
                ("visibility", keyword),
                ("scope_ref", keyword),
                ("confidence", qmodels.PayloadSchemaType.FLOAT),
                ("created_at", datetime_type),
                ("expires_at", datetime_type),
                ("archived", qmodels.PayloadSchemaType.BOOL),
            ],
        ),
        # Collection 2: artifacts (documents, specs, whitepaper embeddings)
        (
            "helion_artifacts",
            None,
            [
                ("artifact_id", keyword),
                ("project_id", keyword),
                ("source", keyword),
                ("source_type", keyword),  # whitepaper, spec, landing, faq
                ("language", keyword),
                ("version", keyword),
                ("chunk_index", qmodels.PayloadSchemaType.INTEGER),
                ("created_at", datetime_type),
            ],
        ),
        # Collection 3: messages (for recent context retrieval)
        (
            "helion_messages",
            5000,
            [
                ("conversation_id", keyword),
                ("platform_user_id", keyword),
                ("channel", keyword),
                ("chat_id", keyword),
                ("role", keyword),  # user, assistant, system
                ("timestamp", datetime_type),
            ],
        ),
    ]

    for collection_name, indexing_threshold, indexes in collection_specs:
        _ensure_collection(client, collection_name, indexing_threshold)
        _create_payload_indexes(client, collection_name, indexes)

    _print_summary(client)

    print("\n✅ Qdrant setup complete!")
    return True


def main():
    """Parse ``--host`` / ``--port`` from the command line and run the setup."""
    parser = argparse.ArgumentParser(description="Setup Qdrant collections for Helion Memory")
    parser.add_argument("--host", default="localhost", help="Qdrant host")
    parser.add_argument("--port", type=int, default=6333, help="Qdrant port")
    args = parser.parse_args()

    setup_collections(args.host, args.port)


if __name__ == "__main__":
    main()