#!/usr/bin/env python3
"""
Nexvor Vector Database Server (ChromaDB)
Handles embedding generation and vector search for knowledge bases

Communication: JSON over stdin/stdout
Java plugin sends commands, Python responds with results
"""

import json
import os
import sys
import uuid
from pathlib import Path

import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

class NexvorVectorDB:
    """Vector store server backed by ChromaDB and a sentence-transformers model.

    Commands arrive as one JSON object per line on stdin; exactly one JSON
    response line is written to stdout for every non-blank command line.
    Diagnostics go to stderr so they never mix with the response stream.

    Every public operation returns a dict with a boolean ``"success"`` key;
    failures carry an ``"error"`` string instead of raising.
    """

    def __init__(self, db_path):
        """Initialize ChromaDB and embedding model"""
        self.db_path = db_path

        # Initialize ChromaDB (persistent)
        self.client = chromadb.PersistentClient(
            path=db_path,
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True
            )
        )

        # Load embedding model (lightweight, good quality)
        # all-MiniLM-L6-v2: 384 dimensions, 80MB, fast
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

        # Cache of collection handles, keyed by collection name
        self.collections = {}

        self.log("ChromaDB initialized at: " + db_path)
        self.log("Embedding model loaded: all-MiniLM-L6-v2")

    def log(self, message):
        """Log to stderr (stdout is for JSON responses)"""
        print(f"[ChromaDB] {message}", file=sys.stderr, flush=True)

    def get_or_create_collection(self, name):
        """Return the collection called ``name``, creating it if necessary.

        Handles are cached in ``self.collections`` so the client is only
        asked once per collection name.
        """
        if name not in self.collections:
            self.collections[name] = self.client.get_or_create_collection(
                name=name,
                metadata={"description": f"Nexvor {name} collection"}
            )
        return self.collections[name]

    def add_documents(self, collection_name, documents, metadatas=None, ids=None):
        """
        Add documents to collection

        Args:
            collection_name: Name of collection
            documents: List of text documents
            metadatas: Optional list of metadata dicts
            ids: Optional list of document IDs; unique IDs are generated
                when omitted

        Returns:
            Dict with "success", and on success "collection", "added"
            and "ids"; on failure "error".
        """
        try:
            # Nothing to embed or store: report a successful no-op rather
            # than handing empty lists to the backend.
            if not documents:
                return {
                    "success": True,
                    "collection": collection_name,
                    "added": 0,
                    "ids": []
                }

            collection = self.get_or_create_collection(collection_name)

            # Generate embeddings
            embeddings = self.model.encode(documents).tolist()

            # Generate IDs if not provided. Count-based IDs ("doc_<count+i>")
            # collide with surviving documents once anything has been
            # deleted, so use random UUIDs instead.
            if ids is None:
                ids = [f"doc_{uuid.uuid4().hex}" for _ in documents]

            # Add to ChromaDB
            collection.add(
                documents=documents,
                embeddings=embeddings,
                metadatas=metadatas,
                ids=ids
            )

            return {
                "success": True,
                "collection": collection_name,
                "added": len(documents),
                "ids": ids
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    def search(self, collection_name, query, n_results=5, where=None):
        """
        Search for similar documents

        Args:
            collection_name: Name of collection
            query: Search query text
            n_results: Number of results to return
            where: Optional metadata filter

        Returns:
            Dict with "success", and on success "collection", "query" and
            "results" (list of dicts with "id", "document", "distance",
            "metadata"); on failure "error".
        """
        try:
            collection = self.get_or_create_collection(collection_name)

            # Generate query embedding
            query_embedding = self.model.encode([query]).tolist()

            # Search
            results = collection.query(
                query_embeddings=query_embedding,
                n_results=n_results,
                where=where
            )

            # Only one query was sent, so only the first row of each
            # result field is relevant.
            formatted_results = []
            document_rows = results['documents']
            if document_rows:
                hits = document_rows[0]
                metadata_rows = results['metadatas']
                hit_metadatas = metadata_rows[0] if metadata_rows else [None] * len(hits)
                formatted_results = [
                    {
                        "id": doc_id,
                        "document": document,
                        "distance": distance,
                        "metadata": metadata
                    }
                    for doc_id, document, distance, metadata in zip(
                        results['ids'][0],
                        hits,
                        results['distances'][0],
                        hit_metadatas
                    )
                ]

            return {
                "success": True,
                "collection": collection_name,
                "query": query,
                "results": formatted_results
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    def delete_documents(self, collection_name, ids):
        """Delete documents by ID.

        Returns a dict whose "deleted" value is the number of IDs requested,
        not a count confirmed by the backend.
        """
        try:
            # Nothing to delete: never pass an empty/missing ID list to the
            # backend, where it is not a well-defined "delete nothing".
            if not ids:
                return {
                    "success": True,
                    "collection": collection_name,
                    "deleted": 0
                }

            collection = self.get_or_create_collection(collection_name)
            collection.delete(ids=ids)

            return {
                "success": True,
                "collection": collection_name,
                "deleted": len(ids)
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    def get_collection_info(self, collection_name):
        """Get information about a collection (currently its document count)"""
        try:
            collection = self.get_or_create_collection(collection_name)
            count = collection.count()

            return {
                "success": True,
                "collection": collection_name,
                "count": count
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    def list_collections(self):
        """List all collections with their document counts"""
        try:
            entries = []
            for col in self.client.list_collections():
                # Some chromadb releases return plain collection names here
                # instead of Collection objects; resolve names to handles.
                if isinstance(col, str):
                    col = self.client.get_collection(col)
                entries.append({
                    "name": col.name,
                    "count": col.count()
                })

            return {
                "success": True,
                "collections": entries
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    def clear_collection(self, collection_name):
        """Clear a collection by deleting it from the database.

        The collection itself is removed (not just its documents) and its
        cached handle is dropped; a later add/search/info call recreates it.
        """
        try:
            self.client.delete_collection(collection_name)
            # Drop the stale handle so the next access recreates the collection
            self.collections.pop(collection_name, None)

            return {
                "success": True,
                "collection": collection_name,
                "message": "Collection cleared"
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

    def process_command(self, command_json):
        """Process a command from Java.

        Args:
            command_json: One JSON object encoded as a string. Its "action"
                field selects the operation ("add", "search", "delete",
                "info", "list", "clear", "ping").

        Returns:
            The response dict of the selected operation, or a
            {"success": False, "error": ...} dict when the command is
            malformed or the action is unknown.
        """
        try:
            command = json.loads(command_json)

            # Valid JSON that is not an object (e.g. a list or a number)
            # cannot carry an action.
            if not isinstance(command, dict):
                return {
                    "success": False,
                    "error": "Command processing error: command must be a JSON object"
                }

            action = command.get("action")

            if action == "add":
                return self.add_documents(
                    command["collection"],
                    command["documents"],
                    command.get("metadatas"),
                    command.get("ids")
                )

            elif action == "search":
                return self.search(
                    command["collection"],
                    command["query"],
                    command.get("n_results", 5),
                    command.get("where")
                )

            elif action == "delete":
                return self.delete_documents(
                    command["collection"],
                    command["ids"]
                )

            elif action == "info":
                return self.get_collection_info(command["collection"])

            elif action == "list":
                return self.list_collections()

            elif action == "clear":
                return self.clear_collection(command["collection"])

            elif action == "ping":
                return {"success": True, "message": "pong"}

            else:
                return {
                    "success": False,
                    "error": f"Unknown action: {action}"
                }

        except KeyError as e:
            # The handlers catch their own exceptions, so a KeyError here
            # can only come from a required command field being absent.
            return {
                "success": False,
                "error": f"Command processing error: missing required field {e}"
            }
        except Exception as e:
            return {
                "success": False,
                "error": f"Command processing error: {str(e)}"
            }

    def run(self):
        """Main loop: read commands from stdin, write responses to stdout.

        Blank lines are skipped. Every other line produces exactly one JSON
        response line, even when the response cannot be serialized, so the
        caller's request/response pairing stays in step.
        """
        self.log("Server ready, waiting for commands...")

        try:
            for line in sys.stdin:
                line = line.strip()
                if not line:
                    continue

                # Process command
                response = self.process_command(line)

                # Send response as JSON. A response that cannot be encoded
                # must not take the whole server down: report it instead.
                try:
                    payload = json.dumps(response)
                except (TypeError, ValueError) as e:
                    self.log(f"Response serialization error: {str(e)}")
                    payload = json.dumps({
                        "success": False,
                        "error": f"Response serialization error: {str(e)}"
                    })

                print(payload, flush=True)

        except KeyboardInterrupt:
            self.log("Server stopped")
        except Exception as e:
            self.log(f"Fatal error: {str(e)}")
            sys.exit(1)

def main():
    """Entry point: validate the CLI argument, prepare storage, start serving.

    Expects the database directory as the first command-line argument;
    prints a usage line to stderr and exits with status 1 when it is missing.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: chroma_server.py <db_path>", file=sys.stderr)
        sys.exit(1)

    storage_dir = cli_args[0]

    # Make sure the storage directory (and any missing parents) exists
    Path(storage_dir).mkdir(parents=True, exist_ok=True)

    # Build the server and hand control to its command loop
    NexvorVectorDB(storage_dir).run()

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
