import json
import psycopg2
from pymilvus import Collection, connections, DataType, FieldSchema, CollectionSchema, utility
import ast
def migrar():
    # PostgreSQL
    conn = psycopg2.connect(
        host="198.251.78.201",
        database="tws2_db_prod_courier",
        user="tws2admin",
        password="Q@QMKUrTsFFa",
        port="5432")
    cur = conn.cursor()

    # cur.execute('select text, meta, embedding from collection_prueba order by id desc limit 5000 offset 20000')
    cur.execute(
        'select tsd.texto, td.nombre, tsd.embedding from lhia_ganansol.t_documento td, lhia_ganansol.t_seccion_documento tsd where td.id_documento =tsd.id_documento and tsd.generado = 1 LIMIT 5000 OFFSET 0')

    rows = cur.fetchall()
    textos = [row[0] for row in rows]
    nombreDocumentos = [row[1] for row in rows]
    embeddings = [ast.literal_eval(row[2]) for row in rows]
    cur.close()
    conn.close()

    # conn a Milvus
    connections.connect(db_name="coop_ganansol", host="74.208.60.79", port="19530")
    collection_name = "colaborador_ganansol"

    if not utility.has_collection(collection_name):
        # esquema
        fields = [
            FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
            FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=15000),
            FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=15000),
            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=3072)
        ]
        schema = CollectionSchema(fields, description="Colección de gañansol")
        collection = Collection(name=collection_name, schema=schema)
        print("coleccion creada")

    else:
        collection = Collection(name=collection_name)

    # Crear índice si no existe
    if not collection.has_index():
        index_params = {
            "metric_type": "COSINE",
            "index_type": "IVF_FLAT",
            "params": {"nlist": 36}
        }
        collection.create_index(field_name="vector", index_params=index_params)
        print("Índice creado.")

    # Insertar la data e milvus
    mr = collection.insert([textos, nombreDocumentos, embeddings])
    print('data insertada ids:', mr.primary_keys)
    print("terminado")
    collection.load()
    connections.disconnect("default")

migrar()