#!/usr/bin/env python3
import time
import uuid
from pathlib import Path

import ads
import PyPDF2
from opensearchpy import OpenSearch

from chat_engine import create_embedding_model
from config import OPENSEARCH_END_POINT, INDEX_NAME, OCI_OPENSEARCH_USERNAME, \
    OCI_OPENSEARCH_PASSWORD, OCI_OPENSEARCH_VERIFY_CERTS
from oci_utils import load_oci_config

# Create the module-level OpenSearch client (runs at import time).
# Whether TLS certificates are verified is decided by the
# OCI_OPENSEARCH_VERIFY_CERTS value from config, not hard-coded here.
client = OpenSearch(
    OPENSEARCH_END_POINT,  # your OCI OpenSearch private endpoint
    http_auth=(OCI_OPENSEARCH_USERNAME, OCI_OPENSEARCH_PASSWORD),
    verify_certs=OCI_OPENSEARCH_VERIFY_CERTS,
)

# Load the OCI configuration and derive the API-key auth object from it;
# api_keys_config is later passed as `auth` when the embedding model is created.
oci_config = load_oci_config()
api_keys_config = ads.auth.api_keys(oci_config)


def save_uploaded_file(uploaded_file, upload_dir):
    """Write an uploaded file's bytes into *upload_dir* and return its path.

    Only the final path component of ``uploaded_file.name`` is used, so a
    client-supplied name such as ``../../secret`` or an absolute path cannot
    place the file outside *upload_dir*.

    Args:
        uploaded_file: Object exposing ``name`` (str) and ``getbuffer()``,
            the latter returning the bytes-like content to write.
        upload_dir: Existing directory (``pathlib.Path`` or str) to save into.

    Returns:
        The path of the written file inside *upload_dir*.

    Raises:
        ValueError: If the name has no usable file-name component
            (empty, ``.`` or ``..``).
    """
    # Drop any directory components supplied by the client.
    safe_name = Path(uploaded_file.name).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
    file_path = Path(upload_dir) / safe_name
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path


# Create index with appropriate settings and mappings
def create_index(index_name=None, dimension=1024):
    """Create the k-NN enabled OpenSearch index if it does not exist yet.

    Args:
        index_name: Name of the index to create. When omitted, ``INDEX_NAME``
            from config is used, which is the previous fixed behaviour.
        dimension: Size of the ``embedding`` ``knn_vector`` field. It has to
            match the length of the vectors that will be indexed.
            Defaults to 1024.

    Returns:
        None. The outcome (creation response, or a notice that the index
        already exists) is printed.
    """
    target_index = INDEX_NAME if index_name is None else index_name
    settings = {
        "settings": {
            "index": {
                "knn": True,
            }
        },
        "mappings": {
            "properties": {
                "id": {"type": "keyword"},
                "page_number": {"type": "integer"},
                "body": {"type": "text"},
                "embedding": {
                    "type": "knn_vector",
                    "dimension": dimension,
                },
            }
        },
    }
    if not client.indices.exists(index=target_index):
        response = client.indices.create(index=target_index, body=settings)
        print("Index creation response:", response)
    else:
        print(f"Index {target_index} already exists.")


# Extract text from PDF page by page
def extract_text_from_pdf_page_by_page(file_path):
    """Lazily yield ``(page_number, text)`` for each page of a PDF file.

    Page numbers start at 1. When ``extract_text()`` returns a falsy value
    for a page, an empty string is yielded for that page instead.
    """
    with open(file_path, 'rb') as stream:
        reader = PyPDF2.PdfReader(stream)
        for number, pdf_page in enumerate(reader.pages, start=1):
            extracted = pdf_page.extract_text()
            yield number, (extracted if extracted else '')


# Get embeddings from text
def get_embeddings(text, embed_model):
    """Embed one piece of text and return its vector.

    ``embed_model.embed_documents`` is called with a one-element list and the
    first entry of its result is returned. The vector length is printed as a
    debugging aid.
    """
    vector = embed_model.embed_documents([text])[0]
    print("Embedding size:", len(vector))  # Debug output
    return vector


# Index document into OpenSearch
def index_document_to_opensearch(index_name, doc_id, page_number, body, embedding):
    """Index a single page into OpenSearch and return the client's response.

    The stored document carries ``id``, ``page_number``, ``body`` and
    ``embedding``; the OpenSearch document ID is ``<doc_id>_<page_number>``.
    """
    record_id = f"{doc_id}_{page_number}"
    payload = dict(
        id=doc_id,
        page_number=page_number,
        body=body,
        embedding=embedding,
    )
    return client.index(index=index_name, id=record_id, body=payload)


# Function to generate a unique document ID
def generate_unique_doc_id():
    """Return a new document ID shaped like ``doc_<unix-seconds>_<uuid4>``."""
    seconds = int(time.time())
    random_part = uuid.uuid4()
    return "_".join(("doc", str(seconds), str(random_part)))


# Main function to process and index PDF page by page
def process_and_index_pdf_page_by_page(pdf_path, index_name):
    """Extract, embed and index every page of a PDF into OpenSearch.

    All pages of one call share a freshly generated document ID. Each page is
    embedded and indexed on its own, and the per-page response is printed.

    Args:
        pdf_path: Path of the PDF file to read.
        index_name: Name of the OpenSearch index the pages are written to.

    Returns:
        None. Progress and errors are reported via ``print``. An error while
        processing pages stops the run and is printed rather than raised;
        the completion message is printed only when every page succeeded.
    """
    # NOTE: create_index() is called without arguments, so it prepares the
    # index named by INDEX_NAME from config. If index_name differs, this call
    # does not create that index with the k-NN mapping.
    create_index()
    doc_id = generate_unique_doc_id()
    embed_model = create_embedding_model(auth=api_keys_config)
    try:
        for page_number, text in extract_text_from_pdf_page_by_page(pdf_path):
            content_vector = get_embeddings(text, embed_model)
            response = index_document_to_opensearch(index_name, doc_id, page_number, body=text,
                                                    embedding=content_vector)
            print(f"Page {page_number} indexed:", response)
    except Exception as e:
        # Best-effort: report the failure and stop. Pages indexed before the
        # error remain in the index.
        print(f"Error in uploading: {e}")
    else:
        # Only claim completion when no page failed.
        print("Finished uploading pages!")