import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
import json

import openai
import re
import unicodedata
import time
import json
from bs4 import BeautifulSoup
import csv
from urllib.parse import urlparse


# Replace these with your actual credentials
site_url = "https://www.farmonaut.com"
username = "ankuromar296"
app_password = "Tjat A2hz 9XMv pXJi YbV0 GR8o"
auth    = HTTPBasicAuth(username, app_password)
headers = {"Content-Type": "application/json"}


def add_redirect(source, target):
    payload = {"source": source, "target": target}
    #print(payload)
    #return "done"
    resp = requests.post(
        f"{site_url}/wp-json/my-redirects/v1/rule",
        json=payload,
        auth=auth,
        headers=headers
    )
    resp.raise_for_status()
    return resp.json()

def update_blog_post(
    site_url,
    username,
    app_password,
    post_id,
    new_slug=None,
    new_title=None,
    new_excerpt=None,
    new_content=None,
):
    url = f"{site_url}/wp-json/wp/v2/posts/{post_id}"
    auth = HTTPBasicAuth(username, app_password)

    # Build the payload only with non-None values
    payload = {}
    if new_slug is not None:
        payload["slug"] = new_slug
    if new_title is not None:
        payload["title"] = new_title
    if new_excerpt is not None:
        payload["excerpt"] = new_excerpt
    if new_content is not None:
        payload["content"] = new_content

    # If nothing to update, return early
    if not payload:
        return {"status": "skipped", "reason": "No data to update."}

    response = requests.post(url, auth=auth, json=payload)

    return response

from urllib.parse import urlparse

from urllib.parse import urlparse

def extract_slug_and_category(url):
    parsed_url = urlparse(url)
    slug = parsed_url.path.strip('/')
    parts = slug.split('/')
    slug = '/' + slug
    category = parts[0] if parts else ''
    return slug, category


def append_to_csv(file_path, row):
    """
    Appends a single value as a new row to a one-column CSV file.

    Args:
        file_path (str): Path to the CSV file.
        value (str): The value to append.
    """
    with open(file_path, "a", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(row)


def getDictFromFile(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.loads(f.read())


def saveJsonDataToFile(path, data):
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


def is_valid_json(s):
    """
    Checks if the given string `s` is valid JSON.
    Returns (True, parsed_json) if valid, otherwise (False, error message).
    """
    try:
        parsed = json.loads(s)
        return True, parsed
    except json.JSONDecodeError as e:
        return False, str(e)


def call_llm(prompt):
    try:
        response = client.chat.completions.create(
            # model="gpt-4.1-nano-2025-04-14",
            model="gpt-4.1-mini-2025-04-14",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that returns structured JSON for relevance classification.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0,
        )

        text = response.choices[0].message.content

        is_json, json_output = is_valid_json(text)

        return {"json_output": json_output, "is_json": is_json}

    except Exception as e:
        print("Error calling OpenAI LLM:", e)
        return {"relevant": "Unknown", "reason": f"Error: {e}"}


def count_words_in_slug(slug):
    if "_" in slug:
        return len(slug.split("_"))
    elif "-" in slug:
        return len(slug.split("-"))
    else:
        return 1 if slug else 0


def title_to_slug(title):
    # Normalize to remove accents (e.g., á -> a)
    title = (
        unicodedata.normalize("NFKD", title).encode("ascii", "ignore").decode("utf-8")
    )
    # Lowercase
    title = title.lower()
    # Replace any non-alphanumeric characters with hyphens
    title = re.sub(r"[^a-z0-9]+", "-", title)
    # Remove leading/trailing hyphens
    title = title.strip("-")
    return title


def count_path_segments(url):
    parsed = urlparse(url)
    # Remove leading/trailing slashes and split by "/"
    path_segments = [seg for seg in parsed.path.strip("/").split("/") if seg]
    return len(path_segments)


df_new = pd.read_csv("all_pages_by_date_GSC.csv")
df_all_urls = df_new.groupby("clean_url")["clicks"].sum().reset_index()
df_all_urls["clean_url"] = df_all_urls["clean_url"].astype(str)
url_to_clicks = dict(zip(df_all_urls["clean_url"], df_all_urls["clicks"]))


dict_all_blogs = getDictFromFile("blog_posts.json")
df_all_blogs = pd.DataFrame.from_dict(dict_all_blogs, orient="index")

df_all_blogs["slug_length"] = df_all_blogs["slug"].str.len()
df_all_blogs["title_length"] = df_all_blogs["title"].str.len()
df_all_blogs["meta_description_length"] = df_all_blogs["meta_description"].str.len()

# Apply to create the 'clicks' column
df_all_blogs["clicks"] = df_all_blogs["url"].apply(
    lambda url: url_to_clicks.get(url, 0)
)

df_all_blogs["path_segments"] = df_all_blogs["url"].apply(count_path_segments)
df_all_blogs = df_all_blogs.drop_duplicates(subset="blog_id")

filtered_df = df_all_blogs[
    (df_all_blogs["path_segments"] > 1)
    & (
        df_all_blogs["title"].notna()
        | (df_all_blogs["slug_length"] > 75)
        | (df_all_blogs["title_length"] > 70)
        | (df_all_blogs["meta_description_length"] > 150)
    )
]
# print(filtered_df.head(5))
filtered_index_list = filtered_df.index.tolist()
print("Blog Posts to Update", len(filtered_index_list), filtered_index_list)

try:
    processed_df = pd.read_csv("processed_indices.csv", header=None)
    first_column_name = processed_df.columns[0]
    processed_list = processed_df[first_column_name].tolist()

except:
    processed_list = list()

try:
    unprocessed_df = pd.read_csv("unprocessed_indices.csv", header=None)
    first_column_name = unprocessed_df.columns[0]
    unprocessed_indices = unprocessed_df[first_column_name].tolist()

except:
    unprocessed_indices = list()

print("Already Processed", processed_list)
print("Already UnProcessed", unprocessed_indices)


# Set your OpenAI API key
API_KEY = "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"  # Replace with your actual OpenAI API key
client = openai.OpenAI(api_key=API_KEY)


# for key, blog_post_data in dict_all_blogs.items():
for index in filtered_index_list:
    # index = "5078"

    print("Processing Index", index)

    if (int(index) not in processed_list) and (int(index) not in unprocessed_indices):

        blog_post_data = df_all_blogs.loc[index]

        url_blog = blog_post_data["url"]
        slug_blog = blog_post_data.get("slug", "")
        title_blog = blog_post_data.get("title", "")
        meta_description_blog = blog_post_data.get("meta_description", "")
        id_blog = blog_post_data.get("blog_id", None)
        content_blog = blog_post_data.get("content", "")
        clicks = blog_post_data.get("clicks", 0)

        path_segments = count_path_segments(url_blog)

        if (
            url_blog
            and slug_blog
            and content_blog
            and count_words_in_slug(slug_blog) > 3
            and path_segments > 1
        ):

            prompt = f"""
    You are an expert SEO assistant. I will provide you with a blog's **title** and **meta_description**. Your task is to **rewrite all elements to maximize SEO effectiveness** based on best practices.

    SEO Guidelines to follow:

    **Title Optimization:**
    - Must be under 53 characters.
    - Place the **primary focus keyword** within the **first 50%** of the title.
    - Use **power words** and **emotional triggers** to make it more engaging.
    - Include **numbers** if relevant to increase specificity and clickability.
    - Avoid clickbait but make it compelling enough to boost CTR.

    **Slug Optimization:**
    - Keep it under 63 characters.
    - Ensure it includes the focus keyword and is **lowercase**, **hyphen-separated**, and **free from stop words** (like “and”, “the”, etc.)
    - Make it concise, descriptive, and easily readable.

    **Meta Description Optimization:**
    - Between 75 and 135 characters.
    - Include the **primary focus keyword** naturally.
    - Write it in an informative and persuasive tone to encourage users to click.

    Here is the input:
    title: {title_blog}
    slug : {slug_blog}
    meta_description: {meta_description_blog}

    Return the optimized result in the following JSON format with keys:

    "title": "...",
    "meta_description": "..."

    """

            innerloopcount = 0
            while True:

                # Call your LLM here using the prompt
                json_output_llm = call_llm(prompt)
                # json_output_llm = {
                #     "json_output": {
                #         "title": "New Title",
                #         "meta_description": "New Meta Description",
                #     },
                #     "is_json": True,
                # }

                if (
                    json_output_llm.get("is_json") is True
                    and isinstance(json_output_llm.get("json_output"), dict)
                    and all(
                        key in json_output_llm["json_output"]
                        and json_output_llm["json_output"][key]
                        for key in ["title", "meta_description"]
                    )
                ):

                    break

                else:
                    ## TODO :- Should not get stuck here
                    print(
                        f"Retrying for URL after 3 sec: {index},{url_blog}",
                        json_output_llm,
                    )
                    time.sleep(
                        3
                    )  # Optional delay between retries to avoid hammering the API
                    innerloopcount = innerloopcount + 1

                    if innerloopcount > 3:
                        append_to_csv("unprocessed_indices.csv", [index])

                        unprocessed_indices.append(index)

                        break

            new_title = json_output_llm["json_output"]["title"]
            new_meta_description = json_output_llm["json_output"]["meta_description"]

            # if clicks > 10:
            #     new_slug = None
            # else:
            new_slug = title_to_slug(new_title)

            ######## Remove H1 tags

            # Find all <h1> tags with a class attribute
            soup = BeautifulSoup(content_blog, "html.parser")

            # Find all <h1> tags with a class attribute
            h1_tags_all = soup.find_all("h1")
            h1_tags_with_class = soup.find_all("h1", class_=True)
            # print(h1_tags_with_class)

            for h1 in h1_tags_with_class:
                # Create a new h2 tag with the same class and content
                h2 = soup.new_tag("h2", **{"class": h1.get("class")})
                h2.string = h1.string  # Only works if h1 contains just a text node

                # If h1 has nested elements (not just plain text), use .contents
                if not h1.string:
                    for child in h1.contents:
                        h2.append(child)

                # Replace the h1 tag with h2
                h1.replace_with(h2)

            # Get updated HTML
            updated_content = str(soup)

            # Find all <h1> tags
            h1_tags_updated = soup.find_all("h1")

            ### making sure there were classes with h1 tags + after update there is only one h1 tag

            if len(h1_tags_all) > 1:
                if len(h1_tags_with_class) > 0 and len(h1_tags_updated) == 1:
                    new_content = updated_content
                else:

                    append_to_csv("unprocessed_indices.csv", [index])

                    unprocessed_indices.append(index)

                    ## Dont update the blog
                    continue

            else:
                new_content = None

            ## Upload and keep track
            response = update_blog_post(
                site_url=site_url,
                username=username,
                app_password=app_password,
                post_id=int(id_blog),
                new_slug=new_slug,
                new_title=new_title,
                new_excerpt=new_meta_description,
                new_content=new_content,
            )

            if response.status_code == 200:
                append_to_csv("processed_indices.csv", [index, new_title, new_slug])

                processed_list.append(index)
                old_slug, category = extract_slug_and_category(url_blog)
                #print(f'1: {site_url}, 2: {new_title} , 3: {new_slug}, 4: {old_slug}, 5: {category}')
                add_redirect(old_slug, (f'/{category}/{new_slug}') )
                add_redirect(f'{old_slug}/', (f'/{category}/{new_slug}') )
            else:
                append_to_csv("failed_indices.csv", [index])
                unprocessed_indices.append(index)

            ## TODO :- Remove this for infinite loop
            #break
            # if int(index) > 5:
            #     break

        else:

            append_to_csv("unprocessed_indices.csv", [index])
            unprocessed_indices.append(index)