import openai
import requests
from requests.auth import HTTPBasicAuth
import json
import pandas as pd


def update_blog_post(
    site_url,
    username,
    app_password,
    post_id,
    new_slug=None,
    new_title=None,
    new_excerpt=None,
    new_content=None,
):
    url = f"{site_url}/wp-json/wp/v2/posts/{post_id}"
    auth = HTTPBasicAuth(username, app_password)

    # Build the payload only with non-None values
    payload = {}
    if new_slug is not None:
        payload["slug"] = new_slug
    if new_title is not None:
        payload["title"] = new_title
    if new_excerpt is not None:
        payload["excerpt"] = new_excerpt
    if new_content is not None:
        payload["content"] = new_content

    # If nothing to update, return early
    if not payload:
        return {"status": "skipped", "reason": "No data to update."}

    response = requests.post(url, auth=auth, json=payload)

    return response


def is_valid_json(s):
    """
    Checks if the given string `s` is valid JSON.
    Returns (True, parsed_json) if valid, otherwise (False, error message).
    """
    try:
        parsed = json.loads(s)
        return True, parsed
    except json.JSONDecodeError as e:
        return False, str(e)


def call_llm(prompt):
    try:
        response = client.chat.completions.create(
            # model="gpt-4.1-nano-2025-04-14",
            model="gpt-4.1-mini-2025-04-14",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that returns structured JSON for relevance classification.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0,
        )

        text = response.choices[0].message.content

        is_json, json_output = is_valid_json(text)

        return {"json_output": json_output, "is_json": is_json}

    except Exception as e:
        print("Error calling OpenAI LLM:", e)
        return {"relevant": "Unknown", "reason": f"Error: {e}"}


def generate_new_title_description(
    original_slug, original_title, original_description, top_queries
):
    prompt = f"""
You are an SEO optimization expert. I will provide you with a blog's **title**, **meta description**, and **top 10 queries from Google Search Console** for which the blog is ranking.

Your task is to **rewrite the title and meta description** to maximize SEO performance, aligning with the provided GSC queries and current SEO best practices.

**Guidelines to follow:**

**Title Optimization:**
- Keep it under **53 characters**.
- Place the **primary keyword** in the **first half** of the title.
- Use **power words** and **emotional triggers** to increase engagement.
- Add **numbers** if appropriate to boost specificity and click-through rates.
- Avoid clickbait; ensure it’s compelling yet trustworthy.

**Meta Description Optimization:**
- Length should be **75–135 characters**.
- Mandatory to Use the **Top Queries from GSC** naturally.
- Make it **informative and persuasive** to drive clicks.

### Input:
- **Title:** {original_title}
- **Slug:** {original_slug}
- **Meta Description:** {original_description}
- **Top Queries from GSC:** {top_queries}

### Output (in JSON format):
{{ 
  "title": "...", 
  "meta_description": "..." 
}}
"""

    json_output_llm = call_llm(prompt)
    response = json_output_llm["json_output"]

    return response.get("title", None), response.get("meta_description", None)


def fetch_queries_for_the_blog(url):
    # Define the date range
    start_date = "2024-05-22"
    end_date = "2025-05-22"

    # Specific page URL to filter
    specific_page = url  # "https://farmonaut.com/australia/revolutionizing-australian-agribusiness-farmonauts-innovative-livestock-management-software-for-efficient-farming/"
    # All responses storage
    all_responses = []
    startRow = 0

    while (startRow == 0) or (startRow % 25000 == 0):
        request = {
            "startDate": start_date,
            "endDate": end_date,
            "dimensions": [
                "query",
                # "date",
                # You can change this to "date", "device", etc.
            ],
            "dimensionFilterGroups": [
                {
                    "filters": [
                        {
                            "dimension": "page",
                            "operator": "equals",
                            "expression": specific_page,
                        }
                    ]
                }
            ],
            "rowLimit": 25000,
            "dataState": "final",
            "startRow": startRow,
        }

        response = (
            webmasters_service.searchanalytics()
            .query(siteUrl=website, body=request)
            .execute()
        )

        # Break loop if no rows returned
        if "rows" not in response:
            break

        startRow = startRow + len(response["rows"])
        # print("Fetched up to " + str(startRow) + " rows of data")

        all_responses.extend(response["rows"])

    # Convert to DataFrame
    import pandas as pd

    if all_responses == []:
        return pd.DataFrame()

    df_filtered_123_123 = pd.DataFrame(all_responses)
    df_filtered_123_123["multiplication_ratio"] = (
        df_filtered_123_123["position"] * df_filtered_123_123["impressions"]
    )

    df_filtered_123_123 = df_filtered_123_123.sort_values(
        by="multiplication_ratio", ascending=False
    )

    return df_filtered_123_123


from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Path to your OAuth 2.0 client ID file
CLIENT_SECRETS_FILE = "oauth_clientID.json"

# Define the required scopes
SCOPES = ["https://www.googleapis.com/auth/webmasters"]

# Run the OAuth flow to get credentials
flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRETS_FILE, SCOPES)
credentials = flow.run_local_server(port=0, open_browser=False)
#credentials = flow.run_console()
# Build the service object
service = build("webmasters", "v3", credentials=credentials)

print("GSC Authentication Process Completed")

webmasters_service = service

# Get a list of site in my Google Search Console Account
site_list = webmasters_service.sites().list().execute()
print(site_list)

# What is the website I want the sitemap for?
website = site_list["siteEntry"][0]["siteUrl"]

# Set your OpenAI API key
API_KEY = "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"  # Replace with your actual OpenAI API key
client = openai.OpenAI(api_key=API_KEY)


# Replace these with your actual credentials
site_url = "https://www.farmonaut.com"
username = "ankuromar296"
app_password = "Tjat A2hz 9XMv pXJi YbV0 GR8o"

file_name = "blogs_to_remove_123.csv"
df_blogs_to_remove = pd.read_csv(file_name)


for index, row in df_blogs_to_remove.iterrows():

    try:
        print(index)

        if row["updated"] == True:
            continue

        url = row["url_uncleaned"]
        original_title = row["title"]
        original_descripition = row["meta_description"]
        original_slug = row["slug"]
        blog_id = row["blog_id"]

        if url == None:
            continue

        if pd.isna(url):
            continue
        queries = fetch_queries_for_the_blog(url)
        top_queries = (
            queries["keys"]
            .head(10)  ## Top 10 queries
            .apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)
            .to_list()
        )

        new_title, new_description = generate_new_title_description(
            original_slug, original_title, original_descripition, top_queries
        )

        update_blog_post(
            site_url,
            username,
            app_password,
            post_id=blog_id,
            new_title=new_title,
            new_excerpt=new_description,
        )

        # Mark the row as updated
        df_blogs_to_remove.at[index, "updated"] = True

        # Save the CSV after every update

        df_blogs_to_remove.to_csv(file_name, index=False)

        if index % 10 == 0:
            print(index, "processed")

        #break
    except Exception as e:
        print("Error at index:", index, e)
        continue