#!/usr/bin/env python3
"""WordPress SEO Updater for Rank Math.

Reads a CSV/Excel file containing page URLs and their ranking queries,
generates a new SEO title and meta description using the OpenAI API, and
updates the corresponding post (or page) in WordPress via its REST API.
Progress is logged to an Excel file so that re-runs skip posts that were
already processed.
"""

import os
import re  # NOTE(review): unused in this file; kept to avoid breaking unseen code
import json
import requests
import pandas as pd
from openai import OpenAI
from datetime import datetime
from urllib.parse import urlparse

# --- CONFIGURATION ---
# --- Fill in / export these details before running the script ---

# SECURITY: credentials are read from the environment — never commit secrets
# to source control.  Export before running:
#   export WP_USERNAME="..."
#   export WP_APPLICATION_PASSWORD="..."   # WordPress Application Password
#   export OPENAI_API_KEY="sk-..."
WP_BASE_URL = "https://farmonaut.com"  # e.g., "https://example.com"
WP_USERNAME = os.environ.get("WP_USERNAME", "")
WP_APPLICATION_PASSWORD = os.environ.get("WP_APPLICATION_PASSWORD", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# File Configuration
INPUT_FILE = "gsc_analysis_report_20250610_024030.csv"  # file from the previous script
LOG_FILE = "updated_seo_log_rankmath.xlsx"  # stores results and tracks progress
# --- END OF CONFIGURATION ---


def load_processed_urls(log_file):
    """Return the set of 'Page URL' values already recorded in *log_file*.

    Returns an empty set when the log does not exist or cannot be read,
    so a corrupt log degrades to "start fresh" rather than crashing.
    """
    if not os.path.exists(log_file):
        return set()
    try:
        df = pd.read_excel(log_file)
        return set(df['Page URL'].tolist())
    except Exception as e:
        print(f"⚠️ Warning: Could not read log file {log_file}. Starting fresh. Error: {e}")
        return set()


def get_slug_from_url(page_url):
    """Extract the final slug from a full URL.

    Intelligently ignores categories or parent paths in the permalink
    structure (e.g. '/blog/my-post/' -> 'my-post').  Returns None for the
    root URL, non-string input, or any parsing error.
    """
    if not isinstance(page_url, str):
        return None
    try:
        path = urlparse(page_url).path
        # Split the path by '/' and drop empty strings produced by
        # leading/trailing slashes; the slug is the last non-empty part.
        path_parts = [part for part in path.split('/') if part]
        if path_parts:
            return path_parts[-1]
        # Root URL "/" (the homepage) has no slug.
        return None
    except Exception:
        # General catch-all for any other parsing errors.
        return None


def get_post_id_from_slug(slug):
    """Find the WordPress post (or page) ID for *slug* via the REST API.

    Queries /wp/v2/posts first, then falls back to /wp/v2/pages.
    Returns the numeric ID, or None when nothing matches or a request fails.
    """
    if not slug:
        return None
    # We ask for posts first, but this can also find pages if the slug is unique.
    api_url = f"{WP_BASE_URL}/wp-json/wp/v2/posts?slug={slug}"
    try:
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()
        posts = response.json()
        if posts and isinstance(posts, list):
            return posts[0].get('id')
        # If not found in posts, check pages.
        print(f" -> Post not found for slug '{slug}'. Checking pages...")
        api_url_pages = f"{WP_BASE_URL}/wp-json/wp/v2/pages?slug={slug}"
        response_pages = requests.get(api_url_pages, timeout=10)
        response_pages.raise_for_status()
        pages = response_pages.json()
        if pages and isinstance(pages, list):
            return pages[0].get('id')
        print(f" -> ❌ Post or Page not found for slug: {slug}")
        return None
    except requests.exceptions.RequestException as e:
        print(f" -> ❌ Error fetching post/page ID for slug '{slug}': {e}")
        return None


def generate_seo_content(slug, queries, client):
    """Generate an SEO title and meta description with the OpenAI API.

    Returns a dict with 'title' and 'description' keys, or None when the
    API call fails or the model's JSON is missing a required key.
    """
    print(f" -> 🤖 Generating SEO content for slug: {slug}...")
    prompt = f"""
You are an expert SEO copywriter for the Rank Math plugin. Your task is to write a compelling SEO title and meta description for a blog post.

**Context:**
- The post is already ranking for these unique user queries: `{queries}`

**Instructions:**
1. **SEO Title:** Create a concise, click-worthy title. It MUST be under 50 characters. Incorporate the primary theme from the queries.
2. **Meta Description:** Write an engaging meta description that encourages clicks. It MUST be under 150 characters. Summarize the content and include a call-to-action if appropriate.
3. **Format:** Return your response as a single, valid JSON object with two keys: "title" and "description". Do not include any other text or explanations.

Example response:
{{
  "title": "Your New SEO Title Here",
  "description": "Your new, compelling meta description goes in this spot."
}}
"""
    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini-2025-04-14",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            # json_object mode guarantees syntactically valid JSON output.
            response_format={"type": "json_object"}
        )
        content = response.choices[0].message.content
        seo_data = json.loads(content)
        if "title" in seo_data and "description" in seo_data:
            print(" -> ✅ SEO content generated successfully.")
            return seo_data
        print(" -> ❌ Generated JSON is missing 'title' or 'description' key.")
        return None
    except Exception as e:
        print(f" -> ❌ An error occurred with OpenAI API: {e}")
        return None


def update_rank_math_seo(post_id, title, description):
    """Update the SEO title and description for *post_id* via the WP REST API.

    NOTE(review): despite the name, this currently writes the post's core
    'title' and 'excerpt' fields, NOT Rank Math's meta fields — the Rank
    Math payload is deliberately disabled below (it requires the meta keys
    to be registered with show_in_rest on the WordPress side).

    If the ID belongs to a page rather than a post, the call is retried
    against the /pages endpoint.  Returns True on success, False otherwise.
    """
    api_url = f"{WP_BASE_URL}/wp-json/wp/v2/posts/{post_id}"
    # Rank Math-specific payload, kept for reference:
    # payload = {
    #     "meta": {
    #         "rank_math_title": title,
    #         "rank_math_description": description
    #     }
    # }
    payload = {
        "title": title,
        "excerpt": description
    }
    headers = {'Content-Type': 'application/json'}
    try:
        print(f" -> ⬆️ Updating post ID: {post_id} with Rank Math data...")
        response = requests.post(
            api_url,
            auth=(WP_USERNAME, WP_APPLICATION_PASSWORD),
            headers=headers,
            json=payload,
            timeout=15
        )
        response.raise_for_status()
        print(" -> ✅ Post updated successfully on WordPress.")
        return True
    except requests.exceptions.RequestException as e:
        # BUG FIX: the original read the local 'response' here, which is
        # unbound when requests.post() itself raises (timeout, connection
        # error) — use the response attached to the exception instead.
        error_message = str(e)
        if e.response is not None and e.response.text:
            error_message += f"\n -> Server response: {e.response.text}"
        # Invalid post ID usually means the target is a page, not a post.
        if "rest_post_invalid_id" in error_message:
            print(" -> Retrying with /pages endpoint...")
            api_url = f"{WP_BASE_URL}/wp-json/wp/v2/pages/{post_id}"
            try:
                response = requests.post(
                    api_url,
                    auth=(WP_USERNAME, WP_APPLICATION_PASSWORD),
                    headers=headers,
                    json=payload,
                    timeout=15
                )
                response.raise_for_status()
                print(" -> ✅ Page updated successfully on WordPress.")
                return True
            except requests.exceptions.RequestException as page_e:
                print(f" -> ❌ Failed to update page ID {post_id} as well. Error: {page_e}")
                if page_e.response is not None and page_e.response.text:
                    print(f" -> Server response: {page_e.response.text}")
                return False
        print(f" -> ❌ Failed to update WordPress post ID {post_id}. Error: {error_message}")
        return False


def main():
    """Run the SEO update process end-to-end."""
    print("🚀 Starting WordPress SEO Updater for Rank Math")

    # --- Initial Checks ---
    if "your-website.com" in WP_BASE_URL:
        print("❌ CRITICAL: Please update 'WP_BASE_URL' in the configuration.")
        return
    if not WP_USERNAME or not WP_APPLICATION_PASSWORD:
        print("❌ CRITICAL: Set WP_USERNAME and WP_APPLICATION_PASSWORD in the environment.")
        return
    if not OPENAI_API_KEY:
        print("❌ CRITICAL: Set OPENAI_API_KEY in the environment.")
        return
    if not os.path.exists(INPUT_FILE):
        print(f"❌ CRITICAL: Input file '{INPUT_FILE}' not found. Please check the 'File Configuration'.")
        return

    # Initialize OpenAI client
    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
    except Exception as e:
        print(f"❌ CRITICAL: Failed to initialize OpenAI client. Check your API key. Error: {e}")
        return

    # --- Load Data and Progress ---
    processed_urls = load_processed_urls(LOG_FILE)
    print(f"📖 Found {len(processed_urls)} previously processed URLs in '{LOG_FILE}'.")

    try:
        df = pd.read_csv(INPUT_FILE) if INPUT_FILE.endswith('.csv') else pd.read_excel(INPUT_FILE)
    except Exception as e:
        print(f"❌ CRITICAL: Could not read input file '{INPUT_FILE}'. Error: {e}")
        return

    # Check for required columns
    required_cols = ['Page URL', 'Unique Queries']
    if not all(col in df.columns for col in required_cols):
        print(f"❌ CRITICAL: Input file must contain the columns: {required_cols}")
        return

    # --- Main Processing Loop ---
    newly_processed_data = []
    for index, row in df.iterrows():
        page_url = row['Page URL']
        unique_queries = row['Unique Queries']
        print("-" * 50)
        print(f"Processing row {index + 1}/{len(df)}: {page_url}")

        if page_url in processed_urls:
            print(" -> ⏭️ Skipping, already processed.")
            continue
        if pd.isna(unique_queries) or not str(unique_queries).strip():
            print(" -> ⏭️ Skipping, no queries to process.")
            continue

        slug = get_slug_from_url(page_url)
        if not slug:
            print(" -> ⏭️ Skipping, could not extract slug from URL.")
            continue

        seo_data = generate_seo_content(slug, unique_queries, client)
        if not seo_data:
            continue
        new_title = seo_data['title']
        new_description = seo_data['description']
        print(f" -> New Title: {new_title}")
        print(f" -> New Description: {new_description}")

        post_id = get_post_id_from_slug(slug)
        if not post_id:
            continue

        # Use the Rank Math specific update function
        update_successful = update_rank_math_seo(post_id, new_title, new_description)
        if update_successful:
            newly_processed_data.append({
                "Page URL": page_url,
                "New Title": new_title,
                "New Description": new_description,
                "Date Updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })

        # Save progress periodically so a crash mid-run loses at most one row.
        if newly_processed_data:
            log_df_new = pd.DataFrame(newly_processed_data)
            log_df_old = pd.read_excel(LOG_FILE) if os.path.exists(LOG_FILE) else pd.DataFrame()
            log_df_combined = pd.concat([log_df_old, log_df_new]).drop_duplicates(
                subset=['Page URL'], keep='last')
            log_df_combined.to_excel(LOG_FILE, index=False)
            print(f" -> ✅ Progress saved to '{LOG_FILE}'.")
            # Clear the list for the next periodic save
            newly_processed_data = []

    print("-" * 50)
    print("✅ All posts processed. Script finished.")


if __name__ == "__main__":
    main()