#!/usr/bin/env python3
"""WordPress SEO Updater for Rank Math.

Reads a CSV/Excel file containing page URLs and their ranking queries,
generates a new SEO title and meta description using the OpenAI API, and
updates the corresponding post (or page) in WordPress via its REST API.
Progress is logged to an Excel file so that re-runs skip posts that were
already processed.
"""

import os
import re  # NOTE(review): unused in this file; kept to avoid breaking unseen code
import json
import requests
import pandas as pd
from openai import OpenAI
from datetime import datetime
from urllib.parse import urlparse

# --- CONFIGURATION ---
# --- Fill in / export these details before running the script ---

# SECURITY: credentials are read from the environment — never commit secrets
# to source control.  Export before running:
#   export WP_USERNAME="..."
#   export WP_APPLICATION_PASSWORD="..."   # WordPress Application Password
#   export OPENAI_API_KEY="sk-..."
WP_BASE_URL = "https://farmonaut.com"  # e.g., "https://example.com"
WP_USERNAME = os.environ.get("WP_USERNAME", "")
WP_APPLICATION_PASSWORD = os.environ.get("WP_APPLICATION_PASSWORD", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# File Configuration
INPUT_FILE = "gsc_analysis_report_20250610_024030.csv"  # file from the previous script
LOG_FILE = "updated_seo_log_rankmath.xlsx"  # stores results and tracks progress
# --- END OF CONFIGURATION ---


def load_processed_urls(log_file):
    """Return the set of 'Page URL' values already recorded in *log_file*.

    Returns an empty set when the log does not exist or cannot be read,
    so a corrupt log degrades to "start fresh" rather than crashing.
    """
    if not os.path.exists(log_file):
        return set()
    try:
        df = pd.read_excel(log_file)
        return set(df['Page URL'].tolist())
    except Exception as e:
        print(f"⚠️ Warning: Could not read log file {log_file}. Starting fresh. Error: {e}")
        return set()


def get_slug_from_url(page_url):
    """Extract the final slug from a full URL.

    Intelligently ignores categories or parent paths in the permalink
    structure (e.g. '/blog/my-post/' -> 'my-post').  Returns None for the
    root URL, non-string input, or any parsing error.
    """
    if not isinstance(page_url, str):
        return None
    try:
        path = urlparse(page_url).path
        # Split the path by '/' and drop empty strings produced by
        # leading/trailing slashes; the slug is the last non-empty part.
        path_parts = [part for part in path.split('/') if part]
        if path_parts:
            return path_parts[-1]
        # Root URL "/" (the homepage) has no slug.
        return None
    except Exception:
        # General catch-all for any other parsing errors.
        return None


def get_post_id_from_slug(slug):
    """Find the WordPress post (or page) ID for *slug* via the REST API.

    Queries /wp/v2/posts first, then falls back to /wp/v2/pages.
    Returns the numeric ID, or None when nothing matches or a request fails.
    """
    if not slug:
        return None
    # We ask for posts first, but this can also find pages if the slug is unique.
    api_url = f"{WP_BASE_URL}/wp-json/wp/v2/posts?slug={slug}"
    try:
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()
        posts = response.json()
        if posts and isinstance(posts, list):
            return posts[0].get('id')
        # If not found in posts, check pages.
        print(f" -> Post not found for slug '{slug}'. Checking pages...")
        api_url_pages = f"{WP_BASE_URL}/wp-json/wp/v2/pages?slug={slug}"
        response_pages = requests.get(api_url_pages, timeout=10)
        response_pages.raise_for_status()
        pages = response_pages.json()
        if pages and isinstance(pages, list):
            return pages[0].get('id')
        print(f" -> ❌ Post or Page not found for slug: {slug}")
        return None
    except requests.exceptions.RequestException as e:
        print(f" -> ❌ Error fetching post/page ID for slug '{slug}': {e}")
        return None


def generate_seo_content(slug, queries, client):
    """Generate an SEO title and meta description with the OpenAI API.

    Returns a dict with 'title' and 'description' keys, or None when the
    API call fails or the model's JSON is missing a required key.
    """
    print(f" -> 🤖 Generating SEO content for slug: {slug}...")
    prompt = f"""
You are an expert SEO copywriter for the Rank Math plugin. Your task is to write a compelling SEO title and meta description for a blog post.

**Context:**
- The post is already ranking for these unique user queries: `{queries}`

**Instructions:**
1. **SEO Title:** Create a concise, click-worthy title. It MUST be under 50 characters. Incorporate the primary theme from the queries.
2. **Meta Description:** Write an engaging meta description that encourages clicks. It MUST be under 150 characters. Summarize the content and include a call-to-action if appropriate.
3. **Format:** Return your response as a single, valid JSON object with two keys: "title" and "description". Do not include any other text or explanations.

Example response:
{{
  "title": "Your New SEO Title Here",
  "description": "Your new, compelling meta description goes in this spot."
}}
"""
    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini-2025-04-14",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            # json_object mode guarantees syntactically valid JSON output.
            response_format={"type": "json_object"}
        )
        content = response.choices[0].message.content
        seo_data = json.loads(content)
        if "title" in seo_data and "description" in seo_data:
            print(" -> ✅ SEO content generated successfully.")
            return seo_data
        print(" -> ❌ Generated JSON is missing 'title' or 'description' key.")
        return None
    except Exception as e:
        print(f" -> ❌ An error occurred with OpenAI API: {e}")
        return None


def update_rank_math_seo(post_id, title, description):
    """Update the SEO title and description for *post_id* via the WP REST API.

    NOTE(review): despite the name, this currently writes the post's core
    'title' and 'excerpt' fields, NOT Rank Math's meta fields — the Rank
    Math payload is deliberately disabled below (it requires the meta keys
    to be registered with show_in_rest on the WordPress side).

    If the ID belongs to a page rather than a post, the call is retried
    against the /pages endpoint.  Returns True on success, False otherwise.
    """
    api_url = f"{WP_BASE_URL}/wp-json/wp/v2/posts/{post_id}"
    # Rank Math-specific payload, kept for reference:
    # payload = {
    #     "meta": {
    #         "rank_math_title": title,
    #         "rank_math_description": description
    #     }
    # }
    payload = {
        "title": title,
        "excerpt": description
    }
    headers = {'Content-Type': 'application/json'}
    try:
        print(f" -> ⬆️ Updating post ID: {post_id} with Rank Math data...")
        response = requests.post(
            api_url,
            auth=(WP_USERNAME, WP_APPLICATION_PASSWORD),
            headers=headers,
            json=payload,
            timeout=15
        )
        response.raise_for_status()
        print(" -> ✅ Post updated successfully on WordPress.")
        return True
    except requests.exceptions.RequestException as e:
        # BUG FIX: the original read the local 'response' here, which is
        # unbound when requests.post() itself raises (timeout, connection
        # error) — use the response attached to the exception instead.
        error_message = str(e)
        if e.response is not None and e.response.text:
            error_message += f"\n -> Server response: {e.response.text}"
        # Invalid post ID usually means the target is a page, not a post.
        if "rest_post_invalid_id" in error_message:
            print(" -> Retrying with /pages endpoint...")
            api_url = f"{WP_BASE_URL}/wp-json/wp/v2/pages/{post_id}"
            try:
                response = requests.post(
                    api_url,
                    auth=(WP_USERNAME, WP_APPLICATION_PASSWORD),
                    headers=headers,
                    json=payload,
                    timeout=15
                )
                response.raise_for_status()
                print(" -> ✅ Page updated successfully on WordPress.")
                return True
            except requests.exceptions.RequestException as page_e:
                print(f" -> ❌ Failed to update page ID {post_id} as well. Error: {page_e}")
                if page_e.response is not None and page_e.response.text:
                    print(f" -> Server response: {page_e.response.text}")
                return False
        print(f" -> ❌ Failed to update WordPress post ID {post_id}. Error: {error_message}")
        return False


def main():
    """Run the SEO update process end-to-end."""
    print("🚀 Starting WordPress SEO Updater for Rank Math")

    # --- Initial Checks ---
    if "your-website.com" in WP_BASE_URL:
        print("❌ CRITICAL: Please update 'WP_BASE_URL' in the configuration.")
        return
    if not WP_USERNAME or not WP_APPLICATION_PASSWORD:
        print("❌ CRITICAL: Set WP_USERNAME and WP_APPLICATION_PASSWORD in the environment.")
        return
    if not OPENAI_API_KEY:
        print("❌ CRITICAL: Set OPENAI_API_KEY in the environment.")
        return
    if not os.path.exists(INPUT_FILE):
        print(f"❌ CRITICAL: Input file '{INPUT_FILE}' not found. Please check the 'File Configuration'.")
        return

    # Initialize OpenAI client
    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
    except Exception as e:
        print(f"❌ CRITICAL: Failed to initialize OpenAI client. Check your API key. Error: {e}")
        return

    # --- Load Data and Progress ---
    processed_urls = load_processed_urls(LOG_FILE)
    print(f"📖 Found {len(processed_urls)} previously processed URLs in '{LOG_FILE}'.")

    try:
        df = pd.read_csv(INPUT_FILE) if INPUT_FILE.endswith('.csv') else pd.read_excel(INPUT_FILE)
    except Exception as e:
        print(f"❌ CRITICAL: Could not read input file '{INPUT_FILE}'. Error: {e}")
        return

    # Check for required columns
    required_cols = ['Page URL', 'Unique Queries']
    if not all(col in df.columns for col in required_cols):
        print(f"❌ CRITICAL: Input file must contain the columns: {required_cols}")
        return

    # --- Main Processing Loop ---
    newly_processed_data = []
    for index, row in df.iterrows():
        page_url = row['Page URL']
        unique_queries = row['Unique Queries']
        print("-" * 50)
        print(f"Processing row {index + 1}/{len(df)}: {page_url}")

        if page_url in processed_urls:
            print(" -> ⏭️ Skipping, already processed.")
            continue
        if pd.isna(unique_queries) or not str(unique_queries).strip():
            print(" -> ⏭️ Skipping, no queries to process.")
            continue

        slug = get_slug_from_url(page_url)
        if not slug:
            print(" -> ⏭️ Skipping, could not extract slug from URL.")
            continue

        seo_data = generate_seo_content(slug, unique_queries, client)
        if not seo_data:
            continue
        new_title = seo_data['title']
        new_description = seo_data['description']
        print(f" -> New Title: {new_title}")
        print(f" -> New Description: {new_description}")

        post_id = get_post_id_from_slug(slug)
        if not post_id:
            continue

        # Use the Rank Math specific update function
        update_successful = update_rank_math_seo(post_id, new_title, new_description)
        if update_successful:
            newly_processed_data.append({
                "Page URL": page_url,
                "New Title": new_title,
                "New Description": new_description,
                "Date Updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })

        # Save progress periodically so a crash mid-run loses at most one row.
        if newly_processed_data:
            log_df_new = pd.DataFrame(newly_processed_data)
            log_df_old = pd.read_excel(LOG_FILE) if os.path.exists(LOG_FILE) else pd.DataFrame()
            log_df_combined = pd.concat([log_df_old, log_df_new]).drop_duplicates(
                subset=['Page URL'], keep='last')
            log_df_combined.to_excel(LOG_FILE, index=False)
            print(f" -> ✅ Progress saved to '{LOG_FILE}'.")
            # Clear the list for the next periodic save
            newly_processed_data = []

    print("-" * 50)
    print("✅ All posts processed. Script finished.")


if __name__ == "__main__":
    main()