#!/usr/bin/env python3 """ WordPress SEO Updater for Rank Math using GPT-4o-mini This script reads a CSV/Excel file containing page URLs and their ranking queries, generates a new SEO title and meta description using OpenAI's API, and updates the corresponding post in WordPress via its REST API, targeting Rank Math's fields. It logs its progress to an Excel file to avoid re-processing posts if re-run. Now includes tracking of failed updates and graceful handling of API timeouts. """ import os import re import json import requests import pandas as pd from openai import OpenAI from datetime import datetime from urllib.parse import urlparse import time # --- CONFIGURATION --- # --- Fill in these details before running the script --- # WordPress Configuration WP_BASE_URL = "https://farmonaut.com" # e.g., "https://example.com" WP_USERNAME = "ankuromar296" # Your WordPress username WP_APPLICATION_PASSWORD = "2fZW 42ln jhSG qPfm Kx3b hacO" # The Application Password you generated # OpenAI Configuration OPENAI_API_KEY = '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' # Your OpenAI API key # File Configuration INPUT_FILE = "gsc_analysis_report_20250610_024030.csv" # The file from the previous script LOG_FILE = "updated_seo_log_rankmath.xlsx" # The file to store results and track progress FAILED_LOG_FILE = "failed_updates_log.xlsx" # File to track failed updates # --- END OF CONFIGURATION --- def load_processed_urls(log_file): """Loads the URLs of posts that have already been processed.""" if not os.path.exists(log_file): return set() try: df = pd.read_excel(log_file) return set(df['Page URL'].tolist()) except Exception as e: print(f"⚠️ Warning: Could not read log file {log_file}. Starting fresh. Error: {e}") return set() def load_failed_urls(failed_log_file): """Loads the URLs of posts that have failed to update and should be skipped.""" if not os.path.exists(failed_log_file): return set() try: df = pd.read_excel(failed_log_file) return set(df['Page URL'].tolist()) except Exception as e: print(f"⚠️ Warning: Could not read failed log file {failed_log_file}. Error: {e}") return set() def save_failed_update(page_url, reason, failed_log_file): """Saves a failed update record to the failed log file.""" try: # Load existing failed records if os.path.exists(failed_log_file): failed_df = pd.read_excel(failed_log_file) else: failed_df = pd.DataFrame(columns=['Page URL', 'Failure Reason', 'Date Failed', 'Attempts']) # Check if this URL already exists in failed records if page_url in failed_df['Page URL'].values: # Update existing record mask = failed_df['Page URL'] == page_url failed_df.loc[mask, 'Failure Reason'] = reason failed_df.loc[mask, 'Date Failed'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") failed_df.loc[mask, 'Attempts'] = failed_df.loc[mask, 'Attempts'] + 1 else: # Add new failed record new_record = pd.DataFrame({ 'Page URL': [page_url], 'Failure Reason': [reason], 'Date Failed': [datetime.now().strftime("%Y-%m-%d %H:%M:%S")], 'Attempts': [1] }) failed_df = pd.concat([failed_df, new_record], ignore_index=True) # Save to file failed_df.to_excel(failed_log_file, index=False) print(f" -> 📝 Failed update logged to '{failed_log_file}'.") except Exception as e: print(f" -> ⚠️ Warning: Could not save failed update log. Error: {e}") def get_slug_from_url(page_url): """ Extracts the final slug from a full URL, intelligently ignoring categories or parent paths in the permalink structure. """ if not isinstance(page_url, str): return None try: path = urlparse(page_url).path # Split the path by '/' and filter out any empty strings that result # from leading/trailing slashes (e.g., '/blog/my-post/' -> ['', 'blog', 'my-post', '']) path_parts = [part for part in path.split('/') if part] # The actual slug is the very last non-empty part of the path. if path_parts: slug = path_parts[-1] return slug else: # This handles the case of the root URL "/" (the homepage) return None except Exception: # General catch-all for any other parsing errors return None def get_post_id_from_slug(slug, page_url): """Finds the WordPress post ID for a given slug.""" if not slug: save_failed_update(page_url, "Could not extract slug from URL", FAILED_LOG_FILE) return None try: # We ask for posts, but this can also find pages if the slug is unique api_url = f"{WP_BASE_URL}/wp-json/wp/v2/posts?slug={slug}" response = requests.get(api_url, timeout=15) response.raise_for_status() posts = response.json() if posts and isinstance(posts, list): return posts[0].get('id') else: # If not found in posts, check pages print(f" -> Post not found for slug '{slug}'. Checking pages...") api_url_pages = f"{WP_BASE_URL}/wp-json/wp/v2/pages?slug={slug}" response_pages = requests.get(api_url_pages, timeout=15) response_pages.raise_for_status() pages = response_pages.json() if pages and isinstance(pages, list): return pages[0].get('id') print(f" -> ❌ Post or Page not found for slug: {slug}") save_failed_update(page_url, f"Post/Page not found for slug: {slug}", FAILED_LOG_FILE) return None except requests.exceptions.Timeout: print(f" -> ⏰ Timeout while fetching post/page ID for slug '{slug}'") save_failed_update(page_url, f"Timeout while fetching post/page ID for slug: {slug}", FAILED_LOG_FILE) return None except requests.exceptions.RequestException as e: print(f" -> ❌ Error fetching post/page ID for slug '{slug}': {e}") save_failed_update(page_url, f"API error while fetching post/page ID: {str(e)}", FAILED_LOG_FILE) return None def generate_seo_content(slug, queries, client, page_url): """Generates SEO title and meta description using OpenAI.""" print(f" -> 🤖 Generating SEO content for slug: {slug}...") prompt = f""" You are an expert SEO copywriter for the Farmonaut - an ai spacetech agritech company. Your task is to write a compelling SEO title and meta description for a blog post. **Context:** - The post is already ranking for these unique user queries: `{queries}` - Current Slug: {slug} **Instructions:** 1. **SEO Title:** Create a concise, click-worthy title. It MUST be under 55 characters. Incorporate the primary theme from the queries. 2. **Meta Description:** Write an engaging meta description that encourages clicks. It MUST be under 160 characters. Summarize the content and include a call-to-action if appropriate. 3. **Format:** Return your response as a single, valid JSON object with two keys: "title" and "description". Do not include any other text or explanations. 4. **Important:** Mandatorily ensure to fit in as many keywords/ queries within the title and description. Example response: {{ "title": "Your New SEO Title Here", "description": "Your new, compelling meta description goes in this spot." }} """ try: response = client.chat.completions.create( model="gpt-4.1-mini-2025-04-14", messages=[{"role": "user", "content": prompt}], temperature=1, response_format={"type": "json_object"} ) content = response.choices[0].message.content seo_data = json.loads(content) if "title" in seo_data and "description" in seo_data: print(" -> ✅ SEO content generated successfully.") return seo_data else: print(" -> ❌ Generated JSON is missing 'title' or 'description' key.") save_failed_update(page_url, "OpenAI generated invalid JSON format", FAILED_LOG_FILE) return None except Exception as e: print(f" -> ❌ An error occurred with OpenAI API: {e}") save_failed_update(page_url, f"OpenAI API error: {str(e)}", FAILED_LOG_FILE) return None # --- MODIFIED FUNCTION FOR RANK MATH --- def update_rank_math_seo(post_id, title, description, page_url): """Updates the Rank Math SEO title and meta description for a given post ID.""" api_url = f"{WP_BASE_URL}/wp-json/wp/v2/posts/{post_id}" # This payload is specific to Rank Math's REST API fields. # It updates the post's meta fields directly. # payload = { # "meta": { # "rank_math_title": title, # "rank_math_description": description # } # } payload = { "title": title, "excerpt": description } headers = {'Content-Type': 'application/json'} try: print(f" -> ⬆️ Updating post ID: {post_id} with Rank Math data...") response = requests.post( api_url, auth=(WP_USERNAME, WP_APPLICATION_PASSWORD), headers=headers, json=payload, timeout=30 # Increased timeout ) response.raise_for_status() print(" -> ✅ Post updated successfully on WordPress.") return True except requests.exceptions.Timeout: print(f" -> ⏰ Timeout while updating post ID {post_id}") save_failed_update(page_url, f"Timeout while updating post ID: {post_id}", FAILED_LOG_FILE) return False except requests.exceptions.RequestException as e: error_message = str(e) if 'response' in locals() and response is not None and response.text: error_message += f"\n -> Server response: {response.text}" # Check if the error is due to an invalid post type (e.g., trying to update a page via the /posts endpoint) if "rest_post_invalid_id" in error_message: print(" -> Retrying with /pages endpoint...") api_url = f"{WP_BASE_URL}/wp-json/wp/v2/pages/{post_id}" try: response = requests.post( api_url, auth=(WP_USERNAME, WP_APPLICATION_PASSWORD), headers=headers, json=payload, timeout=30 ) response.raise_for_status() print(" -> ✅ Page updated successfully on WordPress.") return True except requests.exceptions.Timeout: print(f" -> ⏰ Timeout while updating page ID {post_id}") save_failed_update(page_url, f"Timeout while updating page ID: {post_id}", FAILED_LOG_FILE) return False except requests.exceptions.RequestException as page_e: print(f" -> ❌ Failed to update page ID {post_id} as well. Error: {page_e}") if 'response' in locals() and response is not None and response.text: print(f" -> Server response: {response.text}") save_failed_update(page_url, f"Failed to update both post and page ID {post_id}: {str(page_e)}", FAILED_LOG_FILE) return False else: print(f" -> ❌ Failed to update WordPress post ID {post_id}. Error: {error_message}") save_failed_update(page_url, f"Failed to update post ID {post_id}: {error_message}", FAILED_LOG_FILE) return False def main(): """Main function to run the SEO update process.""" print("🚀 Starting WordPress SEO Updater for Rank Math") # --- Initial Checks --- if "your-website.com" in WP_BASE_URL: print("❌ CRITICAL: Please update 'WP_BASE_URL' in the configuration.") return if not os.path.exists(INPUT_FILE): print(f"❌ CRITICAL: Input file '{INPUT_FILE}' not found. Please check the 'File Configuration'.") return # Initialize OpenAI client try: client = OpenAI(api_key=OPENAI_API_KEY) except Exception as e: print(f"❌ CRITICAL: Failed to initialize OpenAI client. Check your API key. Error: {e}") return # --- Load Data and Progress --- processed_urls = load_processed_urls(LOG_FILE) failed_urls = load_failed_urls(FAILED_LOG_FILE) print(f"📖 Found {len(processed_urls)} previously processed URLs in '{LOG_FILE}'.") print(f"⚠️ Found {len(failed_urls)} previously failed URLs in '{FAILED_LOG_FILE}' (will be skipped).") try: df = pd.read_csv(INPUT_FILE) if INPUT_FILE.endswith('.csv') else pd.read_excel(INPUT_FILE) except Exception as e: print(f"❌ CRITICAL: Could not read input file '{INPUT_FILE}'. Error: {e}") return # Check for required columns required_cols = ['Page URL', 'Unique Queries'] if not all(col in df.columns for col in required_cols): print(f"❌ CRITICAL: Input file must contain the columns: {required_cols}") return # --- Main Processing Loop --- newly_processed_data = [] total_processed = 0 total_skipped_already_done = 0 total_skipped_failed = 0 total_successful = 0 total_failed_new = 0 for index, row in df.iterrows(): page_url = row['Page URL'] unique_queries = row['Unique Queries'] print("-" * 50) print(f"Processing row {index + 1}/{len(df)}: {page_url}") total_processed += 1 if page_url in processed_urls: print(" -> ⏭️ Skipping, already processed successfully.") total_skipped_already_done += 1 continue if page_url in failed_urls: print(" -> ⏭️ Skipping, previously failed (check failed_updates_log.xlsx).") total_skipped_failed += 1 continue if pd.isna(unique_queries) or not str(unique_queries).strip(): print(" -> ⏭️ Skipping, no queries to process.") save_failed_update(page_url, "No queries provided", FAILED_LOG_FILE) continue slug = get_slug_from_url(page_url) if not slug: print(" -> ⏭️ Skipping, could not extract slug from URL.") continue seo_data = generate_seo_content(slug, unique_queries, client, page_url) if not seo_data: total_failed_new += 1 continue new_title = seo_data['title'] new_description = seo_data['description'] print(f" -> New Title: {new_title}") print(f" -> New Description: {new_description}") post_id = get_post_id_from_slug(slug, page_url) if not post_id: total_failed_new += 1 continue # Use the Rank Math specific update function update_successful = update_rank_math_seo(post_id, new_title, new_description, page_url) if update_successful: total_successful += 1 newly_processed_data.append({ "Page URL": page_url, "New Title": new_title, "New Description": new_description, "Date Updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S") }) # Save progress every 5 successful updates if len(newly_processed_data) >= 5: try: log_df_new = pd.DataFrame(newly_processed_data) log_df_old = pd.read_excel(LOG_FILE) if os.path.exists(LOG_FILE) else pd.DataFrame() log_df_combined = pd.concat([log_df_old, log_df_new]).drop_duplicates(subset=['Page URL'], keep='last') log_df_combined.to_excel(LOG_FILE, index=False) print(f" -> ✅ Progress saved to '{LOG_FILE}' (batch of {len(newly_processed_data)} updates).") newly_processed_data = [] # Clear the list after saving except Exception as e: print(f" -> ⚠️ Warning: Could not save progress. Error: {e}") else: total_failed_new += 1 # Add a small delay to avoid overwhelming the API time.sleep(1) # Save any remaining processed data if newly_processed_data: try: log_df_new = pd.DataFrame(newly_processed_data) log_df_old = pd.read_excel(LOG_FILE) if os.path.exists(LOG_FILE) else pd.DataFrame() log_df_combined = pd.concat([log_df_old, log_df_new]).drop_duplicates(subset=['Page URL'], keep='last') log_df_combined.to_excel(LOG_FILE, index=False) print(f" -> ✅ Final progress saved to '{LOG_FILE}'.") except Exception as e: print(f" -> ⚠️ Warning: Could not save final progress. Error: {e}") print("-" * 50) print("📊 SUMMARY:") print(f" Total URLs processed: {total_processed}") print(f" Already completed (skipped): {total_skipped_already_done}") print(f" Previously failed (skipped): {total_skipped_failed}") print(f" Successful updates: {total_successful}") print(f" New failures: {total_failed_new}") print(f" Success rate: {(total_successful / max(total_processed - total_skipped_already_done - total_skipped_failed, 1)) * 100:.1f}%") print("✅ Script finished. Check logs for details.") print(f" - Success log: {LOG_FILE}") print(f" - Failed updates log: {FAILED_LOG_FILE}") if __name__ == "__main__": main()