"""Bulk-update WordPress post titles and RankMath SEO metadata from a GSC
(Google Search Console) query-analysis Excel file, optionally using OpenAI
to generate the optimized titles/descriptions."""

import pandas as pd
import requests
import json
import re
import time
import logging
from urllib.parse import urlparse, urljoin
from datetime import datetime
import os
import openai


class WordPressSEOUpdater:
    """Updates post titles and RankMath meta fields through the WordPress
    REST API (``/wp-json/wp/v2``), tracking successes, failures and
    previously-processed posts in a JSON history file."""

    def __init__(self, wordpress_url, username, app_password, openai_api_key=None,
                 history_file='processed_posts.json'):
        """
        Initialize the WordPress SEO updater for RankMath

        Args:
            wordpress_url: Your WordPress site URL (e.g., 'https://yoursite.com')
            username: WordPress username
            app_password: WordPress application password (not regular password)
            openai_api_key: OpenAI API key for AI-powered SEO optimization
            history_file: File to store history of processed post IDs
        """
        self.wordpress_url = wordpress_url.rstrip('/')
        self.api_base = f"{self.wordpress_url}/wp-json/wp/v2"
        self.username = username
        self.app_password = app_password
        self.session = requests.Session()
        # WordPress Application Passwords work with plain HTTP Basic auth.
        self.session.auth = (username, app_password)
        self.history_file = history_file

        # Setup logging BEFORE anything logs. (The original code logged from
        # the OpenAI-init branch before basicConfig had run, and assigned
        # self.logger in three separate places.)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('wordpress_seo_update.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

        # Initialize OpenAI client if API key is provided
        self.openai_client = None
        self.use_openai = False
        if openai_api_key:
            try:
                self.openai_client = openai.OpenAI(api_key=openai_api_key)
                self.use_openai = True
                self.logger.info("āœ… OpenAI API initialized successfully")
            except Exception as e:
                self.logger.warning(
                    f"āš ļø OpenAI API initialization failed: {e}. Falling back to basic optimization."
                )

        # Lists to track results
        self.updated_posts = []
        self.failed_posts = []
        self.not_found_urls = []
        self.skipped_posts = []

        # Load processed posts history
        self.processed_posts = self.load_processed_posts()

    def load_processed_posts(self):
        """Load the history of processed post IDs from the history file.

        Returns:
            set[str]: post IDs (as strings) already processed; empty set on
            missing file or any read/parse error (best-effort by design).
        """
        try:
            if os.path.exists(self.history_file):
                with open(self.history_file, 'r') as f:
                    data = json.load(f)
                self.logger.info(f"Loaded {len(data)} previously processed posts from {self.history_file}")
                return set(data.keys())  # Store post IDs as a set for O(1) lookup
            return set()
        except Exception as e:
            self.logger.warning(f"Failed to load processed posts history: {e}")
            return set()

    def save_processed_post(self, post_id):
        """Save a post ID to the history file with timestamp.

        Re-reads the file each time so concurrent/previous runs' entries are
        preserved; failures are logged but never raised.
        """
        try:
            data = {}
            if os.path.exists(self.history_file):
                with open(self.history_file, 'r') as f:
                    data = json.load(f)
            data[str(post_id)] = {
                'last_updated': datetime.now().isoformat(),
                'post_id': post_id
            }
            with open(self.history_file, 'w') as f:
                json.dump(data, f, indent=2)
            self.logger.info(f"Saved post ID {post_id} to history file")
        except Exception as e:
            self.logger.error(f"Failed to save post ID {post_id} to history: {e}")

    def test_connection(self):
        """Test WordPress API connection by fetching the authenticated user.

        Returns:
            bool: True if /users/me returns HTTP 200, else False.
        """
        try:
            response = self.session.get(f"{self.api_base}/users/me")
            if response.status_code == 200:
                user_data = response.json()
                self.logger.info(f"Successfully connected to WordPress as: {user_data.get('name', 'Unknown')}")
                return True
            else:
                self.logger.error(
                    f"Connection failed. Status: {response.status_code}, Response: {response.text}"
                )
                return False
        except Exception as e:
            self.logger.error(f"Connection test failed: {str(e)}")
            return False

    def extract_post_slug_from_url(self, url):
        """
        Extract post slug from WordPress URL

        Args:
            url: Full WordPress post URL

        Returns:
            str | None: the last non-numeric path segment (handles both
            /%postname%/ and date-based permalink structures), or None.
        """
        try:
            parsed_url = urlparse(url)
            path = parsed_url.path.strip('/')
            # Handle different WordPress permalink structures
            path_parts = path.split('/')
            # Get the last non-empty, non-numeric part as slug (skips
            # trailing page numbers / date components).
            slug = None
            for part in reversed(path_parts):
                if part and not part.isdigit():
                    slug = part
                    break
            return slug
        except Exception as e:
            self.logger.error(f"Error extracting slug from URL {url}: {str(e)}")
            return None

    def get_post_by_slug(self, slug):
        """
        Get WordPress post by slug, falling back to pages.

        Args:
            slug: Post slug

        Returns:
            tuple: (post_data_dict, 'post'|'page') or (None, None).
        """
        try:
            response = self.session.get(f"{self.api_base}/posts", params={'slug': slug})
            if response.status_code == 200:
                posts = response.json()
                if posts:
                    return posts[0], 'post'
            # Not found as a post — try pages with the same slug.
            response = self.session.get(f"{self.api_base}/pages", params={'slug': slug})
            if response.status_code == 200:
                pages = response.json()
                if pages:
                    return pages[0], 'page'
            return None, None
        except Exception as e:
            self.logger.error(f"Error getting post by slug {slug}: {str(e)}")
            return None, None

    def get_rankmath_meta(self, post_id, post_type='post'):
        """
        Get RankMath SEO meta data for a post

        Args:
            post_id: WordPress post ID
            post_type: 'post' or 'page'

        Returns:
            dict: rank_math_* fields normalized to friendly keys; falls back
            to the rendered post title when no RankMath title is set.
            Empty dict on any error.
        """
        try:
            # context=edit is required for the REST API to expose meta fields.
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"
            response = self.session.get(endpoint, params={'context': 'edit'})
            if response.status_code == 200:
                post_data = response.json()
                meta_data = post_data.get('meta', {})
                rankmath_data = {
                    'title': meta_data.get('rank_math_title', ''),
                    'description': meta_data.get('rank_math_description', ''),
                    'focus_keyword': meta_data.get('rank_math_focus_keyword', ''),
                    'canonical_url': meta_data.get('rank_math_canonical_url', ''),
                    'robots': meta_data.get('rank_math_robots', []),
                    'og_title': meta_data.get('rank_math_facebook_title', ''),
                    'og_description': meta_data.get('rank_math_facebook_description', ''),
                    'twitter_title': meta_data.get('rank_math_twitter_title', ''),
                    'twitter_description': meta_data.get('rank_math_twitter_description', '')
                }
                if not rankmath_data['title']:
                    rankmath_data['title'] = post_data.get('title', {}).get('rendered', '')
                return rankmath_data
            return {}
        except Exception as e:
            self.logger.error(f"Error getting RankMath meta for post {post_id}: {str(e)}")
            return {}

    def update_post_seo(self, post_id, new_title, new_meta_description, post_type='post',
                        focus_keyword=None, update_social_meta=True):
        """
        Update post title and meta description using RankMath

        Args:
            post_id: WordPress post ID
            new_title: New post title
            new_meta_description: New meta description
            post_type: 'post' or 'page'
            focus_keyword: Optional focus keyword for RankMath
            update_social_meta: Whether to update social media meta tags

        Returns:
            tuple: (success: bool, response JSON on success / error text on failure)
        """
        try:
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"
            update_data = {
                'title': new_title
            }
            meta_updates = {}
            if new_meta_description:
                meta_updates['rank_math_description'] = new_meta_description
            if new_title:
                meta_updates['rank_math_title'] = new_title
            if focus_keyword:
                meta_updates['rank_math_focus_keyword'] = focus_keyword
            if update_social_meta:
                # Mirror title/description onto Facebook + Twitter cards.
                if new_title:
                    meta_updates['rank_math_facebook_title'] = new_title
                    meta_updates['rank_math_twitter_title'] = new_title
                if new_meta_description:
                    meta_updates['rank_math_facebook_description'] = new_meta_description
                    meta_updates['rank_math_twitter_description'] = new_meta_description
            if meta_updates:
                update_data['meta'] = meta_updates

            response = self.session.post(endpoint, json=update_data)
            if response.status_code == 200:
                self.logger.info(f"Successfully updated {post_type} ID {post_id} with RankMath SEO data")
                return True, response.json()
            else:
                self.logger.error(
                    f"Failed to update {post_type} ID {post_id}. Status: {response.status_code}, Response: {response.text}"
                )
                return False, response.text
        except Exception as e:
            self.logger.error(f"Error updating {post_type} ID {post_id}: {str(e)}")
            return False, str(e)

    def generate_seo_with_openai(self, slug, current_title, current_description, query,
                                 position=None, post_content_excerpt=None):
        """
        Use OpenAI API to generate optimized SEO title and meta description

        Args:
            slug: Post slug/URL path
            current_title: Current post title
            current_description: Current meta description
            query: Search query from GSC
            position: Current ranking position
            post_content_excerpt: First few paragraphs of post content (optional)

        Returns:
            tuple: (optimized_title, optimized_description, focus_keyword, success)
        """
        if not self.use_openai:
            return None, None, None, False
        try:
            prompt = f"""
You are an expert SEO specialist. I need you to optimize the title and meta description for a WordPress post to improve its Google Search Console ranking.

**Current Post Information:**
- Post Slug: {slug}
- Current Title: {current_title}
- Current Meta Description: {current_description or 'None'}
- Target Search Query: {query}
- Current Position: {position if position else 'Unknown'}

**Post Content Preview:**
{post_content_excerpt or 'Not available'}

**Requirements:**
1. Create an SEO-optimized title (max 60 characters) that:
   - Includes the target keyword naturally
   - Is compelling and click-worthy
   - Maintains the original topic/intent
   - Uses power words when appropriate
2. Create an SEO-optimized meta description (max 160 characters) that:
   - Includes the target keyword naturally
   - Clearly describes what the reader will learn
   - Includes a call-to-action
   - Is compelling and encourages clicks
3. Suggest the primary focus keyword for RankMath

**Important Guidelines:**
- The title and description should feel natural, not keyword-stuffed
- Maintain the original meaning and intent of the content
- Make it compelling for human readers, not just search engines
- Consider search intent behind the query

Please respond in this exact JSON format:
{{
  "optimized_title": "Your optimized title here",
  "optimized_description": "Your optimized meta description here",
  "focus_keyword": "primary keyword phrase",
  "reasoning": "Brief explanation of your optimization strategy"
}}
"""
            response = self.openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an SEO expert specializing in optimizing web content."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1000,
                temperature=0.3,
            )
            response_text = response.choices[0].message.content.strip()
            try:
                # Extract the JSON object even if the model wrapped it in prose.
                start_idx = response_text.find('{')
                end_idx = response_text.rfind('}') + 1
                if start_idx == -1 or end_idx <= start_idx:
                    # No JSON object present at all — treat as a parse failure
                    # instead of slicing with a bogus index.
                    self.logger.error(f"Failed to parse OpenAI response as JSON: {response_text}")
                    return None, None, None, False
                json_str = response_text[start_idx:end_idx]
                result = json.loads(json_str)

                optimized_title = result.get('optimized_title', '')
                optimized_description = result.get('optimized_description', '')
                focus_keyword = result.get('focus_keyword', query.lower())
                reasoning = result.get('reasoning', '')

                # Hard-truncate to SERP display limits, just in case the model
                # ignored the character budgets.
                if len(optimized_title) > 60:
                    optimized_title = optimized_title[:57] + "..."
                if len(optimized_description) > 160:
                    optimized_description = optimized_description[:157] + "..."

                self.logger.info(f"šŸ¤– OpenAI optimization reasoning: {reasoning}")
                return optimized_title, optimized_description, focus_keyword, True
            except json.JSONDecodeError:
                self.logger.error(f"Failed to parse OpenAI response as JSON: {response_text}")
                return None, None, None, False
        except Exception as e:
            self.logger.error(f"OpenAI API error: {str(e)}")
            return None, None, None, False

    def get_post_content_excerpt(self, post_data, max_length=500):
        """
        Extract the first part of post content for OpenAI context

        Args:
            post_data: WordPress post data
            max_length: Maximum length of excerpt

        Returns:
            str: Post content excerpt (HTML stripped), or None on error.
        """
        try:
            content = post_data.get('content', {}).get('rendered', '')
            # Crude tag strip is fine here — the text is only LLM context.
            clean_content = re.sub(r'<[^>]+>', ' ', content)
            clean_content = re.sub(r'\s+', ' ', clean_content).strip()
            if len(clean_content) > max_length:
                # Prefer cutting at a sentence boundary.
                sentences = clean_content[:max_length].split('.')
                if len(sentences) > 1:
                    return '.'.join(sentences[:-1]) + '.'
                else:
                    return clean_content[:max_length] + "..."
            return clean_content
        except Exception as e:
            self.logger.warning(f"Could not extract post content excerpt: {e}")
            return None

    def generate_new_title_and_description(self, slug, query, current_title, current_description,
                                           position=None, post_data=None):
        """
        Generate new title and meta description based on query and current ranking position
        Uses OpenAI API if available, otherwise falls back to basic optimization

        Args:
            slug: Post slug
            query: The search query from GSC
            current_title: Current post title
            current_description: Current meta description
            position: Current ranking position (optional)
            post_data: Full post data for content context

        Returns:
            tuple: (new_title, new_meta_description, focus_keyword)
        """
        if self.use_openai and post_data:
            post_content_excerpt = self.get_post_content_excerpt(post_data)
            openai_title, openai_description, openai_keyword, openai_success = self.generate_seo_with_openai(
                slug, current_title, current_description, query, position, post_content_excerpt
            )
            if openai_success and openai_title and openai_description:
                self.logger.info("šŸ¤– Using OpenAI-optimized SEO content")
                return openai_title, openai_description, openai_keyword
            else:
                self.logger.warning("āš ļø OpenAI optimization failed, falling back to basic optimization")

        # --- Basic (non-AI) optimization ---
        self.logger.info("šŸ”§ Using basic SEO optimization")
        clean_query = query.strip().title()
        focus_keyword = query.lower()

        # Prepend the query to the title if it's not already present.
        new_title = current_title
        if query.lower() not in current_title.lower():
            new_title = f"{clean_query} - {current_title}"
        if len(new_title) > 60:
            if len(clean_query) < 40:
                # Keep the full query, shorten the original title portion.
                remaining_chars = 57 - len(clean_query) - 3
                shortened_title = current_title[:remaining_chars]
                new_title = f"{clean_query} - {shortened_title}..."
            else:
                new_title = new_title[:57] + "..."

        # Build/augment the meta description around the query.
        new_description = current_description
        if not current_description or len(current_description) < 50:
            new_description = f"Discover everything about {query}. {current_title} - comprehensive guide with expert insights and practical tips."
        elif query.lower() not in current_description.lower():
            new_description = f"{query.title()}: {current_description}"
        if len(new_description) > 160:
            new_description = new_description[:157] + "..."

        # Ensure the focus keyword appears somewhere in the description.
        if focus_keyword not in new_description.lower():
            words = new_description.split()
            if len(words) > 10:
                insert_point = min(8, len(words) // 2)
                words.insert(insert_point, f"({focus_keyword})")
                new_description = " ".join(words)
                if len(new_description) > 160:
                    new_description = new_description[:157] + "..."

        return new_title, new_description, focus_keyword

    def process_excel_file(self, excel_file_path, sheet_name='English_Queries_Position_15+',
                           dry_run=False, max_posts=None, force_update=False):
        """
        Process the Excel file and update WordPress posts

        Args:
            excel_file_path: Path to the Excel file from GSC analysis
            sheet_name: Sheet name to process
            dry_run: If True, only simulate updates without making changes
            max_posts: Maximum number of posts to process (None for all)
            force_update: If True, update posts even if they were previously processed
        """
        try:
            df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
            self.logger.info(f"Loaded {len(df)} rows from {sheet_name}")

            # Keep only rows that matched a page.
            df_with_pages = df[df['best_matching_page'].notna() & (df['best_matching_page'] != '')].copy()
            # Worst-ranked queries first, so they get processed before any cap.
            if 'position' in df_with_pages.columns:
                df_with_pages = df_with_pages.sort_values('position', ascending=False)
            if max_posts:
                df_with_pages = df_with_pages.head(max_posts)
            self.logger.info(f"Found {len(df_with_pages)} queries with matching pages to process")

            total_processed = 0
            for index, row in df_with_pages.iterrows():
                query = row['query']
                page_url = row['best_matching_page']
                position = row.get('position', None)
                match_type = row.get('match_type', 'Unknown')
                self.logger.info(f"Processing query: '{query}' (Position: {position}) -> {page_url}")

                slug = self.extract_post_slug_from_url(page_url)
                if not slug:
                    self.logger.warning(f"Could not extract slug from URL: {page_url}")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Could not extract slug'
                    })
                    continue

                post_data, post_type = self.get_post_by_slug(slug)
                if not post_data:
                    self.logger.warning(f"Post not found for slug: {slug} (URL: {page_url})")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Post not found in WordPress'
                    })
                    continue

                post_id = post_data['id']
                # Check if post was previously processed
                if str(post_id) in self.processed_posts and not force_update:
                    self.logger.info(f"Skipping post ID {post_id} (already processed)")
                    self.skipped_posts.append({
                        'post_id': post_id,
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'status': 'Skipped (Previously Processed)'
                    })
                    continue

                current_title = post_data['title']['rendered']
                rankmath_meta = self.get_rankmath_meta(post_id, post_type)
                current_description = rankmath_meta.get('description', '')

                new_title, new_meta_description, focus_keyword = self.generate_new_title_and_description(
                    slug, query, current_title, current_description, position, post_data
                )
                self.logger.info(f"Current title: {current_title}")
                self.logger.info(f"New title: {new_title}")
                self.logger.info(f"Current description: {current_description}")
                self.logger.info(f"New description: {new_meta_description}")
                self.logger.info(f"Focus keyword: {focus_keyword}")

                if dry_run:
                    self.logger.info("DRY RUN - Would update post but skipping actual update")
                    self.updated_posts.append({
                        'post_id': post_id,
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'old_title': current_title,
                        'new_title': new_title,
                        'old_description': current_description,
                        'new_description': new_meta_description,
                        'focus_keyword': focus_keyword,
                        'status': 'DRY RUN'
                    })
                else:
                    success, result = self.update_post_seo(
                        post_id, new_title, new_meta_description, post_type, focus_keyword
                    )
                    if success:
                        self.updated_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'old_title': current_title,
                            'new_title': new_title,
                            'old_description': current_description,
                            'new_description': new_meta_description,
                            'focus_keyword': focus_keyword,
                            'status': 'Updated'
                        })
                        self.logger.info(f"āœ… Successfully updated post ID {post_id}")
                        # Save to history only on successful update
                        self.save_processed_post(post_id)
                        self.processed_posts.add(str(post_id))
                    else:
                        self.failed_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'error': result,
                            'status': 'Failed'
                        })
                        self.logger.error(f"āŒ Failed to update post ID {post_id}: {result}")

                total_processed += 1
                # Be gentle with the server between REST calls.
                time.sleep(2)
                if total_processed % 5 == 0:
                    self.logger.info(f"Processed {total_processed}/{len(df_with_pages)} posts...")

            self.logger.info(f"Processing complete. Total processed: {total_processed}")
            self.generate_reports()
        except Exception as e:
            self.logger.error(f"Error processing Excel file: {str(e)}")
            raise

    def generate_reports(self):
        """Generate reports for updated posts, failures, not found URLs, and skipped posts"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if self.updated_posts:
            updated_df = pd.DataFrame(self.updated_posts)
            updated_file = f"updated_posts_{timestamp}.xlsx"
            updated_df.to_excel(updated_file, index=False)
            self.logger.info(f"āœ… Updated posts report saved to: {updated_file}")
        if self.failed_posts:
            failed_df = pd.DataFrame(self.failed_posts)
            failed_file = f"failed_posts_{timestamp}.xlsx"
            failed_df.to_excel(failed_file, index=False)
            self.logger.info(f"āŒ Failed posts report saved to: {failed_file}")
        if self.not_found_urls:
            not_found_df = pd.DataFrame(self.not_found_urls)
            not_found_file = f"not_found_urls_{timestamp}.xlsx"
            not_found_df.to_excel(not_found_file, index=False)
            self.logger.info(f"āš ļø Not found URLs report saved to: {not_found_file}")
        if self.skipped_posts:
            skipped_df = pd.DataFrame(self.skipped_posts)
            skipped_file = f"skipped_posts_{timestamp}.xlsx"
            skipped_df.to_excel(skipped_file, index=False)
            self.logger.info(f"ā­ļø Skipped posts report saved to: {skipped_file}")

        self.logger.info("="*60)
        self.logger.info("RANKMATH SEO UPDATE SUMMARY")
        self.logger.info("="*60)
        self.logger.info(f"āœ… Successfully updated: {len(self.updated_posts)}")
        self.logger.info(f"āŒ Failed updates: {len(self.failed_posts)}")
        self.logger.info(f"āš ļø URLs not found: {len(self.not_found_urls)}")
        self.logger.info(f"ā­ļø Skipped (previously processed): {len(self.skipped_posts)}")
        self.logger.info("="*60)


def main():
    """Script entry point: configure, connect, and process the GSC Excel file."""
    # Configuration - UPDATE THESE VALUES
    WORDPRESS_URL = 'https://farmonaut.com'  # Update this

    # SECURITY FIX: the original script hard-coded a live WordPress
    # Application Password and an OpenAI API key in this file. Credentials
    # must come from the environment; any previously committed keys should
    # be rotated immediately.
    USERNAME = os.environ.get('WP_USERNAME', '')
    APP_PASSWORD = os.environ.get('WP_APP_PASSWORD', '')
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')  # None disables OpenAI

    # BUG FIX: EXCEL_FILE_PATH was referenced below but never defined,
    # which raised NameError on every run. Overridable via environment.
    EXCEL_FILE_PATH = os.environ.get('GSC_EXCEL_FILE', 'gsc_analysis.xlsx')

    # Processing options
    DRY_RUN = True
    MAX_POSTS = 10
    SHEET_NAME = 'English_Queries_Position_15+'
    FORCE_UPDATE = False  # Set to True to update posts even if previously processed

    print("šŸš€ Starting WordPress RankMath SEO Updater with OpenAI")
    print("="*60)
    updater = WordPressSEOUpdater(WORDPRESS_URL, USERNAME, APP_PASSWORD, OPENAI_API_KEY)

    print("šŸ”— Testing WordPress connection...")
    if not updater.test_connection():
        print("āŒ Failed to connect to WordPress. Please check your credentials.")
        print("\nTroubleshooting:")
        print("1. Ensure you're using WordPress Application Password (not regular password)")
        print("2. Check if WordPress REST API is enabled")
        print("3. Verify your WordPress URL is correct")
        print("4. Make sure your user has sufficient permissions")
        return
    print("āœ… WordPress connection successful!")

    try:
        if not os.path.exists(EXCEL_FILE_PATH):
            print(f"āŒ Excel file not found: {EXCEL_FILE_PATH}")
            return
        print(f"šŸ“Š Processing Excel file: {EXCEL_FILE_PATH}")
        print(f"šŸ“„ Sheet: {SHEET_NAME}")
        print(f"šŸ”„ Mode: {'DRY RUN' if DRY_RUN else 'LIVE UPDATE'}")
        print(f"šŸ¤– OpenAI: {'Enabled' if updater.use_openai else 'Disabled (using basic optimization)'}")
        print(f"šŸ”„ Force Update: {'Enabled' if FORCE_UPDATE else 'Disabled (skipping previously processed posts)'}")
        if MAX_POSTS:
            print(f"šŸ“ˆ Max posts: {MAX_POSTS}")
        updater.process_excel_file(
            excel_file_path=EXCEL_FILE_PATH,
            sheet_name=SHEET_NAME,
            dry_run=DRY_RUN,
            max_posts=MAX_POSTS,
            force_update=FORCE_UPDATE
        )
        print("\nšŸŽ‰ Processing completed successfully!")
    except FileNotFoundError:
        print(f"āŒ Excel file not found: {EXCEL_FILE_PATH}")
        print("Please make sure the file exists and the path is correct.")
    except Exception as e:
        print(f"āŒ Error: {e}")
        print("\nšŸ”§ Troubleshooting checklist:")
        print("1. WordPress Application Password is correctly set")
        print("2. Excel file exists and has the correct sheet name")
        print("3. WordPress REST API is enabled")
        print("4. RankMath SEO plugin is installed and active")
        print("5. User has sufficient permissions")


if __name__ == "__main__":
    main()