import pandas as pd
import requests
import json
import time
import logging
import re
import os
from urllib.parse import urlparse
from datetime import datetime

import openai


class WordPressSEOUpdater:
    def __init__(self, wordpress_url, username, app_password, openai_api_key=None):
        """
        Initialize the WordPress SEO updater for RankMath.

        Args:
            wordpress_url: Your WordPress site URL (e.g., 'https://yoursite.com')
            username: WordPress username
            app_password: WordPress application password (not the regular password)
            openai_api_key: OpenAI API key for AI-powered SEO optimization
        """
        self.wordpress_url = wordpress_url.rstrip('/')
        self.api_base = f"{self.wordpress_url}/wp-json/wp/v2"
        self.username = username
        self.app_password = app_password
        self.session = requests.Session()
        self.session.auth = (username, app_password)

        # Setup logging first, before anything that needs self.logger
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('wordpress_seo_update.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

        # Initialize OpenAI client if an API key is provided
        self.openai_client = None
        self.use_openai = False
        if openai_api_key:
            try:
                self.openai_client = openai.OpenAI(api_key=openai_api_key)
                self.use_openai = True
                self.logger.info("✅ OpenAI API initialized successfully")
            except Exception as e:
                self.logger.warning(
                    f"⚠️ OpenAI API initialization failed: {e}. "
                    "Falling back to basic optimization."
                )

        # Lists to track results
        self.updated_posts = []
        self.failed_posts = []
        self.not_found_urls = []

    def test_connection(self):
        """Test the WordPress REST API connection."""
        try:
            response = self.session.get(f"{self.api_base}/users/me")
            if response.status_code == 200:
                user_data = response.json()
                self.logger.info(
                    f"Successfully connected to WordPress as: {user_data.get('name', 'Unknown')}"
                )
                return True
            else:
                self.logger.error(
                    f"Connection failed. Status: {response.status_code}, Response: {response.text}"
                )
                return False
        except Exception as e:
            self.logger.error(f"Connection test failed: {str(e)}")
            return False
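    # --- Added sketch (not part of the original script) ----------------------
    # A convenience constructor that pulls credentials from environment
    # variables so secrets never have to be hard-coded in source. The variable
    # names (WP_URL, WP_USERNAME, WP_APP_PASSWORD, OPENAI_API_KEY) are
    # assumptions; adjust them to your own setup.
    @classmethod
    def from_env(cls):
        """Build an updater from environment variables (illustrative sketch)."""
        return cls(
            wordpress_url=os.environ["WP_URL"],            # assumed variable name
            username=os.environ["WP_USERNAME"],            # assumed variable name
            app_password=os.environ["WP_APP_PASSWORD"],    # assumed variable name
            openai_api_key=os.environ.get("OPENAI_API_KEY")
        )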
    def extract_post_slug_from_url(self, url):
        """
        Extract the post slug from a WordPress URL.

        Args:
            url: Full WordPress post URL
        """
        try:
            parsed_url = urlparse(url)
            path = parsed_url.path.strip('/')

            # Handle different WordPress permalink structures.
            # Common patterns: /post-slug/, /category/post-slug/, /year/month/post-slug/
            path_parts = path.split('/')

            # Use the last non-empty, non-numeric part as the slug
            slug = None
            for part in reversed(path_parts):
                if part and not part.isdigit():  # Skip year/month numbers
                    slug = part
                    break

            return slug
        except Exception as e:
            self.logger.error(f"Error extracting slug from URL {url}: {str(e)}")
            return None

    def get_post_by_slug(self, slug):
        """
        Get a WordPress post or page by slug.

        Args:
            slug: Post slug
        """
        try:
            # Try posts first
            response = self.session.get(f"{self.api_base}/posts", params={'slug': slug})
            if response.status_code == 200:
                posts = response.json()
                if posts:
                    return posts[0], 'post'

            # Try pages if not found in posts
            response = self.session.get(f"{self.api_base}/pages", params={'slug': slug})
            if response.status_code == 200:
                pages = response.json()
                if pages:
                    return pages[0], 'page'

            return None, None
        except Exception as e:
            self.logger.error(f"Error getting post by slug {slug}: {str(e)}")
            return None, None

    def get_rankmath_meta(self, post_id, post_type='post'):
        """
        Get RankMath SEO meta data for a post.

        Args:
            post_id: WordPress post ID
            post_type: 'post' or 'page'
        """
        try:
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"
            response = self.session.get(endpoint, params={'context': 'edit'})

            if response.status_code == 200:
                post_data = response.json()

                # RankMath stores its SEO data in these post meta fields
                meta_data = post_data.get('meta', {})
                rankmath_data = {
                    'title': meta_data.get('rank_math_title', ''),
                    'description': meta_data.get('rank_math_description', ''),
                    'focus_keyword': meta_data.get('rank_math_focus_keyword', ''),
                    'canonical_url': meta_data.get('rank_math_canonical_url', ''),
                    'robots': meta_data.get('rank_math_robots', []),
                    'og_title': meta_data.get('rank_math_facebook_title', ''),
                    'og_description': meta_data.get('rank_math_facebook_description', ''),
                    'twitter_title': meta_data.get('rank_math_twitter_title', ''),
                    'twitter_description': meta_data.get('rank_math_twitter_description', '')
                }

                # If the RankMath title is empty, the post title is being used
                if not rankmath_data['title']:
                    rankmath_data['title'] = post_data.get('title', {}).get('rendered', '')

                return rankmath_data

            return {}
        except Exception as e:
            self.logger.error(f"Error getting RankMath meta for post {post_id}: {str(e)}")
            return {}
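    # Note (assumption, not from the original script): the WordPress REST API
    # only exposes the rank_math_* keys in the 'meta' object above if those
    # meta fields are registered with show_in_rest (e.g. via register_post_meta
    # in a small helper plugin) or exposed by a RankMath REST integration.
    # If the 'meta' dict comes back empty, that registration is the first
    # thing to check.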
    def update_post_seo(self, post_id, new_title, new_meta_description, post_type='post',
                        focus_keyword=None, update_social_meta=True):
        """
        Update post title and meta description using RankMath.

        Args:
            post_id: WordPress post ID
            new_title: New post title
            new_meta_description: New meta description
            post_type: 'post' or 'page'
            focus_keyword: Optional focus keyword for RankMath
            update_social_meta: Whether to update social media meta tags
        """
        try:
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"

            # Prepare update data (post title and excerpt)
            update_data = {
                'title': new_title,
                'excerpt': new_meta_description
            }

            # Update RankMath SEO meta fields
            meta_updates = {}

            # Core RankMath fields: SEO description and SEO title
            if new_meta_description:
                meta_updates['rank_math_description'] = new_meta_description
            if new_title:
                meta_updates['rank_math_title'] = new_title

            # Update focus keyword if provided
            if focus_keyword:
                meta_updates['rank_math_focus_keyword'] = focus_keyword

            # Update social media meta tags if requested
            if update_social_meta:
                if new_title:
                    meta_updates['rank_math_facebook_title'] = new_title
                    meta_updates['rank_math_twitter_title'] = new_title
                if new_meta_description:
                    meta_updates['rank_math_facebook_description'] = new_meta_description
                    meta_updates['rank_math_twitter_description'] = new_meta_description

            if meta_updates:
                update_data['meta'] = meta_updates

            response = self.session.post(endpoint, json=update_data)

            if response.status_code == 200:
                self.logger.info(f"Successfully updated {post_type} ID {post_id} with RankMath SEO data")
                return True, response.json()
            else:
                self.logger.error(
                    f"Failed to update {post_type} ID {post_id}. "
                    f"Status: {response.status_code}, Response: {response.text}"
                )
                return False, response.text
        except Exception as e:
            self.logger.error(f"Error updating {post_type} ID {post_id}: {str(e)}")
            return False, str(e)
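    # Illustrative one-off call for manual testing (hypothetical values — the
    # post ID, strings and keyword below are not from the original data):
    #
    #   ok, result = updater.update_post_seo(
    #       post_id=123,
    #       new_title="Satellite Crop Monitoring: A Practical Guide",
    #       new_meta_description="Learn how satellite crop monitoring works and "
    #                            "how to apply it on your farm.",
    #       post_type='post',
    #       focus_keyword="satellite crop monitoring",
    #   )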
    def generate_seo_with_openai(self, slug, current_title, current_description, query,
                                 position=None, post_content_excerpt=None):
        """
        Use the OpenAI API to generate an optimized SEO title and meta description.

        Args:
            slug: Post slug/URL path
            current_title: Current post title
            current_description: Current meta description
            query: Search query from GSC
            position: Current ranking position
            post_content_excerpt: First few paragraphs of post content (optional)

        Returns:
            tuple: (optimized_title, optimized_description, focus_keyword, success)
        """
        if not self.use_openai:
            return None, None, None, False

        try:
            # Prepare the prompt for OpenAI
            prompt = f"""
You are an expert SEO specialist. I need you to optimize the title and meta description for a WordPress post to improve its Google Search Console ranking.

**Current Post Information:**
- Post Slug: {slug}
- Current Title: {current_title}
- Current Meta Description: {current_description or 'None'}
- Target Search Query: {query}
- Current Position: {position if position else 'Unknown'}

**Post Content Preview:**
{post_content_excerpt or 'Not available'}

**Requirements:**
1. Create an SEO-optimized title (max 45 characters) that:
   - Includes the target keyword naturally
   - Is compelling and click-worthy
   - Maintains the original topic/intent
   - Uses power words when appropriate

2. Create an SEO-optimized meta description (max 150 characters) that:
   - Includes the target keyword naturally
   - Clearly describes what the reader will learn
   - Includes a call-to-action
   - Is compelling and encourages clicks

3. Suggest the primary focus keyword for RankMath

**Important Guidelines:**
- The title and description should feel natural, not keyword-stuffed
- Maintain the original meaning and intent of the content
- Make it compelling for human readers, not just search engines
- Consider search intent behind the query

Please respond in this exact JSON format:
{{
    "optimized_title": "Your optimized title here",
    "optimized_description": "Your optimized meta description here",
    "focus_keyword": "primary keyword phrase",
    "reasoning": "Brief explanation of your optimization strategy"
}}
"""

            # Make the API call to OpenAI
            response = self.openai_client.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {"role": "system", "content": "You are an SEO expert specializing in optimizing web content."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1000,
                temperature=0.3,
            )

            # Parse the response
            response_text = response.choices[0].message.content.strip()

            # Try to extract JSON from the response
            try:
                # Find JSON in the response (in case OpenAI adds extra text)
                start_idx = response_text.find('{')
                end_idx = response_text.rfind('}') + 1
                json_str = response_text[start_idx:end_idx]
                result = json.loads(json_str)

                optimized_title = result.get('optimized_title', '')
                optimized_description = result.get('optimized_description', '')
                focus_keyword = result.get('focus_keyword', query.lower())
                reasoning = result.get('reasoning', '')

                # Validate lengths
                if len(optimized_title) > 60:
                    optimized_title = optimized_title[:57] + "..."
                if len(optimized_description) > 160:
                    optimized_description = optimized_description[:157] + "..."

                self.logger.info(f"🤖 OpenAI optimization reasoning: {reasoning}")
                return optimized_title, optimized_description, focus_keyword, True

            except json.JSONDecodeError:
                self.logger.error(f"Failed to parse OpenAI response as JSON: {response_text}")
                return None, None, None, False

        except Exception as e:
            self.logger.error(f"OpenAI API error: {str(e)}")
            return None, None, None, False
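    # Note (assumption about the OpenAI SDK, not part of the original script):
    # recent chat.completions versions accept response_format={"type": "json_object"},
    # which would make the brace-scanning JSON extraction above unnecessary.
    # The manual parsing is kept as-is so the method also works with models or
    # SDK versions that do not support JSON mode.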
    def get_post_content_excerpt(self, post_data, max_length=500):
        """
        Extract the first part of the post content for OpenAI context.

        Args:
            post_data: WordPress post data
            max_length: Maximum length of excerpt

        Returns:
            str: Post content excerpt
        """
        try:
            content = post_data.get('content', {}).get('rendered', '')

            # Remove HTML tags for cleaner text
            clean_content = re.sub(r'<[^>]+>', ' ', content)
            clean_content = re.sub(r'\s+', ' ', clean_content).strip()

            if len(clean_content) > max_length:
                # Try to cut at a sentence boundary
                sentences = clean_content[:max_length].split('.')
                if len(sentences) > 1:
                    return '.'.join(sentences[:-1]) + '.'
                else:
                    return clean_content[:max_length] + "..."

            return clean_content
        except Exception as e:
            self.logger.warning(f"Could not extract post content excerpt: {e}")
            return None

    def generate_new_title_and_description(self, slug, query, current_title, current_description,
                                           position=None, post_data=None):
        """
        Generate a new title and meta description based on the query and current ranking position.
        Uses the OpenAI API if available, otherwise falls back to basic optimization.

        Args:
            slug: Post slug
            query: The search query from GSC
            current_title: Current post title
            current_description: Current meta description
            position: Current ranking position (optional)
            post_data: Full post data for content context

        Returns:
            tuple: (new_title, new_meta_description, focus_keyword)
        """
        # Try the OpenAI API first if available
        if self.use_openai and post_data:
            post_content_excerpt = self.get_post_content_excerpt(post_data)
            openai_title, openai_description, openai_keyword, openai_success = self.generate_seo_with_openai(
                slug, current_title, current_description, query, position, post_content_excerpt
            )

            if openai_success and openai_title and openai_description:
                self.logger.info("🤖 Using OpenAI-optimized SEO content")
                return openai_title, openai_description, openai_keyword
            else:
                self.logger.warning("⚠️ OpenAI optimization failed, falling back to basic optimization")

        # Fallback to basic optimization
        self.logger.info("🔧 Using basic SEO optimization")

        # Clean the query for better processing
        clean_query = query.strip().title()
        focus_keyword = query.lower()

        # Generate an optimized title
        new_title = current_title

        # If the query is not in the title, consider adding it
        if query.lower() not in current_title.lower():
            # Option 1: Add query to beginning
            new_title = f"{clean_query} - {current_title}"
            # Option 2: Add query to end (uncomment if preferred)
            # new_title = f"{current_title} | {clean_query}"
            # Option 3: Replace title completely (be careful with this)
            # new_title = f"{clean_query} - Complete Guide"

        # Ensure the title is under 60 characters for better SEO
        if len(new_title) > 60:
            # Try to shorten while keeping the query
            if len(clean_query) < 40:
                remaining_chars = 57 - len(clean_query) - 3  # 3 for " - "
                shortened_title = current_title[:remaining_chars]
                new_title = f"{clean_query} - {shortened_title}..."
            else:
                new_title = new_title[:57] + "..."

        # Generate an optimized meta description
        new_description = current_description

        if not current_description or len(current_description) < 50:
            # Create a new description that includes the query
            new_description = (
                f"Discover everything about {query}. {current_title} - "
                "comprehensive guide with expert insights and practical tips."
            )
        elif query.lower() not in current_description.lower():
            # Add the query to the existing description
            new_description = f"{query.title()}: {current_description}"

        # Ensure the description is under 160 characters for better SEO
        if len(new_description) > 160:
            new_description = new_description[:157] + "..."

        # Ensure the description includes the focus keyword
        if focus_keyword not in new_description.lower():
            # Try to naturally incorporate the keyword
            words = new_description.split()
            if len(words) > 10:  # Only if the description is long enough
                # Insert the keyword in the first part of the description
                insert_point = min(8, len(words) // 2)
                words.insert(insert_point, f"({focus_keyword})")
                new_description = " ".join(words)

                # Check the length again and trim if needed
                if len(new_description) > 160:
                    new_description = new_description[:157] + "..."

        return new_title, new_description, focus_keyword
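    # Worked example of the basic (non-OpenAI) fallback above, with made-up
    # inputs (hypothetical, for illustration only):
    #
    #   query               = "crop health monitoring"
    #   current_title       = "How Farmers Track Their Fields"
    #   current_description = ""        (empty, so shorter than 50 characters)
    #
    #   new_title       -> "Crop Health Monitoring - How Farmers Track Their Fields"
    #                      (55 characters, under the 60-character limit)
    #   new_description -> "Discover everything about crop health monitoring. How
    #                       Farmers Track Their Fields - comprehensive guide with
    #                       expert insights and practical tips."
    #                      (143 characters, so no trimming is applied)
    #   focus_keyword   -> "crop health monitoring"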
    def process_excel_file(self, excel_file_path, sheet_name='English_Queries_Position_15+',
                           dry_run=False, max_posts=None):
        """
        Process the Excel file and update WordPress posts.

        Args:
            excel_file_path: Path to the Excel file from the GSC analysis
            sheet_name: Sheet name to process
            dry_run: If True, only simulate updates without making changes
            max_posts: Maximum number of posts to process (None for all)
        """
        try:
            # Read the Excel file
            df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
            self.logger.info(f"Loaded {len(df)} rows from {sheet_name}")

            # Filter rows with matching pages
            df_with_pages = df[df['best_matching_page'].notna() & (df['best_matching_page'] != '')].copy()

            # Sort by position (higher positions first for priority)
            if 'position' in df_with_pages.columns:
                df_with_pages = df_with_pages.sort_values('position', ascending=False)

            # Limit processing if max_posts is specified
            if max_posts:
                df_with_pages = df_with_pages.head(max_posts)

            self.logger.info(f"Found {len(df_with_pages)} queries with matching pages to process")

            total_processed = 0

            for index, row in df_with_pages.iterrows():
                query = row['query']
                page_url = row['best_matching_page']
                position = row.get('position', None)
                match_type = row.get('match_type', 'Unknown')

                self.logger.info(f"Processing query: '{query}' (Position: {position}) -> {page_url}")

                # Extract the slug from the URL
                slug = self.extract_post_slug_from_url(page_url)
                if not slug:
                    self.logger.warning(f"Could not extract slug from URL: {page_url}")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Could not extract slug'
                    })
                    continue

                # Get the post by slug
                post_data, post_type = self.get_post_by_slug(slug)
                if not post_data:
                    self.logger.warning(f"Post not found for slug: {slug} (URL: {page_url})")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Post not found in WordPress'
                    })
                    continue

                post_id = post_data['id']
                current_title = post_data['title']['rendered']

                # Get the current RankMath meta description
                rankmath_meta = self.get_rankmath_meta(post_id, post_type)
                current_description = rankmath_meta.get('description', '')

                # Generate a new title and description using OpenAI or the fallback
                new_title, new_meta_description, focus_keyword = self.generate_new_title_and_description(
                    slug, query, current_title, current_description, position, post_data
                )

                self.logger.info(f"Current title: {current_title}")
                self.logger.info(f"New title: {new_title}")
                self.logger.info(f"Current description: {current_description}")
                self.logger.info(f"New description: {new_meta_description}")
                self.logger.info(f"Focus keyword: {focus_keyword}")

                if dry_run:
                    self.logger.info("DRY RUN - Would update post but skipping actual update")
                    self.updated_posts.append({
                        'post_id': post_id,
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'old_title': current_title,
                        'new_title': new_title,
                        'old_description': current_description,
                        'new_description': new_meta_description,
                        'focus_keyword': focus_keyword,
                        'status': 'DRY RUN'
                    })
                else:
                    # Update the post
                    success, result = self.update_post_seo(
                        post_id, new_title, new_meta_description, post_type, focus_keyword
                    )

                    if success:
                        self.updated_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'old_title': current_title,
                            'new_title': new_title,
                            'old_description': current_description,
                            'new_description': new_meta_description,
                            'focus_keyword': focus_keyword,
                            'status': 'Updated'
                        })
                        self.logger.info(f"✅ Successfully updated post ID {post_id}")
                    else:
                        self.failed_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'error': result,
                            'status': 'Failed'
                        })
                        self.logger.error(f"❌ Failed to update post ID {post_id}: {result}")

                total_processed += 1

                # Add a delay to avoid overwhelming the server
                time.sleep(2)  # Increased delay for safety

                # Progress update every 5 posts
                if total_processed % 5 == 0:
                    self.logger.info(f"Processed {total_processed}/{len(df_with_pages)} posts...")

            self.logger.info(f"Processing complete. Total processed: {total_processed}")
            self.generate_reports()

        except Exception as e:
            self.logger.error(f"Error processing Excel file: {str(e)}")
            raise
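    # Minimal sketch of the spreadsheet the method above expects. Column names
    # come from the code ('query', 'best_matching_page', 'position',
    # 'match_type'); the row values are made up, while the file and sheet
    # names mirror the defaults used in main() below:
    #
    #   demo = pd.DataFrame([{
    #       "query": "satellite crop monitoring",
    #       "best_matching_page": "https://example.com/satellite-crop-monitoring/",
    #       "position": 18.4,
    #       "match_type": "partial",
    #   }])
    #   demo.to_excel("gsc_analysis_english_filtered.xlsx",
    #                 sheet_name="English_Queries_Position_15+", index=False)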
    def generate_reports(self):
        """Generate reports for updated posts, failures, and not-found URLs."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Save the updated posts report
        if self.updated_posts:
            updated_df = pd.DataFrame(self.updated_posts)
            updated_file = f"updated_posts_{timestamp}.xlsx"
            updated_df.to_excel(updated_file, index=False)
            self.logger.info(f"✅ Updated posts report saved to: {updated_file}")

        # Save the failed posts report
        if self.failed_posts:
            failed_df = pd.DataFrame(self.failed_posts)
            failed_file = f"failed_posts_{timestamp}.xlsx"
            failed_df.to_excel(failed_file, index=False)
            self.logger.info(f"❌ Failed posts report saved to: {failed_file}")

        # Save the not-found URLs report
        if self.not_found_urls:
            not_found_df = pd.DataFrame(self.not_found_urls)
            not_found_file = f"not_found_urls_{timestamp}.xlsx"
            not_found_df.to_excel(not_found_file, index=False)
            self.logger.info(f"⚠️ Not found URLs report saved to: {not_found_file}")

        # Print summary
        self.logger.info("=" * 60)
        self.logger.info("RANKMATH SEO UPDATE SUMMARY")
        self.logger.info("=" * 60)
        self.logger.info(f"✅ Successfully updated: {len(self.updated_posts)}")
        self.logger.info(f"❌ Failed updates: {len(self.failed_posts)}")
        self.logger.info(f"⚠️ URLs not found: {len(self.not_found_urls)}")
        self.logger.info("=" * 60)
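# Hedged usage sketch (an alternative to main() below): build the updater from
# environment variables and do a dry run over the first 5 posts before
# committing to live updates. from_env() is the illustrative helper added above.
#
#   updater = WordPressSEOUpdater.from_env()
#   if updater.test_connection():
#       updater.process_excel_file(
#           "gsc_analysis_english_filtered.xlsx",
#           sheet_name="English_Queries_Position_15+",
#           dry_run=True,
#           max_posts=5,
#       )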
def main():
    # Configuration - UPDATE THESE VALUES (or export the environment variables below)
    WORDPRESS_URL = 'https://farmonaut.com'  # Update this
    USERNAME = os.environ.get('WP_USERNAME', 'your-username')  # WordPress username
    APP_PASSWORD = os.environ.get('WP_APP_PASSWORD', 'your-app-password')  # WordPress Application Password, not the regular password!
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')  # Optional but recommended; never hard-code API keys in source
    EXCEL_FILE_PATH = 'gsc_analysis_english_filtered.xlsx'  # Path to your GSC analysis file

    # Processing options
    DRY_RUN = False  # Set to True to simulate updates without making changes
    MAX_POSTS = None  # Set to None to process all posts, or a number to limit
    SHEET_NAME = 'English_Queries_Position_15+'  # Sheet name in your Excel file

    print("🚀 Starting WordPress RankMath SEO Updater with OpenAI")
    print("=" * 60)

    # Initialize the updater
    updater = WordPressSEOUpdater(WORDPRESS_URL, USERNAME, APP_PASSWORD, OPENAI_API_KEY)

    # Test the connection
    print("🔗 Testing WordPress connection...")
    if not updater.test_connection():
        print("❌ Failed to connect to WordPress. Please check your credentials.")
        print("\nTroubleshooting:")
        print("1. Ensure you're using a WordPress Application Password (not your regular password)")
        print("2. Check if the WordPress REST API is enabled")
        print("3. Verify your WordPress URL is correct")
        print("4. Make sure your user has sufficient permissions")
        return

    print("✅ WordPress connection successful!")

    try:
        # Check if the Excel file exists
        if not os.path.exists(EXCEL_FILE_PATH):
            print(f"❌ Excel file not found: {EXCEL_FILE_PATH}")
            return

        print(f"📊 Processing Excel file: {EXCEL_FILE_PATH}")
        print(f"📄 Sheet: {SHEET_NAME}")
        print(f"🔄 Mode: {'DRY RUN' if DRY_RUN else 'LIVE UPDATE'}")
        print(f"🤖 OpenAI: {'Enabled' if updater.use_openai else 'Disabled (using basic optimization)'}")
        if MAX_POSTS:
            print(f"📈 Max posts: {MAX_POSTS}")

        # Process the Excel file
        updater.process_excel_file(
            excel_file_path=EXCEL_FILE_PATH,
            sheet_name=SHEET_NAME,
            dry_run=DRY_RUN,
            max_posts=MAX_POSTS
        )

        print("\n🎉 Processing completed successfully!")

    except FileNotFoundError:
        print(f"❌ Excel file not found: {EXCEL_FILE_PATH}")
        print("Please make sure the file exists and the path is correct.")
    except Exception as e:
        print(f"❌ Error: {e}")
        print("\n🔧 Troubleshooting checklist:")
        print("1. WordPress Application Password is correctly set")
        print("2. Excel file exists and has the correct sheet name")
        print("3. WordPress REST API is enabled")
        print("4. RankMath SEO plugin is installed and active")
        print("5. User has sufficient permissions")


if __name__ == "__main__":
    main()
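# Setup reminder (general WordPress behaviour, not specific to this script):
# Application Passwords are created under Users -> Profile -> Application
# Passwords in the WordPress admin (available since WordPress 5.6). The
# generated password is what goes into WP_APP_PASSWORD / APP_PASSWORD above;
# the normal login password will generally not work for REST API Basic Auth.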