import pandas as pd
import requests
import json
import time
import logging
import re
import os
from urllib.parse import urlparse
from datetime import datetime

import openai


class WordPressSEOUpdater:
    def __init__(self, wordpress_url, username, app_password, openai_api_key=None):
        """
        Initialize the WordPress SEO updater for RankMath.

        Args:
            wordpress_url: Your WordPress site URL (e.g., 'https://yoursite.com')
            username: WordPress username
            app_password: WordPress application password (not the regular password)
            openai_api_key: OpenAI API key for AI-powered SEO optimization
        """
        self.wordpress_url = wordpress_url.rstrip('/')
        self.api_base = f"{self.wordpress_url}/wp-json/wp/v2"
        self.username = username
        self.app_password = app_password
        self.session = requests.Session()
        self.session.auth = (username, app_password)

        # Setup logging first, before anything that needs self.logger
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('wordpress_seo_update.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

        # Initialize OpenAI client if an API key is provided
        self.openai_client = None
        self.use_openai = False
        if openai_api_key:
            try:
                self.openai_client = openai.OpenAI(api_key=openai_api_key)
                self.use_openai = True
                self.logger.info("✅ OpenAI API initialized successfully")
            except Exception as e:
                self.logger.warning(
                    f"⚠️ OpenAI API initialization failed: {e}. "
                    "Falling back to basic optimization."
                )

        # Lists to track results
        self.updated_posts = []
        self.failed_posts = []
        self.not_found_urls = []

    def test_connection(self):
        """Test the WordPress REST API connection."""
        try:
            response = self.session.get(f"{self.api_base}/users/me")
            if response.status_code == 200:
                user_data = response.json()
                self.logger.info(
                    f"Successfully connected to WordPress as: {user_data.get('name', 'Unknown')}"
                )
                return True
            else:
                self.logger.error(
                    f"Connection failed. Status: {response.status_code}, Response: {response.text}"
                )
                return False
        except Exception as e:
            self.logger.error(f"Connection test failed: {str(e)}")
            return False
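    # --- Added sketch (not part of the original script) ----------------------
    # A convenience constructor that pulls credentials from environment
    # variables so secrets never have to be hard-coded in source. The variable
    # names (WP_URL, WP_USERNAME, WP_APP_PASSWORD, OPENAI_API_KEY) are
    # assumptions; adjust them to your own setup.
    @classmethod
    def from_env(cls):
        """Build an updater from environment variables (illustrative sketch)."""
        return cls(
            wordpress_url=os.environ["WP_URL"],            # assumed variable name
            username=os.environ["WP_USERNAME"],            # assumed variable name
            app_password=os.environ["WP_APP_PASSWORD"],    # assumed variable name
            openai_api_key=os.environ.get("OPENAI_API_KEY")
        )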
    def extract_post_slug_from_url(self, url):
        """
        Extract the post slug from a WordPress URL.

        Args:
            url: Full WordPress post URL
        """
        try:
            parsed_url = urlparse(url)
            path = parsed_url.path.strip('/')

            # Handle different WordPress permalink structures.
            # Common patterns: /post-slug/, /category/post-slug/, /year/month/post-slug/
            path_parts = path.split('/')

            # Use the last non-empty, non-numeric part as the slug
            slug = None
            for part in reversed(path_parts):
                if part and not part.isdigit():  # Skip year/month numbers
                    slug = part
                    break

            return slug
        except Exception as e:
            self.logger.error(f"Error extracting slug from URL {url}: {str(e)}")
            return None

    def get_post_by_slug(self, slug):
        """
        Get a WordPress post or page by slug.

        Args:
            slug: Post slug
        """
        try:
            # Try posts first
            response = self.session.get(f"{self.api_base}/posts", params={'slug': slug})
            if response.status_code == 200:
                posts = response.json()
                if posts:
                    return posts[0], 'post'

            # Try pages if not found in posts
            response = self.session.get(f"{self.api_base}/pages", params={'slug': slug})
            if response.status_code == 200:
                pages = response.json()
                if pages:
                    return pages[0], 'page'

            return None, None
        except Exception as e:
            self.logger.error(f"Error getting post by slug {slug}: {str(e)}")
            return None, None

    def get_rankmath_meta(self, post_id, post_type='post'):
        """
        Get RankMath SEO meta data for a post.

        Args:
            post_id: WordPress post ID
            post_type: 'post' or 'page'
        """
        try:
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"
            response = self.session.get(endpoint, params={'context': 'edit'})

            if response.status_code == 200:
                post_data = response.json()

                # RankMath stores its SEO data in these post meta fields
                meta_data = post_data.get('meta', {})
                rankmath_data = {
                    'title': meta_data.get('rank_math_title', ''),
                    'description': meta_data.get('rank_math_description', ''),
                    'focus_keyword': meta_data.get('rank_math_focus_keyword', ''),
                    'canonical_url': meta_data.get('rank_math_canonical_url', ''),
                    'robots': meta_data.get('rank_math_robots', []),
                    'og_title': meta_data.get('rank_math_facebook_title', ''),
                    'og_description': meta_data.get('rank_math_facebook_description', ''),
                    'twitter_title': meta_data.get('rank_math_twitter_title', ''),
                    'twitter_description': meta_data.get('rank_math_twitter_description', '')
                }

                # If the RankMath title is empty, the post title is being used
                if not rankmath_data['title']:
                    rankmath_data['title'] = post_data.get('title', {}).get('rendered', '')

                return rankmath_data

            return {}
        except Exception as e:
            self.logger.error(f"Error getting RankMath meta for post {post_id}: {str(e)}")
            return {}
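    # Note (assumption, not from the original script): the WordPress REST API
    # only exposes the rank_math_* keys in the 'meta' object above if those
    # meta fields are registered with show_in_rest (e.g. via register_post_meta
    # in a small helper plugin) or exposed by a RankMath REST integration.
    # If the 'meta' dict comes back empty, that registration is the first
    # thing to check.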
    def update_post_seo(self, post_id, new_title, new_meta_description, post_type='post',
                        focus_keyword=None, update_social_meta=True):
        """
        Update post title and meta description using RankMath.

        Args:
            post_id: WordPress post ID
            new_title: New post title
            new_meta_description: New meta description
            post_type: 'post' or 'page'
            focus_keyword: Optional focus keyword for RankMath
            update_social_meta: Whether to update social media meta tags
        """
        try:
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"

            # Prepare update data (post title and excerpt)
            update_data = {
                'title': new_title,
                'excerpt': new_meta_description
            }

            # Update RankMath SEO meta fields
            meta_updates = {}

            # Core RankMath fields: SEO description and SEO title
            if new_meta_description:
                meta_updates['rank_math_description'] = new_meta_description
            if new_title:
                meta_updates['rank_math_title'] = new_title

            # Update focus keyword if provided
            if focus_keyword:
                meta_updates['rank_math_focus_keyword'] = focus_keyword

            # Update social media meta tags if requested
            if update_social_meta:
                if new_title:
                    meta_updates['rank_math_facebook_title'] = new_title
                    meta_updates['rank_math_twitter_title'] = new_title
                if new_meta_description:
                    meta_updates['rank_math_facebook_description'] = new_meta_description
                    meta_updates['rank_math_twitter_description'] = new_meta_description

            if meta_updates:
                update_data['meta'] = meta_updates

            response = self.session.post(endpoint, json=update_data)

            if response.status_code == 200:
                self.logger.info(f"Successfully updated {post_type} ID {post_id} with RankMath SEO data")
                return True, response.json()
            else:
                self.logger.error(
                    f"Failed to update {post_type} ID {post_id}. "
                    f"Status: {response.status_code}, Response: {response.text}"
                )
                return False, response.text
        except Exception as e:
            self.logger.error(f"Error updating {post_type} ID {post_id}: {str(e)}")
            return False, str(e)
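    # Illustrative one-off call for manual testing (hypothetical values — the
    # post ID, strings and keyword below are not from the original data):
    #
    #   ok, result = updater.update_post_seo(
    #       post_id=123,
    #       new_title="Satellite Crop Monitoring: A Practical Guide",
    #       new_meta_description="Learn how satellite crop monitoring works and "
    #                            "how to apply it on your farm.",
    #       post_type='post',
    #       focus_keyword="satellite crop monitoring",
    #   )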
    def generate_seo_with_openai(self, slug, current_title, current_description, query,
                                 position=None, post_content_excerpt=None):
        """
        Use the OpenAI API to generate an optimized SEO title and meta description.

        Args:
            slug: Post slug/URL path
            current_title: Current post title
            current_description: Current meta description
            query: Search query from GSC
            position: Current ranking position
            post_content_excerpt: First few paragraphs of post content (optional)

        Returns:
            tuple: (optimized_title, optimized_description, focus_keyword, success)
        """
        if not self.use_openai:
            return None, None, None, False

        try:
            # Prepare the prompt for OpenAI
            prompt = f"""
You are an expert SEO specialist. I need you to optimize the title and meta description for a WordPress post to improve its Google Search Console ranking.

**Current Post Information:**
- Post Slug: {slug}
- Current Title: {current_title}
- Current Meta Description: {current_description or 'None'}
- Target Search Query: {query}
- Current Position: {position if position else 'Unknown'}

**Post Content Preview:**
{post_content_excerpt or 'Not available'}

**Requirements:**
1. Create an SEO-optimized title (max 45 characters) that:
   - Includes the target keyword naturally
   - Is compelling and click-worthy
   - Maintains the original topic/intent
   - Uses power words when appropriate

2. Create an SEO-optimized meta description (max 150 characters) that:
   - Includes the target keyword naturally
   - Clearly describes what the reader will learn
   - Includes a call-to-action
   - Is compelling and encourages clicks

3. Suggest the primary focus keyword for RankMath

**Important Guidelines:**
- The title and description should feel natural, not keyword-stuffed
- Maintain the original meaning and intent of the content
- Make it compelling for human readers, not just search engines
- Consider search intent behind the query

Please respond in this exact JSON format:
{{
    "optimized_title": "Your optimized title here",
    "optimized_description": "Your optimized meta description here",
    "focus_keyword": "primary keyword phrase",
    "reasoning": "Brief explanation of your optimization strategy"
}}
"""

            # Make the API call to OpenAI
            response = self.openai_client.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {"role": "system", "content": "You are an SEO expert specializing in optimizing web content."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1000,
                temperature=0.3,
            )

            # Parse the response
            response_text = response.choices[0].message.content.strip()

            # Try to extract JSON from the response
            try:
                # Find JSON in the response (in case OpenAI adds extra text)
                start_idx = response_text.find('{')
                end_idx = response_text.rfind('}') + 1
                json_str = response_text[start_idx:end_idx]
                result = json.loads(json_str)

                optimized_title = result.get('optimized_title', '')
                optimized_description = result.get('optimized_description', '')
                focus_keyword = result.get('focus_keyword', query.lower())
                reasoning = result.get('reasoning', '')

                # Validate lengths
                if len(optimized_title) > 60:
                    optimized_title = optimized_title[:57] + "..."
                if len(optimized_description) > 160:
                    optimized_description = optimized_description[:157] + "..."

                self.logger.info(f"🤖 OpenAI optimization reasoning: {reasoning}")
                return optimized_title, optimized_description, focus_keyword, True

            except json.JSONDecodeError:
                self.logger.error(f"Failed to parse OpenAI response as JSON: {response_text}")
                return None, None, None, False

        except Exception as e:
            self.logger.error(f"OpenAI API error: {str(e)}")
            return None, None, None, False
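    # Note (assumption about the OpenAI SDK, not part of the original script):
    # recent chat.completions versions accept response_format={"type": "json_object"},
    # which would make the brace-scanning JSON extraction above unnecessary.
    # The manual parsing is kept as-is so the method also works with models or
    # SDK versions that do not support JSON mode.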
    def get_post_content_excerpt(self, post_data, max_length=500):
        """
        Extract the first part of the post content for OpenAI context.

        Args:
            post_data: WordPress post data
            max_length: Maximum length of excerpt

        Returns:
            str: Post content excerpt
        """
        try:
            content = post_data.get('content', {}).get('rendered', '')

            # Remove HTML tags for cleaner text
            clean_content = re.sub(r'<[^>]+>', ' ', content)
            clean_content = re.sub(r'\s+', ' ', clean_content).strip()

            if len(clean_content) > max_length:
                # Try to cut at a sentence boundary
                sentences = clean_content[:max_length].split('.')
                if len(sentences) > 1:
                    return '.'.join(sentences[:-1]) + '.'
                else:
                    return clean_content[:max_length] + "..."

            return clean_content
        except Exception as e:
            self.logger.warning(f"Could not extract post content excerpt: {e}")
            return None

    def generate_new_title_and_description(self, slug, query, current_title, current_description,
                                           position=None, post_data=None):
        """
        Generate a new title and meta description based on the query and current ranking position.
        Uses the OpenAI API if available, otherwise falls back to basic optimization.

        Args:
            slug: Post slug
            query: The search query from GSC
            current_title: Current post title
            current_description: Current meta description
            position: Current ranking position (optional)
            post_data: Full post data for content context

        Returns:
            tuple: (new_title, new_meta_description, focus_keyword)
        """
        # Try the OpenAI API first if available
        if self.use_openai and post_data:
            post_content_excerpt = self.get_post_content_excerpt(post_data)
            openai_title, openai_description, openai_keyword, openai_success = self.generate_seo_with_openai(
                slug, current_title, current_description, query, position, post_content_excerpt
            )

            if openai_success and openai_title and openai_description:
                self.logger.info("🤖 Using OpenAI-optimized SEO content")
                return openai_title, openai_description, openai_keyword
            else:
                self.logger.warning("⚠️ OpenAI optimization failed, falling back to basic optimization")

        # Fallback to basic optimization
        self.logger.info("🔧 Using basic SEO optimization")

        # Clean the query for better processing
        clean_query = query.strip().title()
        focus_keyword = query.lower()

        # Generate an optimized title
        new_title = current_title

        # If the query is not in the title, consider adding it
        if query.lower() not in current_title.lower():
            # Option 1: Add query to beginning
            new_title = f"{clean_query} - {current_title}"
            # Option 2: Add query to end (uncomment if preferred)
            # new_title = f"{current_title} | {clean_query}"
            # Option 3: Replace title completely (be careful with this)
            # new_title = f"{clean_query} - Complete Guide"

        # Ensure the title is under 60 characters for better SEO
        if len(new_title) > 60:
            # Try to shorten while keeping the query
            if len(clean_query) < 40:
                remaining_chars = 57 - len(clean_query) - 3  # 3 for " - "
                shortened_title = current_title[:remaining_chars]
                new_title = f"{clean_query} - {shortened_title}..."
            else:
                new_title = new_title[:57] + "..."

        # Generate an optimized meta description
        new_description = current_description

        if not current_description or len(current_description) < 50:
            # Create a new description that includes the query
            new_description = (
                f"Discover everything about {query}. {current_title} - "
                "comprehensive guide with expert insights and practical tips."
            )
        elif query.lower() not in current_description.lower():
            # Add the query to the existing description
            new_description = f"{query.title()}: {current_description}"

        # Ensure the description is under 160 characters for better SEO
        if len(new_description) > 160:
            new_description = new_description[:157] + "..."

        # Ensure the description includes the focus keyword
        if focus_keyword not in new_description.lower():
            # Try to naturally incorporate the keyword
            words = new_description.split()
            if len(words) > 10:  # Only if the description is long enough
                # Insert the keyword in the first part of the description
                insert_point = min(8, len(words) // 2)
                words.insert(insert_point, f"({focus_keyword})")
                new_description = " ".join(words)

                # Check the length again and trim if needed
                if len(new_description) > 160:
                    new_description = new_description[:157] + "..."

        return new_title, new_description, focus_keyword
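    # Worked example of the basic (non-OpenAI) fallback above, with made-up
    # inputs (hypothetical, for illustration only):
    #
    #   query               = "crop health monitoring"
    #   current_title       = "How Farmers Track Their Fields"
    #   current_description = ""        (empty, so shorter than 50 characters)
    #
    #   new_title       -> "Crop Health Monitoring - How Farmers Track Their Fields"
    #                      (55 characters, under the 60-character limit)
    #   new_description -> "Discover everything about crop health monitoring. How
    #                       Farmers Track Their Fields - comprehensive guide with
    #                       expert insights and practical tips."
    #                      (143 characters, so no trimming is applied)
    #   focus_keyword   -> "crop health monitoring"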
    def process_excel_file(self, excel_file_path, sheet_name='English_Queries_Position_15+',
                           dry_run=False, max_posts=None):
        """
        Process the Excel file and update WordPress posts.

        Args:
            excel_file_path: Path to the Excel file from the GSC analysis
            sheet_name: Sheet name to process
            dry_run: If True, only simulate updates without making changes
            max_posts: Maximum number of posts to process (None for all)
        """
        try:
            # Read the Excel file
            df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
            self.logger.info(f"Loaded {len(df)} rows from {sheet_name}")

            # Filter rows with matching pages
            df_with_pages = df[df['best_matching_page'].notna() & (df['best_matching_page'] != '')].copy()

            # Sort by position (higher positions first for priority)
            if 'position' in df_with_pages.columns:
                df_with_pages = df_with_pages.sort_values('position', ascending=False)

            # Limit processing if max_posts is specified
            if max_posts:
                df_with_pages = df_with_pages.head(max_posts)

            self.logger.info(f"Found {len(df_with_pages)} queries with matching pages to process")

            total_processed = 0

            for index, row in df_with_pages.iterrows():
                query = row['query']
                page_url = row['best_matching_page']
                position = row.get('position', None)
                match_type = row.get('match_type', 'Unknown')

                self.logger.info(f"Processing query: '{query}' (Position: {position}) -> {page_url}")

                # Extract the slug from the URL
                slug = self.extract_post_slug_from_url(page_url)
                if not slug:
                    self.logger.warning(f"Could not extract slug from URL: {page_url}")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Could not extract slug'
                    })
                    continue

                # Get the post by slug
                post_data, post_type = self.get_post_by_slug(slug)
                if not post_data:
                    self.logger.warning(f"Post not found for slug: {slug} (URL: {page_url})")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Post not found in WordPress'
                    })
                    continue

                post_id = post_data['id']
                current_title = post_data['title']['rendered']

                # Get the current RankMath meta description
                rankmath_meta = self.get_rankmath_meta(post_id, post_type)
                current_description = rankmath_meta.get('description', '')

                # Generate a new title and description using OpenAI or the fallback
                new_title, new_meta_description, focus_keyword = self.generate_new_title_and_description(
                    slug, query, current_title, current_description, position, post_data
                )

                self.logger.info(f"Current title: {current_title}")
                self.logger.info(f"New title: {new_title}")
                self.logger.info(f"Current description: {current_description}")
                self.logger.info(f"New description: {new_meta_description}")
                self.logger.info(f"Focus keyword: {focus_keyword}")

                if dry_run:
                    self.logger.info("DRY RUN - Would update post but skipping actual update")
                    self.updated_posts.append({
                        'post_id': post_id,
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'old_title': current_title,
                        'new_title': new_title,
                        'old_description': current_description,
                        'new_description': new_meta_description,
                        'focus_keyword': focus_keyword,
                        'status': 'DRY RUN'
                    })
                else:
                    # Update the post
                    success, result = self.update_post_seo(
                        post_id, new_title, new_meta_description, post_type, focus_keyword
                    )

                    if success:
                        self.updated_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'old_title': current_title,
                            'new_title': new_title,
                            'old_description': current_description,
                            'new_description': new_meta_description,
                            'focus_keyword': focus_keyword,
                            'status': 'Updated'
                        })
                        self.logger.info(f"✅ Successfully updated post ID {post_id}")
                    else:
                        self.failed_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'error': result,
                            'status': 'Failed'
                        })
                        self.logger.error(f"❌ Failed to update post ID {post_id}: {result}")

                total_processed += 1

                # Add a delay to avoid overwhelming the server
                time.sleep(2)  # Increased delay for safety

                # Progress update every 5 posts
                if total_processed % 5 == 0:
                    self.logger.info(f"Processed {total_processed}/{len(df_with_pages)} posts...")

            self.logger.info(f"Processing complete. Total processed: {total_processed}")
            self.generate_reports()

        except Exception as e:
            self.logger.error(f"Error processing Excel file: {str(e)}")
            raise
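    # Minimal sketch of the spreadsheet the method above expects. Column names
    # come from the code ('query', 'best_matching_page', 'position',
    # 'match_type'); the row values are made up, while the file and sheet
    # names mirror the defaults used in main() below:
    #
    #   demo = pd.DataFrame([{
    #       "query": "satellite crop monitoring",
    #       "best_matching_page": "https://example.com/satellite-crop-monitoring/",
    #       "position": 18.4,
    #       "match_type": "partial",
    #   }])
    #   demo.to_excel("gsc_analysis_english_filtered.xlsx",
    #                 sheet_name="English_Queries_Position_15+", index=False)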
    def generate_reports(self):
        """Generate reports for updated posts, failures, and not-found URLs."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Save the updated posts report
        if self.updated_posts:
            updated_df = pd.DataFrame(self.updated_posts)
            updated_file = f"updated_posts_{timestamp}.xlsx"
            updated_df.to_excel(updated_file, index=False)
            self.logger.info(f"✅ Updated posts report saved to: {updated_file}")

        # Save the failed posts report
        if self.failed_posts:
            failed_df = pd.DataFrame(self.failed_posts)
            failed_file = f"failed_posts_{timestamp}.xlsx"
            failed_df.to_excel(failed_file, index=False)
            self.logger.info(f"❌ Failed posts report saved to: {failed_file}")

        # Save the not-found URLs report
        if self.not_found_urls:
            not_found_df = pd.DataFrame(self.not_found_urls)
            not_found_file = f"not_found_urls_{timestamp}.xlsx"
            not_found_df.to_excel(not_found_file, index=False)
            self.logger.info(f"⚠️ Not found URLs report saved to: {not_found_file}")

        # Print summary
        self.logger.info("=" * 60)
        self.logger.info("RANKMATH SEO UPDATE SUMMARY")
        self.logger.info("=" * 60)
        self.logger.info(f"✅ Successfully updated: {len(self.updated_posts)}")
        self.logger.info(f"❌ Failed updates: {len(self.failed_posts)}")
        self.logger.info(f"⚠️ URLs not found: {len(self.not_found_urls)}")
        self.logger.info("=" * 60)
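# Hedged usage sketch (an alternative to main() below): build the updater from
# environment variables and do a dry run over the first 5 posts before
# committing to live updates. from_env() is the illustrative helper added above.
#
#   updater = WordPressSEOUpdater.from_env()
#   if updater.test_connection():
#       updater.process_excel_file(
#           "gsc_analysis_english_filtered.xlsx",
#           sheet_name="English_Queries_Position_15+",
#           dry_run=True,
#           max_posts=5,
#       )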
def main():
    # Configuration - UPDATE THESE VALUES (or export the environment variables below)
    WORDPRESS_URL = 'https://farmonaut.com'  # Update this
    USERNAME = os.environ.get('WP_USERNAME', 'your-username')  # WordPress username
    APP_PASSWORD = os.environ.get('WP_APP_PASSWORD', 'your-app-password')  # WordPress Application Password, not the regular password!
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')  # Optional but recommended; never hard-code API keys in source
    EXCEL_FILE_PATH = 'gsc_analysis_english_filtered.xlsx'  # Path to your GSC analysis file

    # Processing options
    DRY_RUN = False  # Set to True to simulate updates without making changes
    MAX_POSTS = None  # Set to None to process all posts, or a number to limit
    SHEET_NAME = 'English_Queries_Position_15+'  # Sheet name in your Excel file

    print("🚀 Starting WordPress RankMath SEO Updater with OpenAI")
    print("=" * 60)

    # Initialize the updater
    updater = WordPressSEOUpdater(WORDPRESS_URL, USERNAME, APP_PASSWORD, OPENAI_API_KEY)

    # Test the connection
    print("🔗 Testing WordPress connection...")
    if not updater.test_connection():
        print("❌ Failed to connect to WordPress. Please check your credentials.")
        print("\nTroubleshooting:")
        print("1. Ensure you're using a WordPress Application Password (not your regular password)")
        print("2. Check if the WordPress REST API is enabled")
        print("3. Verify your WordPress URL is correct")
        print("4. Make sure your user has sufficient permissions")
        return

    print("✅ WordPress connection successful!")

    try:
        # Check if the Excel file exists
        if not os.path.exists(EXCEL_FILE_PATH):
            print(f"❌ Excel file not found: {EXCEL_FILE_PATH}")
            return

        print(f"📊 Processing Excel file: {EXCEL_FILE_PATH}")
        print(f"📄 Sheet: {SHEET_NAME}")
        print(f"🔄 Mode: {'DRY RUN' if DRY_RUN else 'LIVE UPDATE'}")
        print(f"🤖 OpenAI: {'Enabled' if updater.use_openai else 'Disabled (using basic optimization)'}")
        if MAX_POSTS:
            print(f"📈 Max posts: {MAX_POSTS}")

        # Process the Excel file
        updater.process_excel_file(
            excel_file_path=EXCEL_FILE_PATH,
            sheet_name=SHEET_NAME,
            dry_run=DRY_RUN,
            max_posts=MAX_POSTS
        )

        print("\n🎉 Processing completed successfully!")

    except FileNotFoundError:
        print(f"❌ Excel file not found: {EXCEL_FILE_PATH}")
        print("Please make sure the file exists and the path is correct.")
    except Exception as e:
        print(f"❌ Error: {e}")
        print("\n🔧 Troubleshooting checklist:")
        print("1. WordPress Application Password is correctly set")
        print("2. Excel file exists and has the correct sheet name")
        print("3. WordPress REST API is enabled")
        print("4. RankMath SEO plugin is installed and active")
        print("5. User has sufficient permissions")


if __name__ == "__main__":
    main()
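# Setup reminder (general WordPress behaviour, not specific to this script):
# Application Passwords are created under Users -> Profile -> Application
# Passwords in the WordPress admin (available since WordPress 5.6). The
# generated password is what goes into WP_APP_PASSWORD / APP_PASSWORD above;
# the normal login password will generally not work for REST API Basic Auth.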