"""Bulk-update WordPress post titles and RankMath SEO metadata from a GSC
(Google Search Console) query-analysis Excel file, optionally using OpenAI
to generate the optimized titles/descriptions."""

import pandas as pd
import requests
import json
import re
import time
import logging
from urllib.parse import urlparse, urljoin
from datetime import datetime
import os
import openai


class WordPressSEOUpdater:
    """Updates post titles and RankMath meta fields through the WordPress
    REST API (``/wp-json/wp/v2``), tracking successes, failures and
    previously-processed posts in a JSON history file."""

    def __init__(self, wordpress_url, username, app_password, openai_api_key=None,
                 history_file='processed_posts.json'):
        """
        Initialize the WordPress SEO updater for RankMath

        Args:
            wordpress_url: Your WordPress site URL (e.g., 'https://yoursite.com')
            username: WordPress username
            app_password: WordPress application password (not regular password)
            openai_api_key: OpenAI API key for AI-powered SEO optimization
            history_file: File to store history of processed post IDs
        """
        self.wordpress_url = wordpress_url.rstrip('/')
        self.api_base = f"{self.wordpress_url}/wp-json/wp/v2"
        self.username = username
        self.app_password = app_password
        self.session = requests.Session()
        # WordPress Application Passwords work with plain HTTP Basic auth.
        self.session.auth = (username, app_password)
        self.history_file = history_file

        # Setup logging BEFORE anything logs. (The original code logged from
        # the OpenAI-init branch before basicConfig had run, and assigned
        # self.logger in three separate places.)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('wordpress_seo_update.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

        # Initialize OpenAI client if API key is provided
        self.openai_client = None
        self.use_openai = False
        if openai_api_key:
            try:
                self.openai_client = openai.OpenAI(api_key=openai_api_key)
                self.use_openai = True
                self.logger.info("āœ… OpenAI API initialized successfully")
            except Exception as e:
                self.logger.warning(
                    f"āš ļø OpenAI API initialization failed: {e}. Falling back to basic optimization."
                )

        # Lists to track results
        self.updated_posts = []
        self.failed_posts = []
        self.not_found_urls = []
        self.skipped_posts = []

        # Load processed posts history
        self.processed_posts = self.load_processed_posts()

    def load_processed_posts(self):
        """Load the history of processed post IDs from the history file.

        Returns:
            set[str]: post IDs (as strings) already processed; empty set on
            missing file or any read/parse error (best-effort by design).
        """
        try:
            if os.path.exists(self.history_file):
                with open(self.history_file, 'r') as f:
                    data = json.load(f)
                self.logger.info(f"Loaded {len(data)} previously processed posts from {self.history_file}")
                return set(data.keys())  # Store post IDs as a set for O(1) lookup
            return set()
        except Exception as e:
            self.logger.warning(f"Failed to load processed posts history: {e}")
            return set()

    def save_processed_post(self, post_id):
        """Save a post ID to the history file with timestamp.

        Re-reads the file each time so concurrent/previous runs' entries are
        preserved; failures are logged but never raised.
        """
        try:
            data = {}
            if os.path.exists(self.history_file):
                with open(self.history_file, 'r') as f:
                    data = json.load(f)
            data[str(post_id)] = {
                'last_updated': datetime.now().isoformat(),
                'post_id': post_id
            }
            with open(self.history_file, 'w') as f:
                json.dump(data, f, indent=2)
            self.logger.info(f"Saved post ID {post_id} to history file")
        except Exception as e:
            self.logger.error(f"Failed to save post ID {post_id} to history: {e}")

    def test_connection(self):
        """Test WordPress API connection by fetching the authenticated user.

        Returns:
            bool: True if /users/me returns HTTP 200, else False.
        """
        try:
            response = self.session.get(f"{self.api_base}/users/me")
            if response.status_code == 200:
                user_data = response.json()
                self.logger.info(f"Successfully connected to WordPress as: {user_data.get('name', 'Unknown')}")
                return True
            else:
                self.logger.error(
                    f"Connection failed. Status: {response.status_code}, Response: {response.text}"
                )
                return False
        except Exception as e:
            self.logger.error(f"Connection test failed: {str(e)}")
            return False

    def extract_post_slug_from_url(self, url):
        """
        Extract post slug from WordPress URL

        Args:
            url: Full WordPress post URL

        Returns:
            str | None: the last non-numeric path segment (handles both
            /%postname%/ and date-based permalink structures), or None.
        """
        try:
            parsed_url = urlparse(url)
            path = parsed_url.path.strip('/')
            # Handle different WordPress permalink structures
            path_parts = path.split('/')
            # Get the last non-empty, non-numeric part as slug (skips
            # trailing page numbers / date components).
            slug = None
            for part in reversed(path_parts):
                if part and not part.isdigit():
                    slug = part
                    break
            return slug
        except Exception as e:
            self.logger.error(f"Error extracting slug from URL {url}: {str(e)}")
            return None

    def get_post_by_slug(self, slug):
        """
        Get WordPress post by slug, falling back to pages.

        Args:
            slug: Post slug

        Returns:
            tuple: (post_data_dict, 'post'|'page') or (None, None).
        """
        try:
            response = self.session.get(f"{self.api_base}/posts", params={'slug': slug})
            if response.status_code == 200:
                posts = response.json()
                if posts:
                    return posts[0], 'post'
            # Not found as a post — try pages with the same slug.
            response = self.session.get(f"{self.api_base}/pages", params={'slug': slug})
            if response.status_code == 200:
                pages = response.json()
                if pages:
                    return pages[0], 'page'
            return None, None
        except Exception as e:
            self.logger.error(f"Error getting post by slug {slug}: {str(e)}")
            return None, None

    def get_rankmath_meta(self, post_id, post_type='post'):
        """
        Get RankMath SEO meta data for a post

        Args:
            post_id: WordPress post ID
            post_type: 'post' or 'page'

        Returns:
            dict: rank_math_* fields normalized to friendly keys; falls back
            to the rendered post title when no RankMath title is set.
            Empty dict on any error.
        """
        try:
            # context=edit is required for the REST API to expose meta fields.
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"
            response = self.session.get(endpoint, params={'context': 'edit'})
            if response.status_code == 200:
                post_data = response.json()
                meta_data = post_data.get('meta', {})
                rankmath_data = {
                    'title': meta_data.get('rank_math_title', ''),
                    'description': meta_data.get('rank_math_description', ''),
                    'focus_keyword': meta_data.get('rank_math_focus_keyword', ''),
                    'canonical_url': meta_data.get('rank_math_canonical_url', ''),
                    'robots': meta_data.get('rank_math_robots', []),
                    'og_title': meta_data.get('rank_math_facebook_title', ''),
                    'og_description': meta_data.get('rank_math_facebook_description', ''),
                    'twitter_title': meta_data.get('rank_math_twitter_title', ''),
                    'twitter_description': meta_data.get('rank_math_twitter_description', '')
                }
                if not rankmath_data['title']:
                    rankmath_data['title'] = post_data.get('title', {}).get('rendered', '')
                return rankmath_data
            return {}
        except Exception as e:
            self.logger.error(f"Error getting RankMath meta for post {post_id}: {str(e)}")
            return {}

    def update_post_seo(self, post_id, new_title, new_meta_description, post_type='post',
                        focus_keyword=None, update_social_meta=True):
        """
        Update post title and meta description using RankMath

        Args:
            post_id: WordPress post ID
            new_title: New post title
            new_meta_description: New meta description
            post_type: 'post' or 'page'
            focus_keyword: Optional focus keyword for RankMath
            update_social_meta: Whether to update social media meta tags

        Returns:
            tuple: (success: bool, response JSON on success / error text on failure)
        """
        try:
            endpoint = f"{self.api_base}/{post_type}s/{post_id}"
            update_data = {
                'title': new_title
            }
            meta_updates = {}
            if new_meta_description:
                meta_updates['rank_math_description'] = new_meta_description
            if new_title:
                meta_updates['rank_math_title'] = new_title
            if focus_keyword:
                meta_updates['rank_math_focus_keyword'] = focus_keyword
            if update_social_meta:
                # Mirror title/description onto Facebook + Twitter cards.
                if new_title:
                    meta_updates['rank_math_facebook_title'] = new_title
                    meta_updates['rank_math_twitter_title'] = new_title
                if new_meta_description:
                    meta_updates['rank_math_facebook_description'] = new_meta_description
                    meta_updates['rank_math_twitter_description'] = new_meta_description
            if meta_updates:
                update_data['meta'] = meta_updates

            response = self.session.post(endpoint, json=update_data)
            if response.status_code == 200:
                self.logger.info(f"Successfully updated {post_type} ID {post_id} with RankMath SEO data")
                return True, response.json()
            else:
                self.logger.error(
                    f"Failed to update {post_type} ID {post_id}. Status: {response.status_code}, Response: {response.text}"
                )
                return False, response.text
        except Exception as e:
            self.logger.error(f"Error updating {post_type} ID {post_id}: {str(e)}")
            return False, str(e)

    def generate_seo_with_openai(self, slug, current_title, current_description, query,
                                 position=None, post_content_excerpt=None):
        """
        Use OpenAI API to generate optimized SEO title and meta description

        Args:
            slug: Post slug/URL path
            current_title: Current post title
            current_description: Current meta description
            query: Search query from GSC
            position: Current ranking position
            post_content_excerpt: First few paragraphs of post content (optional)

        Returns:
            tuple: (optimized_title, optimized_description, focus_keyword, success)
        """
        if not self.use_openai:
            return None, None, None, False
        try:
            prompt = f"""
You are an expert SEO specialist. I need you to optimize the title and meta description for a WordPress post to improve its Google Search Console ranking.

**Current Post Information:**
- Post Slug: {slug}
- Current Title: {current_title}
- Current Meta Description: {current_description or 'None'}
- Target Search Query: {query}
- Current Position: {position if position else 'Unknown'}

**Post Content Preview:**
{post_content_excerpt or 'Not available'}

**Requirements:**
1. Create an SEO-optimized title (max 60 characters) that:
   - Includes the target keyword naturally
   - Is compelling and click-worthy
   - Maintains the original topic/intent
   - Uses power words when appropriate
2. Create an SEO-optimized meta description (max 160 characters) that:
   - Includes the target keyword naturally
   - Clearly describes what the reader will learn
   - Includes a call-to-action
   - Is compelling and encourages clicks
3. Suggest the primary focus keyword for RankMath

**Important Guidelines:**
- The title and description should feel natural, not keyword-stuffed
- Maintain the original meaning and intent of the content
- Make it compelling for human readers, not just search engines
- Consider search intent behind the query

Please respond in this exact JSON format:
{{
  "optimized_title": "Your optimized title here",
  "optimized_description": "Your optimized meta description here",
  "focus_keyword": "primary keyword phrase",
  "reasoning": "Brief explanation of your optimization strategy"
}}
"""
            response = self.openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an SEO expert specializing in optimizing web content."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1000,
                temperature=0.3,
            )
            response_text = response.choices[0].message.content.strip()
            try:
                # Extract the JSON object even if the model wrapped it in prose.
                start_idx = response_text.find('{')
                end_idx = response_text.rfind('}') + 1
                if start_idx == -1 or end_idx <= start_idx:
                    # No JSON object present at all — treat as a parse failure
                    # instead of slicing with a bogus index.
                    self.logger.error(f"Failed to parse OpenAI response as JSON: {response_text}")
                    return None, None, None, False
                json_str = response_text[start_idx:end_idx]
                result = json.loads(json_str)

                optimized_title = result.get('optimized_title', '')
                optimized_description = result.get('optimized_description', '')
                focus_keyword = result.get('focus_keyword', query.lower())
                reasoning = result.get('reasoning', '')

                # Hard-truncate to SERP display limits, just in case the model
                # ignored the character budgets.
                if len(optimized_title) > 60:
                    optimized_title = optimized_title[:57] + "..."
                if len(optimized_description) > 160:
                    optimized_description = optimized_description[:157] + "..."

                self.logger.info(f"šŸ¤– OpenAI optimization reasoning: {reasoning}")
                return optimized_title, optimized_description, focus_keyword, True
            except json.JSONDecodeError:
                self.logger.error(f"Failed to parse OpenAI response as JSON: {response_text}")
                return None, None, None, False
        except Exception as e:
            self.logger.error(f"OpenAI API error: {str(e)}")
            return None, None, None, False

    def get_post_content_excerpt(self, post_data, max_length=500):
        """
        Extract the first part of post content for OpenAI context

        Args:
            post_data: WordPress post data
            max_length: Maximum length of excerpt

        Returns:
            str: Post content excerpt (HTML stripped), or None on error.
        """
        try:
            content = post_data.get('content', {}).get('rendered', '')
            # Crude tag strip is fine here — the text is only LLM context.
            clean_content = re.sub(r'<[^>]+>', ' ', content)
            clean_content = re.sub(r'\s+', ' ', clean_content).strip()
            if len(clean_content) > max_length:
                # Prefer cutting at a sentence boundary.
                sentences = clean_content[:max_length].split('.')
                if len(sentences) > 1:
                    return '.'.join(sentences[:-1]) + '.'
                else:
                    return clean_content[:max_length] + "..."
            return clean_content
        except Exception as e:
            self.logger.warning(f"Could not extract post content excerpt: {e}")
            return None

    def generate_new_title_and_description(self, slug, query, current_title, current_description,
                                           position=None, post_data=None):
        """
        Generate new title and meta description based on query and current ranking position
        Uses OpenAI API if available, otherwise falls back to basic optimization

        Args:
            slug: Post slug
            query: The search query from GSC
            current_title: Current post title
            current_description: Current meta description
            position: Current ranking position (optional)
            post_data: Full post data for content context

        Returns:
            tuple: (new_title, new_meta_description, focus_keyword)
        """
        if self.use_openai and post_data:
            post_content_excerpt = self.get_post_content_excerpt(post_data)
            openai_title, openai_description, openai_keyword, openai_success = self.generate_seo_with_openai(
                slug, current_title, current_description, query, position, post_content_excerpt
            )
            if openai_success and openai_title and openai_description:
                self.logger.info("šŸ¤– Using OpenAI-optimized SEO content")
                return openai_title, openai_description, openai_keyword
            else:
                self.logger.warning("āš ļø OpenAI optimization failed, falling back to basic optimization")

        # --- Basic (non-AI) optimization ---
        self.logger.info("šŸ”§ Using basic SEO optimization")
        clean_query = query.strip().title()
        focus_keyword = query.lower()

        # Prepend the query to the title if it's not already present.
        new_title = current_title
        if query.lower() not in current_title.lower():
            new_title = f"{clean_query} - {current_title}"
        if len(new_title) > 60:
            if len(clean_query) < 40:
                # Keep the full query, shorten the original title portion.
                remaining_chars = 57 - len(clean_query) - 3
                shortened_title = current_title[:remaining_chars]
                new_title = f"{clean_query} - {shortened_title}..."
            else:
                new_title = new_title[:57] + "..."

        # Build/augment the meta description around the query.
        new_description = current_description
        if not current_description or len(current_description) < 50:
            new_description = f"Discover everything about {query}. {current_title} - comprehensive guide with expert insights and practical tips."
        elif query.lower() not in current_description.lower():
            new_description = f"{query.title()}: {current_description}"
        if len(new_description) > 160:
            new_description = new_description[:157] + "..."

        # Ensure the focus keyword appears somewhere in the description.
        if focus_keyword not in new_description.lower():
            words = new_description.split()
            if len(words) > 10:
                insert_point = min(8, len(words) // 2)
                words.insert(insert_point, f"({focus_keyword})")
                new_description = " ".join(words)
                if len(new_description) > 160:
                    new_description = new_description[:157] + "..."

        return new_title, new_description, focus_keyword

    def process_excel_file(self, excel_file_path, sheet_name='English_Queries_Position_15+',
                           dry_run=False, max_posts=None, force_update=False):
        """
        Process the Excel file and update WordPress posts

        Args:
            excel_file_path: Path to the Excel file from GSC analysis
            sheet_name: Sheet name to process
            dry_run: If True, only simulate updates without making changes
            max_posts: Maximum number of posts to process (None for all)
            force_update: If True, update posts even if they were previously processed
        """
        try:
            df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
            self.logger.info(f"Loaded {len(df)} rows from {sheet_name}")

            # Keep only rows that matched a page.
            df_with_pages = df[df['best_matching_page'].notna() & (df['best_matching_page'] != '')].copy()
            # Worst-ranked queries first, so they get processed before any cap.
            if 'position' in df_with_pages.columns:
                df_with_pages = df_with_pages.sort_values('position', ascending=False)
            if max_posts:
                df_with_pages = df_with_pages.head(max_posts)
            self.logger.info(f"Found {len(df_with_pages)} queries with matching pages to process")

            total_processed = 0
            for index, row in df_with_pages.iterrows():
                query = row['query']
                page_url = row['best_matching_page']
                position = row.get('position', None)
                match_type = row.get('match_type', 'Unknown')
                self.logger.info(f"Processing query: '{query}' (Position: {position}) -> {page_url}")

                slug = self.extract_post_slug_from_url(page_url)
                if not slug:
                    self.logger.warning(f"Could not extract slug from URL: {page_url}")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Could not extract slug'
                    })
                    continue

                post_data, post_type = self.get_post_by_slug(slug)
                if not post_data:
                    self.logger.warning(f"Post not found for slug: {slug} (URL: {page_url})")
                    self.not_found_urls.append({
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'reason': 'Post not found in WordPress'
                    })
                    continue

                post_id = post_data['id']
                # Check if post was previously processed
                if str(post_id) in self.processed_posts and not force_update:
                    self.logger.info(f"Skipping post ID {post_id} (already processed)")
                    self.skipped_posts.append({
                        'post_id': post_id,
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'status': 'Skipped (Previously Processed)'
                    })
                    continue

                current_title = post_data['title']['rendered']
                rankmath_meta = self.get_rankmath_meta(post_id, post_type)
                current_description = rankmath_meta.get('description', '')

                new_title, new_meta_description, focus_keyword = self.generate_new_title_and_description(
                    slug, query, current_title, current_description, position, post_data
                )
                self.logger.info(f"Current title: {current_title}")
                self.logger.info(f"New title: {new_title}")
                self.logger.info(f"Current description: {current_description}")
                self.logger.info(f"New description: {new_meta_description}")
                self.logger.info(f"Focus keyword: {focus_keyword}")

                if dry_run:
                    self.logger.info("DRY RUN - Would update post but skipping actual update")
                    self.updated_posts.append({
                        'post_id': post_id,
                        'url': page_url,
                        'query': query,
                        'position': position,
                        'old_title': current_title,
                        'new_title': new_title,
                        'old_description': current_description,
                        'new_description': new_meta_description,
                        'focus_keyword': focus_keyword,
                        'status': 'DRY RUN'
                    })
                else:
                    success, result = self.update_post_seo(
                        post_id, new_title, new_meta_description, post_type, focus_keyword
                    )
                    if success:
                        self.updated_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'old_title': current_title,
                            'new_title': new_title,
                            'old_description': current_description,
                            'new_description': new_meta_description,
                            'focus_keyword': focus_keyword,
                            'status': 'Updated'
                        })
                        self.logger.info(f"āœ… Successfully updated post ID {post_id}")
                        # Save to history only on successful update
                        self.save_processed_post(post_id)
                        self.processed_posts.add(str(post_id))
                    else:
                        self.failed_posts.append({
                            'post_id': post_id,
                            'url': page_url,
                            'query': query,
                            'position': position,
                            'error': result,
                            'status': 'Failed'
                        })
                        self.logger.error(f"āŒ Failed to update post ID {post_id}: {result}")

                total_processed += 1
                # Be gentle with the server between REST calls.
                time.sleep(2)
                if total_processed % 5 == 0:
                    self.logger.info(f"Processed {total_processed}/{len(df_with_pages)} posts...")

            self.logger.info(f"Processing complete. Total processed: {total_processed}")
            self.generate_reports()
        except Exception as e:
            self.logger.error(f"Error processing Excel file: {str(e)}")
            raise

    def generate_reports(self):
        """Generate reports for updated posts, failures, not found URLs, and skipped posts"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if self.updated_posts:
            updated_df = pd.DataFrame(self.updated_posts)
            updated_file = f"updated_posts_{timestamp}.xlsx"
            updated_df.to_excel(updated_file, index=False)
            self.logger.info(f"āœ… Updated posts report saved to: {updated_file}")
        if self.failed_posts:
            failed_df = pd.DataFrame(self.failed_posts)
            failed_file = f"failed_posts_{timestamp}.xlsx"
            failed_df.to_excel(failed_file, index=False)
            self.logger.info(f"āŒ Failed posts report saved to: {failed_file}")
        if self.not_found_urls:
            not_found_df = pd.DataFrame(self.not_found_urls)
            not_found_file = f"not_found_urls_{timestamp}.xlsx"
            not_found_df.to_excel(not_found_file, index=False)
            self.logger.info(f"āš ļø Not found URLs report saved to: {not_found_file}")
        if self.skipped_posts:
            skipped_df = pd.DataFrame(self.skipped_posts)
            skipped_file = f"skipped_posts_{timestamp}.xlsx"
            skipped_df.to_excel(skipped_file, index=False)
            self.logger.info(f"ā­ļø Skipped posts report saved to: {skipped_file}")

        self.logger.info("="*60)
        self.logger.info("RANKMATH SEO UPDATE SUMMARY")
        self.logger.info("="*60)
        self.logger.info(f"āœ… Successfully updated: {len(self.updated_posts)}")
        self.logger.info(f"āŒ Failed updates: {len(self.failed_posts)}")
        self.logger.info(f"āš ļø URLs not found: {len(self.not_found_urls)}")
        self.logger.info(f"ā­ļø Skipped (previously processed): {len(self.skipped_posts)}")
        self.logger.info("="*60)


def main():
    """Script entry point: configure, connect, and process the GSC Excel file."""
    # Configuration - UPDATE THESE VALUES
    WORDPRESS_URL = 'https://farmonaut.com'  # Update this

    # SECURITY FIX: the original script hard-coded a live WordPress
    # Application Password and an OpenAI API key in this file. Credentials
    # must come from the environment; any previously committed keys should
    # be rotated immediately.
    USERNAME = os.environ.get('WP_USERNAME', '')
    APP_PASSWORD = os.environ.get('WP_APP_PASSWORD', '')
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')  # None disables OpenAI

    # BUG FIX: EXCEL_FILE_PATH was referenced below but never defined,
    # which raised NameError on every run. Overridable via environment.
    EXCEL_FILE_PATH = os.environ.get('GSC_EXCEL_FILE', 'gsc_analysis.xlsx')

    # Processing options
    DRY_RUN = True
    MAX_POSTS = 10
    SHEET_NAME = 'English_Queries_Position_15+'
    FORCE_UPDATE = False  # Set to True to update posts even if previously processed

    print("šŸš€ Starting WordPress RankMath SEO Updater with OpenAI")
    print("="*60)
    updater = WordPressSEOUpdater(WORDPRESS_URL, USERNAME, APP_PASSWORD, OPENAI_API_KEY)

    print("šŸ”— Testing WordPress connection...")
    if not updater.test_connection():
        print("āŒ Failed to connect to WordPress. Please check your credentials.")
        print("\nTroubleshooting:")
        print("1. Ensure you're using WordPress Application Password (not regular password)")
        print("2. Check if WordPress REST API is enabled")
        print("3. Verify your WordPress URL is correct")
        print("4. Make sure your user has sufficient permissions")
        return
    print("āœ… WordPress connection successful!")

    try:
        if not os.path.exists(EXCEL_FILE_PATH):
            print(f"āŒ Excel file not found: {EXCEL_FILE_PATH}")
            return
        print(f"šŸ“Š Processing Excel file: {EXCEL_FILE_PATH}")
        print(f"šŸ“„ Sheet: {SHEET_NAME}")
        print(f"šŸ”„ Mode: {'DRY RUN' if DRY_RUN else 'LIVE UPDATE'}")
        print(f"šŸ¤– OpenAI: {'Enabled' if updater.use_openai else 'Disabled (using basic optimization)'}")
        print(f"šŸ”„ Force Update: {'Enabled' if FORCE_UPDATE else 'Disabled (skipping previously processed posts)'}")
        if MAX_POSTS:
            print(f"šŸ“ˆ Max posts: {MAX_POSTS}")
        updater.process_excel_file(
            excel_file_path=EXCEL_FILE_PATH,
            sheet_name=SHEET_NAME,
            dry_run=DRY_RUN,
            max_posts=MAX_POSTS,
            force_update=FORCE_UPDATE
        )
        print("\nšŸŽ‰ Processing completed successfully!")
    except FileNotFoundError:
        print(f"āŒ Excel file not found: {EXCEL_FILE_PATH}")
        print("Please make sure the file exists and the path is correct.")
    except Exception as e:
        print(f"āŒ Error: {e}")
        print("\nšŸ”§ Troubleshooting checklist:")
        print("1. WordPress Application Password is correctly set")
        print("2. Excel file exists and has the correct sheet name")
        print("3. WordPress REST API is enabled")
        print("4. RankMath SEO plugin is installed and active")
        print("5. User has sufficient permissions")


if __name__ == "__main__":
    main()