import json
import os

import openai
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build


def update_blog_post(
    site_url,
    username,
    app_password,
    post_id,
    new_slug=None,
    new_title=None,
    new_excerpt=None,
    new_content=None,
):
    """Update selected fields of a WordPress post via the REST API."""
    url = f"{site_url}/wp-json/wp/v2/posts/{post_id}"
    auth = HTTPBasicAuth(username, app_password)

    # Build the payload only with non-None values
    payload = {}
    if new_slug is not None:
        payload["slug"] = new_slug
    if new_title is not None:
        payload["title"] = new_title
    if new_excerpt is not None:
        payload["excerpt"] = new_excerpt
    if new_content is not None:
        payload["content"] = new_content

    # If nothing to update, return early
    if not payload:
        return {"status": "skipped", "reason": "No data to update."}

    response = requests.post(url, auth=auth, json=payload)
    return response


def is_valid_json(s):
    """
    Checks if the given string `s` is valid JSON.
    Returns (True, parsed_json) if valid, otherwise (False, error_message).
    """
    try:
        parsed = json.loads(s)
        return True, parsed
    except json.JSONDecodeError as e:
        return False, str(e)


def call_llm(prompt):
    """Send the prompt to OpenAI and return the parsed JSON output (if any)."""
    try:
        response = client.chat.completions.create(
            # model="gpt-4.1-nano-2025-04-14",
            model="gpt-4.1-mini-2025-04-14",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that returns structured JSON for relevance classification.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0,
        )
        text = response.choices[0].message.content
        is_json, json_output = is_valid_json(text)
        return {"json_output": json_output, "is_json": is_json}
    except Exception as e:
        # Keep the return shape consistent with the success path so callers
        # can always check "is_json" and "json_output".
        print("Error calling OpenAI LLM:", e)
        return {"json_output": {}, "is_json": False, "reason": f"Error: {e}"}


def generate_new_title_description(
    original_slug, original_title, original_description, top_queries
):
    prompt = f"""
You are an SEO optimization expert. I will provide you with a blog's **title**, **meta description**, and **top 10 queries from Google Search Console** for which the blog is ranking.

Your task is to **rewrite the title and meta description** to maximize SEO performance, aligning with the provided GSC queries and current SEO best practices.

**Guidelines to follow:**

**Title Optimization:**
- Keep it under **53 characters**.
- Place the **primary keyword** in the **first half** of the title.
- Use **power words** and **emotional triggers** to increase engagement.
- Add **numbers** if appropriate to boost specificity and click-through rates.
- Avoid clickbait; ensure it’s compelling yet trustworthy.

**Meta Description Optimization:**
- Length should be **75–135 characters**.
- Use the **Top Queries from GSC** naturally (mandatory).
- Make it **informative and persuasive** to drive clicks.

### Input:
- **Title:** {original_title}
- **Slug:** {original_slug}
- **Meta Description:** {original_description}
- **Top Queries from GSC:** {top_queries}

### Output (in JSON format):
{{
  "title": "...",
  "meta_description": "..."
}} """ json_output_llm = call_llm(prompt) response = json_output_llm["json_output"] return response.get("title", None), response.get("meta_description", None) def fetch_queries_for_the_blog(url): # Define the date range start_date = "2024-05-22" end_date = "2025-05-22" # Specific page URL to filter specific_page = url # "https://farmonaut.com/australia/revolutionizing-australian-agribusiness-farmonauts-innovative-livestock-management-software-for-efficient-farming/" # All responses storage all_responses = [] startRow = 0 while (startRow == 0) or (startRow % 25000 == 0): request = { "startDate": start_date, "endDate": end_date, "dimensions": [ "query", # "date", # You can change this to "date", "device", etc. ], "dimensionFilterGroups": [ { "filters": [ { "dimension": "page", "operator": "equals", "expression": specific_page, } ] } ], "rowLimit": 25000, "dataState": "final", "startRow": startRow, } response = ( webmasters_service.searchanalytics() .query(siteUrl=website, body=request) .execute() ) # Break loop if no rows returned if "rows" not in response: break startRow = startRow + len(response["rows"]) # print("Fetched up to " + str(startRow) + " rows of data") all_responses.extend(response["rows"]) # Convert to DataFrame import pandas as pd if all_responses == []: return pd.DataFrame() df_filtered_123_123 = pd.DataFrame(all_responses) df_filtered_123_123["multiplication_ratio"] = ( df_filtered_123_123["position"] * df_filtered_123_123["impressions"] ) df_filtered_123_123 = df_filtered_123_123.sort_values( by="multiplication_ratio", ascending=False ) return df_filtered_123_123 from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build # Path to your OAuth 2.0 client ID file CLIENT_SECRETS_FILE = "oauth_clientID.json" # Define the required scopes SCOPES = ["https://www.googleapis.com/auth/webmasters"] # Run the OAuth flow to get credentials flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRETS_FILE, SCOPES) credentials = flow.run_local_server(port=0, open_browser=False) #credentials = flow.run_console() # Build the service object service = build("webmasters", "v3", credentials=credentials) print("GSC Authentication Process Completed") webmasters_service = service # Get a list of site in my Google Search Console Account site_list = webmasters_service.sites().list().execute() print(site_list) # What is the website I want the sitemap for? 
website = site_list["siteEntry"][0]["siteUrl"] # Set your OpenAI API key API_KEY = "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" # Replace with your actual OpenAI API key client = openai.OpenAI(api_key=API_KEY) # Replace these with your actual credentials site_url = "https://www.farmonaut.com" username = "ankuromar296" app_password = "Tjat A2hz 9XMv pXJi YbV0 GR8o" file_name = "blogs_to_remove_123.csv" df_blogs_to_remove = pd.read_csv(file_name) for index, row in df_blogs_to_remove.iterrows(): try: print(index) if row["updated"] == True: continue url = row["url_uncleaned"] original_title = row["title"] original_descripition = row["meta_description"] original_slug = row["slug"] blog_id = row["blog_id"] if url == None: continue if pd.isna(url): continue queries = fetch_queries_for_the_blog(url) top_queries = ( queries["keys"] .head(10) ## Top 10 queries .apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None) .to_list() ) new_title, new_description = generate_new_title_description( original_slug, original_title, original_descripition, top_queries ) update_blog_post( site_url, username, app_password, post_id=blog_id, new_title=new_title, new_excerpt=new_description, ) # Mark the row as updated df_blogs_to_remove.at[index, "updated"] = True # Save the CSV after every update df_blogs_to_remove.to_csv(file_name, index=False) if index % 10 == 0: print(index, "processed") #break except Exception as e: print("Error at index:", index, e) continue
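

# Optional spot-check helper: a minimal sketch, not part of the original script.
# It fetches a post back from the WordPress REST API so you can confirm that the
# new title and excerpt were saved; the endpoint and auth mirror update_blog_post().
def verify_blog_post(site_url, username, app_password, post_id):
    """Return the currently published (rendered) title and excerpt of a post."""
    url = f"{site_url}/wp-json/wp/v2/posts/{post_id}"
    response = requests.get(url, auth=HTTPBasicAuth(username, app_password))
    response.raise_for_status()
    post = response.json()
    return post["title"]["rendered"], post["excerpt"]["rendered"]


# Example usage (post_id 123 is a placeholder; uncomment to run a manual check):
# print(verify_blog_post(site_url, username, app_password, post_id=123))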