"""Fetch JEEVN AI advisories grouped per crop for blog generation.

The module asks a cloud function for the advisory keys of each crop and then
loads the matching advisory documents from Firestore.
"""

import re
import time
import traceback
from collections import Counter

import firebase_admin
import requests
from firebase_admin import credentials
from google.cloud import firestore
from google.cloud.firestore_v1.base_query import FieldFilter

# Words ignored by get_keywords(); built once instead of on every call.
_STOPWORDS = frozenset({
    "a", "an", "the", "we", "were", "you", "your", "yours", "he", "she",
    "it", "they", "them", "his", "her", "our", "ours", "their", "was", "is",
    "are", "am", "be", "been", "to", "in", "of", "and", "or", "on", "for",
    "with", "as", "by", "at", "from", "this", "that", "these", "those",
})

# Pre-compiled pattern that extracts runs of word characters.
_WORD_PATTERN = re.compile(r"\b\w+\b")

# (connect, read) timeout in seconds for the cloud-function call. The read
# value is deliberately generous; it only exists so a stalled connection
# cannot block the caller forever. Tune it if the endpoint is known to be
# faster or slower.
_REQUEST_TIMEOUT = (10, 600)

# How far back, in seconds, the advisory query window reaches (10 days).
_LOOKBACK_SECONDS = 60 * 60 * 24 * 10


def get_keywords(text, top_n=100):
    """Return the most frequent non-stopword words of *text*.

    Args:
        text: Input string; it is lower-cased before words are extracted.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of at most ``top_n`` words ordered from most to least
        frequent (ties keep first-seen order, as ``Counter.most_common``
        does).
    """
    words = _WORD_PATTERN.findall(text.lower())
    word_counts = Counter(word for word in words if word not in _STOPWORDS)
    return [word for word, _ in word_counts.most_common(top_n)]


cred = credentials.Certificate("servicekey.json")
try:
    firebase_admin.initialize_app(cred)
except Exception as e:
    # Best effort: initialisation failures are reported but do not stop the
    # module from loading.
    print(e)

# Initialize Firestore client
db = firestore.Client()

# Endpoint URL
url = "https://us-central1-farmbase-b2f7e.cloudfunctions.net/create_advisory_blog_predata2"


def get_jeevnai_advisories():
    """Collect the advisories of the last 10 days, grouped per crop.

    The cloud function at ``url`` is asked for advisory keys; its JSON reply
    is expected to carry a ``"Crops"`` mapping whose values are iterables of
    keys (schema inferred from how the reply is consumed here). Every key is
    converted to ``str`` and matched against the ``timestamp`` field of the
    ``JEEVN_AI_advisories`` collection. In each matching document the
    ``UID``, ``fieldID`` and ``fieldName`` entries are overwritten with the
    string ``"None"`` before the document is stringified.

    Returns:
        ``(all_content, '')`` on success, where ``all_content`` is a list
        with one ``{'body': [<stringified advisory>, ...]}`` dict per crop
        (empty when the reply has no ``"Crops"`` entry). The second element
        is always the empty string.
        ``None`` when processing the reply or querying Firestore fails; the
        traceback is printed in that case.

    Raises:
        requests.exceptions.RequestException: If the HTTP request itself
            fails or times out (it is issued outside the guarded section).
    """
    to_time = int(time.time())
    from_time = to_time - _LOOKBACK_SECONDS

    # JSON payload
    payload = {
        "isTesting": True,
        "RequiredBlogsCount": 10,
        "FromTime": from_time,
        "ToTime": to_time,
        "AvoidFromDays": 30,
    }

    # Send POST request with JSON data. The timeout prevents an unresponsive
    # endpoint from hanging this call indefinitely.
    response = requests.post(url, json=payload, timeout=_REQUEST_TIMEOUT)

    all_content = []
    try:
        response_json = response.json()
        crop_blogs = response_json.get("Crops", None)
        if crop_blogs is not None:
            for crop_blog_keys in crop_blogs.values():
                final_advisories_arr = []
                for single_key in crop_blog_keys:
                    # The lookup compares against the string form of the key.
                    single_key = str(single_key)
                    advisory_docs = (
                        db.collection("JEEVN_AI_advisories")
                        .where(filter=FieldFilter("timestamp", "==", single_key))
                        .get()
                    )
                    for doc in advisory_docs:
                        advisory_obj = doc.to_dict()
                        # Blank out the user/field identifiers before the
                        # advisory is handed on.
                        advisory_obj["UID"] = "None"
                        advisory_obj["fieldID"] = "None"
                        advisory_obj["fieldName"] = "None"
                        final_advisories_arr.append(str(advisory_obj))
                all_content.append({'body': final_advisories_arr})
        return all_content, ''
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        print(traceback.format_exc())
        return None