import json import os from huggingface_hub import InferenceClient from moviepy.editor import VideoFileClip, concatenate_videoclips, vfx, AudioFileClip, CompositeAudioClip from get_news_articles import get_news_articles from eventregistry import * from datetime import datetime, timedelta from tqdm import tqdm from openai import OpenAI import traceback import time import ast from pydub import AudioSegment from gdrive_upload import upload_files_to_drive from pathlib import Path from mutagen.mp3 import MP3 import librosa import soundfile as sf # Create directory structure for each article def ensure_article_directories(base_dir, date_str, article_folder): """ base_dir: main articles directory (e.g., 'articles') date_str: date string in YYYY-MM-DD format article_folder: unique article folder name (e.g., '123456_Title') """ paths = [ os.path.join(base_dir, date_str, article_folder, "content"), os.path.join(base_dir, date_str, article_folder, "assets/videos"), os.path.join(base_dir, date_str, article_folder, "assets/audio"), os.path.join(base_dir, date_str, article_folder, "assets/images"), os.path.join(base_dir, date_str, article_folder, "output"), ] for path in paths: os.makedirs(path, exist_ok=True) # API keys and clients setup hf2 = "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" model_key = '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' client = OpenAI(api_key=model_key) #farmonaut_info farmonaut_info = ''' Farmonaut is an agricultural technology company founded in 2018 that leverages satellite imagery, artificial intelligence (AI), and remote sensing to provide precision farming solutions accessible via Android, iOS, web platforms, and APIs. Its platform enables farmers to monitor crop health in real-time using vegetation indices like NDVI, NDRE, and EVI, detect early signs of stress, assess soil moisture, and receive personalized advisories through its Jeevn AI system. By integrating satellite data with AI-driven analytics, Farmonaut assists farmers in making informed decisions on irrigation, fertilization, and pest control, leading to optimized resource use and improved yields. The platform supports features such as yield prediction, field boundary detection, and historical crop performance analysis, making it suitable for both small-scale growers and large agribusinesses. Farmonaut's services are designed to be affordable and user-friendly, aiming to democratize access to advanced agricultural technologies and promote sustainable farming practices globally. ''' # Narration guidelines unchanged... narration_guidelines = { "video_narration_guidelines": { "attention_grabbing_start": { "description": "Hook viewers emotionally or with curiosity to immediately engage them." }, "script_writing": { "description": "Write clear, concise, and conversational scripts aligned with visuals.", "key_points": [ "Use simple, relatable language", "Avoid repetition and unnecessary words", "Focus on a single clear message or topic" ] }, "pacing_and_timing": { "description": "Maintain natural pace with pauses and sync narration with visuals.", "key_points": [ "Avoid too fast or too slow pacing", "Include pauses for effect", "Match narration timing with video transitions" ] }, "vocal_tone_and_inflection": { "description": "Use a friendly, clear, and varied tone suited to the audience.", "key_points": [ "Speak clearly and concisely", "Avoid monotone or overly dramatic voices", "Adjust energy level based on video type" ] }, "audio_quality": { "description": "Ensure high-quality audio with good microphones and quiet recording environment.", "key_points": [ "Use good quality microphones", "Record in quiet, acoustically treated spaces", "Maintain consistent audio levels" ] }, "length": { "description": "Keep narration length appropriate to video length and content depth.", "key_points": [ "Aim for about 125 words per minute", "Longer videos (~10 minutes) can perform better if content is relevant" ] }, "storytelling_and_energy": { "description": "Incorporate storytelling and enthusiasm to increase engagement.", "key_points": [ "Use storytelling elements for immersion", "Bring energy and passion to narration" ] }, "audience_and_platform_tailoring": { "description": "Adapt narration style, tone, and length to the platform and audience.", "key_points": [ "Tailor style for social media vs webinars", "Consider audience demographics and preferences" ] }, "captions_and_transcripts": { "description": "Add captions and transcripts to improve accessibility and SEO." } } } def merge_audio_video(video_path, audio_path, output_path=None, audio_start=0, keep_original_audio=False, audio_volume=1.0): """ Merge audio and video files using MoviePy. Parameters: ----------- video_path : str Path to the video file audio_path : str Path to the audio file output_path : str, optional Path to save the output file. If None, creates a file with '_merged' suffix in the same directory audio_start : float, optional Time in seconds where the audio should start in the video (default: 0) keep_original_audio : bool, optional Whether to keep the original video audio (default: False) audio_volume : float, optional Volume level for the added audio (default: 1.0) Returns: -------- str Path to the output video file """ try: # Load video and audio clips video_clip = VideoFileClip(video_path) audio_clip = AudioFileClip(audio_path) # Set audio start time and duration audio_clip = audio_clip.set_start(audio_start) # Clip audio to match video duration max_audio_duration = max(0, video_clip.duration - audio_start) if audio_clip.duration > max_audio_duration: audio_clip = audio_clip.subclip(0, max_audio_duration) # Set audio volume audio_clip = audio_clip.volumex(audio_volume) # Create composite audio if keeping original audio if keep_original_audio and video_clip.audio is not None: final_audio = CompositeAudioClip([video_clip.audio, audio_clip]) else: final_audio = audio_clip # Set the audio to the video clip final_clip = video_clip.set_audio(final_audio) # Generate output path if not provided if output_path is None: filename, ext = os.path.splitext(video_path) output_path = f"{filename}_merged{ext}" # Write the output file final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac") # Close clips to free resources video_clip.close() audio_clip.close() final_clip.close() return output_path except Exception as e: print(f"Error merging audio and video: {e}") return None def update_video_speed(video_file_name, output_video_file_name, target_duration): # Load your video clip = VideoFileClip(video_file_name) # Calculate the speed factor speed_factor = clip.duration / target_duration # Apply speed change new_clip = clip.fx(vfx.speedx, factor=speed_factor) # Write the result to a file new_clip.write_videofile(output_video_file_name) # Close clips clip.close() new_clip.close() # In the generate_videos_from_json function def generate_videos_from_json(video_prompts, video_dir_prefix, target_duration): """ Generate videos from prompts and merge them into a single video, saving all files in the given directory. Args: video_prompts: List of video prompt strings video_dir_prefix: Path prefix for saving video files (e.g., .../assets/videos/Clean_Title) target_duration: Target duration in seconds Returns: Path to the final merged video """ # Use the provided directory for all video files temp_directory = os.path.dirname(video_dir_prefix) os.makedirs(temp_directory, exist_ok=True) # Initialize Hugging Face client client = InferenceClient( provider="fal-ai", api_key="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", ) # Store paths of generated videos video_paths = [] # Generate videos for each prompt for i, prompt in enumerate(video_prompts): # Generate video ai_model = "Wan-AI/Wan2.1-T2V-14B" processed = 0 while processed == 0: try: print(f"Generating video {i+1}/{len(video_prompts)}: '{prompt}'") video_output = client.text_to_video( prompt=prompt, negative_prompt="Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, text, overlay text, subtitles", num_frames=81, guidance_scale=5.0, model=ai_model ) processed = 1 except: print(traceback.format_exc()) time.sleep(30) # Save to local storage segment_path = f"{video_dir_prefix}_segment_{i+1}.mp4" with open(segment_path, "wb") as f: f.write(video_output) video_paths.append(segment_path) print(f"Video segment saved to {segment_path}") # Merge all videos into one print("Merging video segments...") video_clips = [VideoFileClip(path) for path in video_paths] final_clip = concatenate_videoclips(video_clips) # Save the merged video before speed adjustment merged_path = f"{video_dir_prefix}_merged.mp4" final_clip.write_videofile(merged_path, fps = 16) # Close video clips for clip in video_clips: clip.close() final_clip.close() # Adjust speed to match target duration final_path = f"{video_dir_prefix}_final.mp4" update_video_speed(merged_path, final_path, target_duration) print(f"Final speed-adjusted video saved to {final_path}") return final_path def call_genai(prompt): model = "gpt-4.1" model = "gpt-4.1-mini-2025-04-14" completion = client.chat.completions.create( model=model, messages=[ { "role": "user", "content": prompt } ] ) print(completion.choices[0].message.content) return completion.choices[0].message.content def generate_video_script(content, duration, video_segments): total_words = 8*video_segments prompt = f'''I am making a video of {duration} seconds duration based on the content below. The video is divided into equal 5 second segments. How do these 5 second segments look like is provided in the segment_info below. I want the video narration for my {duration} seconds video which will be spoken by a human as the audio of this video. can you provide me the script for this. return the script as a single paragraph. Include how Farmonaut (the company which is making this video) can play a role in this. Can you make this script into a super simple, relatable, and story-style format, so that even someone with no background in this topic can easily understand and stay engaged. Only return the script. No other text. - total_words_in_script:less than {total_words} words - content: {content} - segment_info: {video_segments} - narration_guidelines: {narration_guidelines} - information_about_farmonaut: {farmonaut_info}''' script = call_genai(prompt) return script def text_to_audio(prompt, file_name): # Ensure the path exists os.makedirs(os.path.dirname(file_name), exist_ok=True) with client.audio.speech.with_streaming_response.create( model="gpt-4o-mini-tts", voice="coral", input=prompt, instructions="Speak in a professional tone.", ) as response: response.stream_to_file(file_name) def make_video_scene_prompts(content, x): video_instruction = { "prompt_guidelines": { "structure": { "formula": "Subject + Scene + Motion", "components": { "subject": "The main object or character (e.g., 'a knight', 'a dog', 'a futuristic robot')", "scene": "The environment or setting (e.g., 'in a medieval city at dusk', 'on a rainbow', 'in a neon-lit alley')", "motion": "The action or movement (e.g., 'rides a flying dragon', 'skateboarding', 'walking through the rain')" }, "example": "A knight in shining armor rides a flying dragon over a medieval city at dusk, cinematic lighting, smooth camera pan." }, "details": { "add_cinematic_and_visual_details": [ "lighting (e.g., 'soft morning light', 'cinematic lighting', 'neon glow')", "camera movement (e.g., 'smooth camera pan', 'zoom in', 'overhead shot')", "style (e.g., 'realistic', 'cartoon', 'cyberpunk', 'Van Gogh style')", "atmosphere (e.g., 'foggy', 'cheerful', 'mysterious')" ] }, "focus_and_consistency": { "one_scene_per_prompt": "Each prompt should focus on one subject, one scene, and one primary action.", "avoid_mid_clip_changes": "Do not switch subjects or settings within a single prompt." }, "negative_prompts": { "usage": "Specify what you don't want (e.g., '--no blur, no text, no watermark') to reduce artifacts." }, "advanced_dimensions": { "shot_size": "e.g., 'close-up', 'wide shot'", "angle": "e.g., 'low angle', 'bird’s-eye view'", "lens_type": "e.g., 'fisheye', 'telephoto'", "camera_movement": "e.g., 'tracking shot', 'dolly zoom'", "speed_effects": "e.g., 'slow motion', 'time lapse'", "atmosphere": "e.g., 'foggy', 'cheerful', 'mysterious'", "style": "e.g., 'realistic', 'cartoon', 'cyberpunk', 'Van Gogh style'" }, "generation_settings": { "guidance_scale": "Use a moderate value (5–7). Too high causes flicker, too low causes drift.", "diffusion_steps": "More steps sharpen details but take longer. Start moderate and adjust.", "consistency_aids": "Use tools or workflows that improve frame-to-frame consistency if available." }, "iteration": { "refine_prompt": "Clarify or expand your prompt if the result isn’t as expected.", "adjust_negative_prompts": "Remove unwanted elements as needed.", "tweak_settings": "Adjust guidance and steps for smoother motion or sharper visuals." }, "examples": [ { "type": "Basic Formula", "prompt": "A dog skateboarding on a rainbow, cartoon style, bright daylight, smooth camera tracking." }, { "type": "Cinematic Formula", "prompt": "A detective walks through a neon-lit alley at night, rain falling, cinematic lighting, slow motion." }, { "type": "Artistic Formula", "prompt": "A ballerina dances in a surreal dreamscape, pastel colors, soft focus, inspired by Monet." } ], "pro_tips": [ "Test your prompt as a single image before generating the video.", "For complex scenes, generate separate clips and merge them in post-editing.", "Explore community workflows or prompt libraries for inspiration." ] } } prompt = f''' "I need to create a video containing {x} scenes based on the following content and video_instructions. For each segment, write a concise prompt that includes the relevant slice of the content. Include how Farmonaut (the company which is making this video) can play a role. Output all segment prompts as a comma-separated array. add more details into these scenes to make them more engaging and having the capability to hook the viewer. Note: The array should only have the prompt text. nothing else." Variables: - content: {content} - video_best_guidelines: {video_instruction} - information_about_farmonaut: {farmonaut_info} ''' return call_genai(prompt) if __name__ == "__main__": article_num = 200 geographies = [ ['http://en.wikipedia.org/wiki/United_States', article_num, "en"], ['http://en.wikipedia.org/wiki/Europe', article_num, "NA"], ['http://en.wikipedia.org/wiki/Canada', article_num, "NA"], ['http://en.wikipedia.org/wiki/United_Kingdom', article_num, "en"], ['http://en.wikipedia.org/wiki/New_Zealand', article_num, "en"], ['http://en.wikipedia.org/wiki/South_America', article_num, "NA"], ['http://en.wikipedia.org/wiki/Australia', article_num, "en"], ['http://en.wikipedia.org/wiki/Southeast_Asia', article_num, "NA"], ['http://en.wikipedia.org/wiki/Africa', article_num, "NA"], ] APIKEY = "ae5e326f-a428-41d3-b0c8-09f1746f98b1" # Initialize News API er = EventRegistry(apiKey=APIKEY, allowUseOfArchive=False) duration = 120 total_scenes = duration/5 for geography_obj in geographies: end_date = datetime.today().strftime("%Y-%m-%d") start_date = (datetime.today() - timedelta(days=6)).strftime("%Y-%m-%d") print(start_date, end_date) articles = [] geography = geography_obj[0] max_items = geography_obj[1] news_articles = get_news_articles(er, start_date, end_date, geography, "en", max_items=max_items) for single_article in news_articles: articles.append(single_article) article_count = 0 for article in tqdm(articles, desc="Processing News articles"): try: # Content filtering first content = article.get("body", "No content available") prompt = f'Output YES if the provided text is related to any of the following a. advertisement, b. meat, c. animals, d. mentions pakistan, palestine, north korea or iran, e. suicide, murder, homicide, f.sports, g. religion, h. info/news about an event from 2024 or earlier, i. about a company, j. about a person, k. about a real farmer, l. about a school or a real organization, m. an event/ exhibiton/ summit/ meeting, n. a place of historical significance, o. including politics, leaders or politicians. Only answer YES OR NO. Text: {content}' is_advertisement = call_genai(prompt) print('ad_status', is_advertisement) if is_advertisement.lower() == "yes": continue is_in_domain = call_genai(f'Output YES if the provided text is related to any of the following a. agriculture, b. farming, c. forest. Only answer YES OR NO. Text: {content}') print('domain_status', is_in_domain) if is_in_domain.lower() == "no": continue # worthy_video = call_genai(f'Farmonaut (an agritech startup) is planning to make a video on this topic. Assess the content to decide if this video is suitable for Farmonaut Youtube Channel. Only answer YES OR NO. Text: {content}') # print('is_worthy', worthy_video) # if worthy_video.lower() == "no": # continue content = call_genai(f'I am planning to make a video using the attached reference content. However, I want the video to be informational, general knowledge. Can you go through this content and convert it into an informational, general knowledge of about 500 words. Only return the content. No other text. Text content: {content}') # Only now create folders and save files for articles that will have videos made article_id = str(int(time.time())) article_title = call_genai(f'Can you make an interesting, catchy title for this article in less than 90 characters? I will use it for making a Youtube video. It will be published on Youtube channel of Farmonaut. The title should match with what Farmonaut is and does. The title should have the potential to attract viewers. Only return the title. - Text: {content}. - info_about_farmonaut: {farmonaut_info}') print('title', article_title) video_caption = call_genai(f'Farmonaut (an agritech startup) is planning to make a video on this topic. Can you provide a caption for this video in about 1000 characters with 5-10 relevant hashtags? Only return the caption. No other text. - Text content: {content}, - info_about_farmonaut: {farmonaut_info}, - title: {article_title}') clean_title = "".join(c for c in article_title if c.isalnum() or c.isspace()).strip() clean_title = clean_title.replace(" ", "_")[:50] date_str = datetime.today().strftime("%Y-%m-%d") article_folder = f"{article_id}_{clean_title}" base_dir = "articles" # Create article-specific directories ensure_article_directories(base_dir, date_str, article_folder) article_base_path = os.path.join(base_dir, date_str, article_folder) # Save article_title and video_caption to a single txt file in output folder output_info_path = os.path.join(article_base_path, "output", "video_info.txt") with open(output_info_path, "w") as info_f: info_f.write(f"Title:\n{article_title}\n\nCaption:\n{video_caption}\n") # Save metadata metadata = { "id": article_id, "title": article_title, "clean_title": clean_title, "date": date_str, "geography": geography, "source": article.get("source", ""), "url": article.get("url", ""), "raw_article": article } with open(os.path.join(article_base_path, "metadata.json"), "w") as meta_f: json.dump(metadata, meta_f, indent=2) # Save content with open(os.path.join(article_base_path, "content", "article.txt"), "w") as content_f: content_f.write(content) # Generate video prompts video_prompts = make_video_scene_prompts(content, total_scenes) video_prompts = ast.literal_eval(video_prompts) # Generate script and audio final_script = generate_video_script(content, duration, video_prompts) script_audio_file_name = os.path.join(article_base_path, "assets/audio", f"{clean_title}_{article_id}.mp3") text_to_audio(final_script, script_audio_file_name) # Get audio duration audio = MP3(script_audio_file_name) actual_duration = audio.info.length # Generate and process videos final_video_path = generate_videos_from_json( video_prompts, os.path.join(article_base_path, "assets/videos", clean_title), actual_duration ) # Merge narration audio with video output_path = os.path.join(article_base_path, "output", f"{clean_title}_{article_id}.mp4") print('Merging narration audio and video') narration_merged_path = output_path merge_audio_video( video_path=final_video_path, audio_path=script_audio_file_name, output_path=narration_merged_path, audio_start=0, keep_original_audio=False, audio_volume=1.0 ) # Merge background audio with narration-merged video print('Adding background audio (prism.mp3)') bg_audio_merged_path = os.path.join(article_base_path, "output", f"{clean_title}_{article_id}_with_bg.mp4") merge_audio_video( video_path=narration_merged_path, audio_path="bg_audio/prism.mp3", output_path=bg_audio_merged_path, audio_start=0, keep_original_audio=True, audio_volume=0.1 ) print(f"Final video created at: {bg_audio_merged_path}") # Append join_now.mp4 before watermarking with fade transition from moviepy.editor import VideoFileClip, concatenate_videoclips, vfx join_now_path = "watermarks/join_now.mp4" final_with_joinnow_path = os.path.join(article_base_path, "output", f"{clean_title}_{article_id}_with_bg_joinnow.mp4") fade_duration = 1 # seconds try: with VideoFileClip(bg_audio_merged_path) as main_clip, VideoFileClip(join_now_path) as join_clip: # Apply fade out to the end of the main video main_clip = main_clip.fx(vfx.fadeout, fade_duration) # Apply fade in to the start of the join_now video join_clip = join_clip.fx(vfx.fadein, fade_duration) # Concatenate with a smooth transition final_with_joinnow = concatenate_videoclips([main_clip, join_clip], method="compose") final_with_joinnow.write_videofile(final_with_joinnow_path, codec="libx264", audio_codec="aac") final_with_joinnow.close() except Exception as e: print(f"Error appending join_now.mp4 with fade transition: {e}") # Add watermarks to the final video try: def add_watermarks_to_video(video_path, logo_path, footer_path): from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip import os video = VideoFileClip(video_path) w, h = video.size # Farmonaut logo: 7% of video height, top right logo = ImageClip(logo_path) logo_height = int(h * 0.1) logo = logo.resize(height=logo_height) logo = logo.set_position(("right", "top")).set_start(0).set_duration(video.duration) # Footer: full width, bottom footer = ImageClip(footer_path) footer = footer.resize(width=w) footer_h = footer.size[1] footer = footer.set_position(("center", h - footer_h)).set_start(0).set_duration(video.duration) # Composite final = CompositeVideoClip([video, logo, footer]) # Always output to a new file with _watermarked before extension base, ext = os.path.splitext(video_path) output_path = f"{base}_watermarked{ext}" final.write_videofile(output_path, codec="libx264", audio_codec="aac") video.close() logo.close() footer.close() final.close() return output_path watermarks_dir = "watermarks" logo_path = os.path.join(watermarks_dir, "farmonaut_logo.png") footer_path = os.path.join(watermarks_dir, "footer.png") watermarked_path = add_watermarks_to_video( final_with_joinnow_path, logo_path, footer_path ) print(f"Watermarks added to: {watermarked_path}") # Upload watermarked video and uniquely named video_info.txt to Google Drive try: # Create a uniquely named copy of video_info.txt unique_info_name = f"{clean_title}_{article_id}_video_info.txt" unique_info_path = os.path.join(article_base_path, "output", unique_info_name) import shutil shutil.copyfile(output_info_path, unique_info_path) upload_results = upload_files_to_drive([watermarked_path, unique_info_path]) print("Google Drive upload results:") for f, link in upload_results.items(): print(f"{f}: {link}") # Optionally, remove the temp unique info file after upload try: os.remove(unique_info_path) except Exception as cleanup_e: print(f"Warning: Could not remove temp file {unique_info_path}: {cleanup_e}") except Exception as e: print(f"Error uploading to Google Drive: {e}") except Exception as e: print(f"Error adding watermarks: {traceback.format_exc()}") # print(f"Error creating Jira ticket: {traceback.format_exc()}") article_count = article_count + 1 if article_count % 10: time.sleep(60*60*60) except Exception as e: print(f"Error processing article: {traceback.format_exc()}") continue