"""Assemble final article videos from pre-rendered segments and audio tracks.

For every article folder found under a base directory, the pipeline
concatenates the video segments, retimes the result to the narration length,
adds the narration track and finally mixes in a quiet background track.
Each step is skipped when its output file already exists, so an interrupted
run can be resumed.
"""

import json
import os
import re

from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    VideoFileClip,
    concatenate_videoclips,
    vfx,
)
from mutagen.mp3 import MP3

DEFAULT_FPS = 30  # fallback fps if not detected
DEFAULT_BG_AUDIO = "bg_audio/prism.mp3"  # resolved against the working directory

_DIGIT_RUN_RE = re.compile(r"(\d+)")


def _natural_sort_key(file_name):
    """Return a sort key that orders embedded numbers by value, not by text."""
    # Splitting on a capturing group alternates text and digit runs, so the
    # odd positions are always the numeric pieces and types line up when two
    # keys are compared element by element.
    pieces = _DIGIT_RUN_RE.split(file_name)
    parts = [int(piece) if i % 2 else piece for i, piece in enumerate(pieces)]
    # The raw name breaks ties such as "seg_01" vs "seg_1" deterministically.
    return parts, file_name


def _resolve_fps(clip, fallback=DEFAULT_FPS):
    """Return the clip's fps, or *fallback* when it is missing, None or 0."""
    detected = getattr(clip, "fps", None)
    return detected if detected else fallback


def find_article_folders(base_dir="articles"):
    """Yield every directory under *base_dir* that looks like an article.

    A directory qualifies when it directly contains both an ``assets`` and an
    ``output`` sub-directory.
    """
    for root, dirs, _files in os.walk(base_dir):
        if "assets" in dirs and "output" in dirs:
            yield root


def get_video_segments(assets_videos_dir):
    """Return the segment video paths in *assets_videos_dir*, in play order.

    A file counts as a segment when its name ends with ``.mp4`` and contains
    ``_segment_``.  Names are sorted naturally so ``_segment_10`` follows
    ``_segment_9`` instead of ``_segment_1``.

    Returns:
        A list of joined paths; empty when the directory does not exist or
        holds no segments.
    """
    try:
        entries = os.listdir(assets_videos_dir)
    except FileNotFoundError:
        # Treat a missing folder like an empty one so the caller can skip
        # the article instead of aborting the whole batch.
        return []

    names = [
        name for name in entries
        if name.endswith(".mp4") and "_segment_" in name
    ]
    names.sort(key=_natural_sort_key)
    return [os.path.join(assets_videos_dir, name) for name in names]


def merge_segments_with_fps(segments, merged_path, fps=DEFAULT_FPS):
    """Concatenate *segments* in order and write the result to *merged_path*.

    Args:
        segments: Paths of the video files to join; must not be empty.
        merged_path: Destination video file.
        fps: Frame rate used when the first segment does not report one.

    Raises:
        ValueError: If *segments* is empty.
    """
    if not segments:
        raise ValueError("segments must contain at least one video path")

    video_clips = []
    final_clip = None
    try:
        for path in segments:
            video_clips.append(VideoFileClip(path))
        final_clip = concatenate_videoclips(video_clips)
        # Use fps from first segment if available, else fallback
        final_clip.write_videofile(
            merged_path, fps=_resolve_fps(video_clips[0], fps)
        )
    finally:
        # Always close the clips, even when an earlier step raised.
        for clip in video_clips:
            clip.close()
        if final_clip is not None:
            final_clip.close()


def update_video_speed(video_file_name, output_video_file_name, target_duration):
    """Retime a video so that it lasts *target_duration* seconds.

    The speed factor is ``clip.duration / target_duration``, so a video that
    is longer than the target is sped up and a shorter one is slowed down.

    Args:
        video_file_name: Source video path.
        output_video_file_name: Destination video path.
        target_duration: Desired length in seconds; must be positive.

    Raises:
        ValueError: If *target_duration* is not greater than zero.
    """
    if target_duration <= 0:
        raise ValueError(
            f"target_duration must be positive, got {target_duration!r}"
        )

    clip = VideoFileClip(video_file_name)
    new_clip = None
    try:
        speed_factor = clip.duration / target_duration
        new_clip = clip.fx(vfx.speedx, factor=speed_factor)
        new_clip.write_videofile(output_video_file_name, fps=_resolve_fps(clip))
    finally:
        clip.close()
        if new_clip is not None:
            new_clip.close()


def merge_audio_video(video_path, audio_path, output_path, audio_start=0,
                      keep_original_audio=False, audio_volume=1.0):
    """Attach an audio file to a video and write the result to *output_path*.

    Args:
        video_path: Source video path.
        audio_path: Audio file to add.
        output_path: Destination video path (written with libx264 / aac).
        audio_start: Offset in seconds at which the added audio starts.
        keep_original_audio: When true and the video already has audio, mix
            the new track with it; otherwise the new track replaces it.
        audio_volume: Volume multiplier applied to the added audio.
    """
    video_clip = VideoFileClip(video_path)
    audio_clip = None
    final_clip = None
    try:
        audio_clip = AudioFileClip(audio_path)
        audio_clip = audio_clip.set_start(audio_start).volumex(audio_volume)

        # Trim the added audio so it never runs past the end of the video.
        max_audio_duration = max(0, video_clip.duration - audio_start)
        if audio_clip.duration > max_audio_duration:
            audio_clip = audio_clip.subclip(0, max_audio_duration)

        if keep_original_audio and video_clip.audio is not None:
            final_audio = CompositeAudioClip([video_clip.audio, audio_clip])
        else:
            final_audio = audio_clip

        final_clip = video_clip.set_audio(final_audio)
        final_clip.write_videofile(
            output_path,
            fps=_resolve_fps(video_clip),
            codec="libx264",
            audio_codec="aac",
        )
    finally:
        video_clip.close()
        if audio_clip is not None:
            audio_clip.close()
        if final_clip is not None:
            final_clip.close()


def process_article(article_base_path, bg_audio=DEFAULT_BG_AUDIO):
    """Run the four-step video pipeline for one article folder.

    Steps: merge the segments, retime the merged video to the narration
    length, add the narration, then mix in the background track at low
    volume.  A step is skipped when its output file already exists.

    NOTE: an existing output file is treated as complete; a file left behind
    by an interrupted encode must be deleted by hand before re-running.

    Args:
        article_base_path: Folder containing ``metadata.json``, ``assets``
            and ``output``.
        bg_audio: Path of the background track mixed in during the last step.
    """
    assets_videos_dir = os.path.join(article_base_path, "assets", "videos")
    output_dir = os.path.join(article_base_path, "output")
    audio_dir = os.path.join(article_base_path, "assets", "audio")
    metadata_path = os.path.join(article_base_path, "metadata.json")

    if not os.path.exists(metadata_path):
        print(f"Skipping {article_base_path}: no metadata.json")
        return

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(metadata_path, encoding="utf-8") as f:
        metadata = json.load(f)

    clean_title = metadata.get("clean_title")
    article_id = metadata.get("id")
    if clean_title is None or article_id is None:
        # Without both values every derived file name would contain "None".
        print(f"Skipping {article_base_path}: metadata.json lacks clean_title or id")
        return

    segments = get_video_segments(assets_videos_dir)
    if not segments:
        print(f"Skipping {article_base_path}: no video segments found")
        return

    merged_path = os.path.join(assets_videos_dir, f"{clean_title}_merged.mp4")
    final_path = os.path.join(assets_videos_dir, f"{clean_title}_final.mp4")
    narration_audio = os.path.join(audio_dir, f"{clean_title}_{article_id}.mp3")
    output_video = os.path.join(output_dir, f"{clean_title}_{article_id}.mp4")
    output_with_bg = os.path.join(output_dir, f"{clean_title}_{article_id}_with_bg.mp4")

    # Step 1: Merge segments if merged file doesn't exist
    if not os.path.exists(merged_path):
        print(f"Merging segments for {article_base_path}")
        merge_segments_with_fps(segments, merged_path)
    else:
        print(f"Merged video already exists for {article_base_path}")

    # Steps 2 and 3 both read the narration file; bail out early with a
    # message rather than crashing the batch when it is absent.
    narration_needed = not (
        os.path.exists(final_path) and os.path.exists(output_video)
    )
    if narration_needed and not os.path.exists(narration_audio):
        print(f"Skipping {article_base_path}: narration audio not found at {narration_audio}")
        return

    # Step 2: Adjust speed to match narration audio duration
    if not os.path.exists(final_path):
        print(f"Adjusting speed for {article_base_path}")
        actual_duration = MP3(narration_audio).info.length
        update_video_speed(merged_path, final_path, actual_duration)
    else:
        print(f"Speed-adjusted video already exists for {article_base_path}")

    # Step 3: Merge narration audio with video
    if not os.path.exists(output_video):
        print(f"Merging narration audio for {article_base_path}")
        merge_audio_video(final_path, narration_audio, output_video,
                          audio_start=0, keep_original_audio=False,
                          audio_volume=1.0)
    else:
        print(f"Narration-merged video already exists for {article_base_path}")

    # Step 4: Merge background audio
    if not os.path.exists(output_with_bg):
        if not os.path.exists(bg_audio):
            print(f"Skipping background audio for {article_base_path}: {bg_audio} not found")
            return
        print(f"Adding background audio for {article_base_path}")
        merge_audio_video(output_video, bg_audio, output_with_bg,
                          audio_start=0, keep_original_audio=True,
                          audio_volume=0.1)
    else:
        print(f"Background audio already exists for {article_base_path}")

    print(f"Recovery complete for {article_base_path}")


def main():
    """Process every article folder found under the default base directory."""
    for article_base_path in find_article_folders():
        process_article(article_base_path)


if __name__ == "__main__":
    main()