@@ -0,0 +1,181 @@
+import gradio as gr
+import subprocess
+from moviepy.editor import VideoFileClip
+import datetime
+
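+# Browsers reliably play MP4 only when it carries H.264 video and AAC audio,
+# so the helper below re-encodes the raw inference output before it is shown
+# in the gr.Video component.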
|
|
|
+def convert_to_mp4_with_aac(input_path, output_path):
+    # Load the video
+    video = VideoFileClip(input_path)
+
+    # Re-encode as MP4 with H.264 video and AAC audio
+    video.write_videofile(output_path, codec="libx264", audio_codec="aac")
+    # Release moviepy's ffmpeg reader handles
+    video.close()
+
+    return output_path
+
+
|
|
|
+# Check whether an audio file name appears in the list of choices (currently unused)
+def check_file_exists(file_path, audio_list):
+    return file_path in audio_list
+
+
|
|
|
+def load_audio(audio_listed):
+    if audio_listed is None:
+        return None
+    return f"data/audio/{audio_listed}"
+
+
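+# check=True makes a failed inference raise CalledProcessError instead of
+# silently continuing with no output video.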
|
|
|
+def execute_command(command: list[str]) -> None:
+    subprocess.run(command, check=True)
+
+
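+# Run DreamTalk inference by shelling out to the repo's CLI script. The chosen
+# style clip (.mat) drives the emotional expression; --max_gen_len caps the
+# length of the generated clip.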
|
|
|
+def infer(audio_input, image_path, emotional_style):
+    # Timestamp the output name so repeated runs don't collide
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+
+    output_name = f"lipsynced_result_{timestamp}"
+
+    command = [
+        "python",
+        "inference_for_demo_video.py",
+        f"--wav_path={audio_input}",
+        f"--style_clip_path=data/style_clip/3DMM/{emotional_style}",
+        "--pose_path=data/pose/RichardShelby_front_neutral_level1_001.mat",
+        f"--image_path={image_path}",
+        "--cfg_scale=1.0",
+        "--max_gen_len=30",
+        f"--output_name={output_name}",
+    ]
+
+    execute_command(command)
+
+    # Convert the result to browser-compatible codecs
+    input_file = f"output_video/{output_name}.mp4"
+    output_file = f"{output_name}.mp4"
+
+    result = convert_to_mp4_with_aac(input_file, output_file)
+
+    return result
+
+
|
|
|
+css = """
+#col-container{
+    margin: 0 auto;
+    max-width: 940px;
+}
+#project-links{
+    margin: 0 0 12px !important;
+    column-gap: 8px;
+    display: flex;
+    justify-content: center;
+    flex-wrap: nowrap;
+    flex-direction: row;
+    align-items: center;
+}
+#run-btn{
+    border: var(--button-border-width) solid var(--button-primary-border-color);
+    background: var(--button-primary-background-fill);
+    color: var(--button-primary-text-color);
+}
+#run-btn:hover{
+    border-color: var(--button-primary-border-color-hover);
+    background: var(--button-primary-background-fill-hover);
+    color: var(--button-primary-text-color-hover);
+}
+"""
+
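+# UI layout: inputs (portrait image, driving audio, emotional style) on the
+# left; the generated video on the right.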
|
|
|
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.HTML("""
+        <h2 style="text-align: center;">DreamTalk</h2>
+        <p style="text-align: center;">When Expressive Talking Head Generation Meets Diffusion Probabilistic Models</p>
+        <p style="margin:12px auto;display: flex;justify-content: center;">
+            <a href="https://huggingface.co/spaces/fffiloni/dreamtalk?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
+        </p>
+        """)
|
|
|
+        with gr.Row():
+            with gr.Column():
+                image_path = gr.Image(label="Image", type="filepath", sources=["upload"])
+                audio_input = gr.Audio(label="Audio input", type="filepath", sources=["upload"], value="data/audio/acknowledgement_english.m4a")
+                with gr.Row():
+                    audio_list = gr.Dropdown(
+                        label="Choose an audio (optional)",
+                        choices=[
+                            "German1.wav", "German2.wav", "German3.wav", "German4.wav",
+                            "acknowledgement_chinese.m4a", "acknowledgement_english.m4a",
+                            "chinese1_haierlizhi.wav", "chinese2_guanyu.wav",
+                            "french1.wav", "french2.wav", "french3.wav",
+                            "italian1.wav", "italian2.wav", "italian3.wav",
+                            "japan1.wav", "japan2.wav", "japan3.wav",
+                            "korean1.wav", "korean2.wav", "korean3.wav",
+                            "noisy_audio_cafeter_snr_0.wav", "noisy_audio_meeting_snr_0.wav",
+                            "noisy_audio_meeting_snr_10.wav", "noisy_audio_meeting_snr_20.wav",
+                            "noisy_audio_narrative.wav", "noisy_audio_office_snr_0.wav",
+                            "out_of_domain_narrative.wav",
+                            "spanish1.wav", "spanish2.wav", "spanish3.wav"
+                        ],
+                        value="acknowledgement_english.m4a"
+                    )
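+                    # Keep the audio player in sync with the dropdown selection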
|
|
|
+                    audio_list.change(
+                        fn=load_audio,
+                        inputs=[audio_list],
+                        outputs=[audio_input]
+                    )
|
|
|
+                emotional_style = gr.Dropdown(
+                    label="Emotional style",
+                    choices=[
+                        "M030_front_angry_level3_001.mat",
+                        "M030_front_contempt_level3_001.mat",
+                        "M030_front_disgusted_level3_001.mat",
+                        "M030_front_fear_level3_001.mat",
+                        "M030_front_happy_level3_001.mat",
+                        "M030_front_neutral_level1_001.mat",
+                        "M030_front_sad_level3_001.mat",
+                        "M030_front_surprised_level3_001.mat",
+                        "W009_front_angry_level3_001.mat",
+                        "W009_front_contempt_level3_001.mat",
+                        "W009_front_disgusted_level3_001.mat",
+                        "W009_front_fear_level3_001.mat",
+                        "W009_front_happy_level3_001.mat",
+                        "W009_front_neutral_level1_001.mat",
+                        "W009_front_sad_level3_001.mat",
+                        "W009_front_surprised_level3_001.mat",
+                        "W011_front_angry_level3_001.mat",
+                        "W011_front_contempt_level3_001.mat",
+                        "W011_front_disgusted_level3_001.mat",
+                        "W011_front_fear_level3_001.mat",
+                        "W011_front_happy_level3_001.mat",
+                        "W011_front_neutral_level1_001.mat",
+                        "W011_front_sad_level3_001.mat",
+                        "W011_front_surprised_level3_001.mat"
+                    ],
+                    value="M030_front_neutral_level1_001.mat"
+                )
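+                # Example portrait images bundled with the repo (cropped and uncropped)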
|
|
|
+                gr.Examples(
+                    examples=[
+                        "data/src_img/uncropped/face3.png",
+                        "data/src_img/uncropped/male_face.png",
+                        "data/src_img/uncropped/uncut_src_img.jpg",
+                        "data/src_img/cropped/chpa5.png",
+                        "data/src_img/cropped/cut_img.png",
+                        "data/src_img/cropped/f30.png",
+                        "data/src_img/cropped/menglu2.png",
+                        "data/src_img/cropped/nscu2.png",
+                        "data/src_img/cropped/zp1.png",
+                        "data/src_img/cropped/zt12.png"
+                    ],
+                    inputs=[image_path],
+                    examples_per_page=5
+                )
|
|
|
+                with gr.Row():
+                    gr.ClearButton([audio_input, image_path, audio_list])
+                    run_btn = gr.Button("Run", elem_id="run-btn")
+            with gr.Column():
+                output_video = gr.Video(format="mp4")
|
|
|
+                gr.HTML("""
+                <p id="project-links" align="center">
+                    <a href='https://dreamtalk-project.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://arxiv.org/abs/2312.09767'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> <a href='https://youtu.be/VF4vlE6ZqWQ'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
+                </p>
+                <img src="https://github.com/ali-vilab/dreamtalk/raw/main/media/teaser.gif" style="margin: 0 auto;border-radius: 10px;" />
+                """)
|
|
|
+
+    run_btn.click(
+        fn=infer,
+        inputs=[audio_input, image_path, emotional_style],
+        outputs=[output_video]
+    )
+
+demo.queue(max_size=20).launch(server_name="0.0.0.0", debug=True)