12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- import torch
- import torchaudio
- import gradio as gr
- import tempfile
- from tts_webui.bark.npz_tools import load_npz
- from tts_webui.vocos.get_vocos_model import get_vocos_model
- def reconstruct_with_vocos(audio_tokens):
- device = "cpu"
- vocos = get_vocos_model(model_name="charactr/vocos-encodec-24khz")
- audio_tokens_torch = torch.from_numpy(audio_tokens).to(device)
- features = vocos.codes_to_features(audio_tokens_torch)
- bitrate_id_for_6kbps = 2
- return vocos.decode(
- features, bandwidth_id=torch.tensor([bitrate_id_for_6kbps], device=device)
- )
- def vocos_predict(npz_file: tempfile._TemporaryFileWrapper):
- if npz_file is None:
- print("No file selected")
- return None
- full_generation = load_npz(npz_file.name)
- audio_tokens = full_generation["fine_prompt"]
- vocos_output = reconstruct_with_vocos(audio_tokens)
- vocos_output = upsample_to_44100(vocos_output)
- return (44100, vocos_output.cpu().squeeze().numpy())
- def upsample_to_44100(audio):
- return torchaudio.functional.resample(audio, orig_freq=24000, new_freq=44100)
- def get_audio(npz_file: tempfile._TemporaryFileWrapper):
- from tts_webui.bark.get_audio_from_npz import get_audio_from_full_generation
- if npz_file is None:
- print("No file selected")
- return [None, None]
- full_generation = load_npz(npz_file.name)
- return [
- get_audio_from_full_generation(full_generation), # type: ignore
- None,
- ]
- def vocos_bark_ui():
- npz_file = gr.File(label="Input NPZ", file_types=[".npz"], interactive=True)
- submit = gr.Button(value="Decode")
- current = gr.Audio(label="decoded with Encodec:")
- output = gr.Audio(label="decoded with Vocos:")
- npz_file.change(
- fn=get_audio,
- inputs=[npz_file],
- outputs=[current, output],
- api_name="encodec_decode",
- )
- submit.click(
- fn=vocos_predict,
- inputs=[npz_file],
- outputs=output,
- api_name="vocos_npz",
- )
- def vocos_tab_bark():
- with gr.Tab("Vocos (Bark NPZ)"):
- vocos_bark_ui()
- if __name__ == "__main__":
- with gr.Blocks() as demo:
- vocos_tab_bark()
- demo.launch(server_port=7863)
|