123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import glob
- from typing import Any
- import numpy as np
- from tts_webui.bark.FullGeneration import FullGeneration
- import json
- import torch
def compress_history(full_generation: FullGeneration):
    """Return a new dict holding the three prompt arrays cast to ``int16``.

    Only the ``semantic_prompt``, ``coarse_prompt`` and ``fine_prompt``
    entries of *full_generation* are read; any other entries are ignored.
    A missing entry raises ``KeyError``.
    """
    prompt_keys = ("semantic_prompt", "coarse_prompt", "fine_prompt")
    return {key: full_generation[key].astype(np.int16) for key in prompt_keys}
def pack_metadata(metadata: dict[str, Any]):
    """Serialize *metadata* to JSON and wrap the text in a 0-d NumPy array.

    Values that ``json`` cannot encode natively are converted as follows:

    * ``np.ndarray``  -> nested Python lists (``tolist()``)
    * NumPy scalars   -> the matching Python scalar (``item()``)
    * other objects   -> their ``__dict__``

    Returns:
        A zero-dimensional ``np.ndarray`` containing the JSON string.

    Raises:
        TypeError: if a value is not JSON-serializable and has no
            ``__dict__`` to fall back on.
    """

    def default(o):
        if isinstance(o, np.ndarray):
            return o.tolist()
        if isinstance(o, np.generic):
            # NumPy scalars such as np.int64 / np.float32 have no __dict__,
            # so they must be unwrapped before the generic fallback below.
            return o.item()
        try:
            return o.__dict__
        except AttributeError as err:
            # json expects `default` to raise TypeError for unsupported values.
            raise TypeError(
                f"Object of type {type(o).__name__} is not JSON serializable"
            ) from err

    return np.array(json.dumps(metadata, default=default))
def save_npz(filename: str, full_generation: FullGeneration, metadata: dict[str, Any]):
    """Write the int16-cast prompts and the packed metadata to *filename*.

    The archive is written with ``np.savez`` and contains the three prompt
    arrays produced by ``compress_history`` plus a ``"metadata"`` entry
    produced by ``pack_metadata``.
    """
    arrays = dict(compress_history(full_generation))
    arrays["metadata"] = pack_metadata(metadata)
    np.savez(filename, **arrays)
def save_npz_musicgen(filename: str, tokens: torch.Tensor, metadata: dict[str, Any]):
    """Write *tokens* and the packed metadata to *filename* with ``np.savez``.

    The tensor is moved to the CPU and converted to a NumPy array before it
    is stored under the ``"tokens"`` key; the metadata goes under
    ``"metadata"`` after being packed by ``pack_metadata``.
    """
    token_array = tokens.cpu().numpy()
    packed_metadata = pack_metadata(metadata)
    np.savez(filename, tokens=token_array, metadata=packed_metadata)
def load_npz(filename):
    """Load every array from the ``.npz`` archive at *filename* into a dict.

    If the archive has a ``"metadata"`` entry, it is decoded from JSON and
    replaced by the resulting Python object. The entry may be either a 0-d
    string array or an array of single characters; both are joined into one
    JSON string before parsing.
    """

    def decode_metadata(raw: np.ndarray):
        # tolist() yields a str for a 0-d array and a list of chars otherwise;
        # "".join() turns either one into the full JSON text.
        pieces = raw.tolist() if isinstance(raw, np.ndarray) else raw
        return json.loads("".join(pieces))

    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
    # which can execute arbitrary code when the file is untrusted. Confirm
    # whether any stored archive actually needs it before loading files
    # obtained from third parties.
    with np.load(filename, allow_pickle=True) as archive:
        contents = {name: archive[name] for name in archive}

    if "metadata" in contents:
        contents["metadata"] = decode_metadata(contents["metadata"])
    return contents
def get_npz_files():
    """Return the paths of all ``.npz`` files in the known locations.

    Paths are collected relative to the current working directory, in this
    order: ``voices/``, then one level below ``favorites/``, then one level
    below ``outputs/``.
    """
    patterns = ("voices/*.npz", "favorites/*/*.npz", "outputs/*/*.npz")
    matches = []
    for pattern in patterns:
        matches.extend(glob.glob(pattern))
    return matches
if __name__ == "__main__":
    # Manual round-trip check: load an existing archive, re-save its prompts
    # together with a fixed metadata dict, reload the new file and verify
    # that the metadata survives unchanged.
    source_npz = load_npz("./temp/ogg-vs-npz/audio__bark__None__2023-05-29_10-12-46.npz")
    reencoded_path = "./npz_reencode_test_new_list.npz"

    metadata_in = {
        "_version": "0.0.1",
        "_hash_version": "0.0.2",
        "_type": "bark",
        "is_big_semantic_model": True,
        "is_big_coarse_model": False,
        "is_big_fine_model": False,
        "prompt": "test",
        "language": None,
        "speaker_id": None,
        "hash": "98b14851692f09df5e89c68f0a8e2013",
        "history_prompt": "continued_generation",
        "history_prompt_npz": None,
        "history_hash": "98b14851692f09df5e89c68f0a8e2013",
        "text_temp": 0.7,
        "waveform_temp": 0.7,
        "date": "2023-06-07_16-56-09",
        "seed": "2039063546",
    }

    # Forward only the three prompt arrays from the source archive.
    prompts = {
        key: source_npz[key]
        for key in ("semantic_prompt", "coarse_prompt", "fine_prompt")
    }
    save_npz(reencoded_path, prompts, metadata_in)

    reloaded_npz = load_npz(reencoded_path)
    assert reloaded_npz["metadata"] == metadata_in
|