audio.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import math
  2. import numpy as np
  3. import librosa
  4. import vocoder.hparams as hp
  5. from scipy.signal import lfilter
  6. import soundfile as sf
  7. def label_2_float(x, bits) :
  8. return 2 * x / (2**bits - 1.) - 1.
  9. def float_2_label(x, bits) :
  10. assert abs(x).max() <= 1.0
  11. x = (x + 1.) * (2**bits - 1) / 2
  12. return x.clip(0, 2**bits - 1)
  13. def load_wav(path) :
  14. return librosa.load(str(path), sr=hp.sample_rate)[0]
  15. def save_wav(x, path) :
  16. sf.write(path, x.astype(np.float32), hp.sample_rate)
  17. def split_signal(x) :
  18. unsigned = x + 2**15
  19. coarse = unsigned // 256
  20. fine = unsigned % 256
  21. return coarse, fine
  22. def combine_signal(coarse, fine) :
  23. return coarse * 256 + fine - 2**15
  24. def encode_16bits(x) :
  25. return np.clip(x * 2**15, -2**15, 2**15 - 1).astype(np.int16)
  26. mel_basis = None
  27. def linear_to_mel(spectrogram):
  28. global mel_basis
  29. if mel_basis is None:
  30. mel_basis = build_mel_basis()
  31. return np.dot(mel_basis, spectrogram)
  32. def build_mel_basis():
  33. return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin)
  34. def normalize(S):
  35. return np.clip((S - hp.min_level_db) / -hp.min_level_db, 0, 1)
  36. def denormalize(S):
  37. return (np.clip(S, 0, 1) * -hp.min_level_db) + hp.min_level_db
  38. def amp_to_db(x):
  39. return 20 * np.log10(np.maximum(1e-5, x))
  40. def db_to_amp(x):
  41. return np.power(10.0, x * 0.05)
  42. def spectrogram(y):
  43. D = stft(y)
  44. S = amp_to_db(np.abs(D)) - hp.ref_level_db
  45. return normalize(S)
  46. def melspectrogram(y):
  47. D = stft(y)
  48. S = amp_to_db(linear_to_mel(np.abs(D)))
  49. return normalize(S)
  50. def stft(y):
  51. return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=hp.hop_length, win_length=hp.win_length)
  52. def pre_emphasis(x):
  53. return lfilter([1, -hp.preemphasis], [1], x)
  54. def de_emphasis(x):
  55. return lfilter([1], [1, -hp.preemphasis], x)
  56. def encode_mu_law(x, mu) :
  57. mu = mu - 1
  58. fx = np.sign(x) * np.log(1 + mu * np.abs(x)) / np.log(1 + mu)
  59. return np.floor((fx + 1) / 2 * mu + 0.5)
  60. def decode_mu_law(y, mu, from_labels=True) :
  61. if from_labels:
  62. y = label_2_float(y, math.log2(mu))
  63. mu = mu - 1
  64. x = np.sign(y) / mu * ((1 + mu) ** np.abs(y) - 1)
  65. return x