utterance.py 855 B

1234567891011121314151617181920212223242526
  1. import numpy as np
  2. class Utterance:
  3. def __init__(self, frames_fpath, wave_fpath):
  4. self.frames_fpath = frames_fpath
  5. self.wave_fpath = wave_fpath
  6. def get_frames(self):
  7. return np.load(self.frames_fpath)
  8. def random_partial(self, n_frames):
  9. """
  10. Crops the frames into a partial utterance of n_frames
  11. :param n_frames: The number of frames of the partial utterance
  12. :return: the partial utterance frames and a tuple indicating the start and end of the
  13. partial utterance in the complete utterance.
  14. """
  15. frames = self.get_frames()
  16. if frames.shape[0] == n_frames:
  17. start = 0
  18. else:
  19. start = np.random.randint(0, frames.shape[0] - n_frames)
  20. end = start + n_frames
  21. return frames[start:end], (start, end)