12345678910111213141516171819202122232425262728293031323334353637383940 |
- from encoder.data_objects.random_cycler import RandomCycler
- from encoder.data_objects.utterance import Utterance
- from pathlib import Path
- # Contains the set of utterances of a single speaker
class Speaker:
    """Contains the set of utterances of a single speaker.

    Utterances are loaded lazily: the speaker's "_sources.txt" index is only
    read the first time random_partial() is called.
    """

    def __init__(self, root: Path):
        """
        :param root: Path whose last component is used as the speaker name. The file
        "_sources.txt" is looked up under this path when utterances are first loaded.
        """
        self.root = root
        self.name = root.name
        # Both stay None until _load_utterances() has run.
        self.utterances = None
        self.utterance_cycler = None

    @staticmethod
    def _parse_sources(lines):
        """
        Parses the lines of a "_sources.txt" file into a mapping.

        :param lines: An iterable of text lines formatted as "<frames_fname>,<wave_fpath>".
        Line terminators are stripped and blank lines are skipped.
        :return: A dict mapping each frames file name to its wave file path, in file order.
        If a frames file name appears several times, the last entry wins.
        :raises ValueError: If a non-blank line does not contain a comma.
        """
        sources = {}
        for line in lines:
            # Iterating over a text file yields lines with their terminator still attached;
            # without this the wave path would end with "\n".
            entry = line.rstrip("\r\n")
            if not entry.strip():
                continue
            # Split on the first comma only, so a wave path containing commas stays intact.
            frames_fname, separator, wave_fpath = entry.partition(",")
            if not separator:
                raise ValueError(
                    "Expected '<frames_fname>,<wave_fpath>' but got %r" % entry
                )
            sources[frames_fname] = wave_fpath
        return sources

    def _load_utterances(self):
        """
        Reads "_sources.txt" under the speaker root, then populates self.utterances and
        self.utterance_cycler from its entries.
        """
        with self.root.joinpath("_sources.txt").open("r") as sources_file:
            sources = self._parse_sources(sources_file)
        self.utterances = [
            Utterance(self.root.joinpath(frames_fname), wave_fpath)
            for frames_fname, wave_fpath in sources.items()
        ]
        self.utterance_cycler = RandomCycler(self.utterances)

    def random_partial(self, count: int, n_frames: int):
        """
        Samples a batch of <count> unique partial utterances from the disk in a way that all
        utterances come up at least once every two cycles and in a random order every time.

        :param count: The number of partial utterances to sample from the set of utterances from
        that speaker. Utterances are guaranteed not to be repeated if <count> is not larger than
        the number of utterances available.
        :param n_frames: The number of frames in the partial utterance.
        :return: A list of tuples (utterance, frames, range) where utterance is an Utterance,
        frames are the frames of the partial utterances and range is the range of the partial
        utterance with regard to the complete utterance.
        """
        # Lazy loading: the index file is only read on the first sampling request.
        if self.utterances is None:
            self._load_utterances()

        utterances = self.utterance_cycler.sample(count)
        # Prepend the utterance itself to the tuple returned by its random_partial().
        return [(u,) + u.random_partial(n_frames) for u in utterances]
|