# Audio preprocessing parameters: mel-filterbank framing, utterance lengths
# (in spectrogram frames), voice activity detection and volume normalization.
# All values are plain module-level constants.


## Mel-filterbank
# Length of each analysis window and hop between consecutive windows.
mel_window_length = 25  # In milliseconds
mel_window_step = 10    # In milliseconds
# Number of mel channels per spectrogram frame.
mel_n_channels = 40


## Audio
# NOTE(review): presumably in Hz (samples per second) -- confirm against the loader.
sampling_rate = 16000
# Number of spectrogram frames in a partial utterance.
# 160 frames x mel_window_step (10 ms) = 1600 ms
partials_n_frames = 160
# Number of spectrogram frames at inference.
# 80 frames x mel_window_step (10 ms) = 800 ms
inference_n_frames = 80


## Voice Activity Detection (VAD)
# Window size of the VAD. Must be either 10, 20 or 30 milliseconds.
# This sets the granularity of the VAD. Should not need to be changed.
vad_window_length = 30  # In milliseconds
# Number of frames to average together when performing the moving average smoothing.
# The larger this value, the larger the VAD variations must be to not get smoothed out.
vad_moving_average_width = 8
# Maximum number of consecutive silent frames a segment can have.
vad_max_silence_length = 6


## Audio volume normalization
# Target level for volume normalization, in dBFS (per the constant's name).
audio_norm_target_dBFS = -30