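"""PyQt5 user interface for the SV2TTS toolbox.

Defines the UI class: a dataset/speaker/utterance browser with recording and
playback controls, embedding heatmap and mel-spectrogram plots, UMAP
projections of speaker embeddings, and synthesis/vocoding controls. Most
widget signals are connected by the surrounding toolbox code, not here.
"""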
import sys
from pathlib import Path
from time import sleep
from typing import List, Set
from warnings import filterwarnings, warn

import matplotlib.pyplot as plt
import numpy as np
import sounddevice as sd
import soundfile as sf
import umap
from PyQt5.QtCore import Qt, QStringListModel
from PyQt5.QtWidgets import *
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas

from encoder.inference import plot_embedding_as_heatmap
from toolbox.utterance import Utterance

filterwarnings("ignore")

# One color per speaker, shared by the embedding heatmaps and the UMAP projection
colormap = np.array([
    [0, 127, 70],
    [255, 0, 0],
    [255, 217, 38],
    [0, 135, 255],
    [165, 0, 165],
    [255, 167, 255],
    [97, 142, 151],
    [0, 255, 255],
    [255, 96, 38],
    [142, 76, 0],
    [33, 0, 127],
    [0, 0, 0],
    [183, 183, 183],
    [76, 255, 0],
], dtype=float) / 255  # plain float: np.float was removed in NumPy 1.24

default_text = \
    "Welcome to the toolbox! To begin, load an utterance from your datasets or record one " \
    "yourself.\nOnce its embedding has been created, you can synthesize any text written here.\n" \
    "The synthesizer expects to generate " \
    "outputs that are somewhere between 5 and 12 seconds.\nTo mark breaks, write a new line. " \
    "Each line will be treated separately.\nThen, they are joined together to make the final " \
    "spectrogram. Use the vocoder to generate audio.\nThe vocoder generates almost in constant " \
    "time, so it will be more time efficient for longer inputs like this one.\nOn the left you " \
    "have the embedding projections. Load or record more utterances to see them.\nIf you have " \
    "at least 2 or 3 utterances from the same speaker, a cluster should form.\nSynthesized " \
    "utterances are of the same color as the speaker whose voice was used, but they're " \
    "represented with a cross."


class UI(QDialog):
    min_umap_points = 4
    max_log_lines = 5
    max_saved_utterances = 20

    def draw_utterance(self, utterance: Utterance, which):
        self.draw_spec(utterance.spec, which)
        self.draw_embed(utterance.embed, utterance.name, which)

    def draw_embed(self, embed, name, which):
        embed_ax, _ = self.current_ax if which == "current" else self.gen_ax
        embed_ax.figure.suptitle("" if embed is None else name)

        ## Embedding
        # Clear the plot
        if len(embed_ax.images) > 0:
            embed_ax.images[0].colorbar.remove()
        embed_ax.clear()

        # Draw the embed
        if embed is not None:
            plot_embedding_as_heatmap(embed, embed_ax)
            embed_ax.set_title("embedding")
        embed_ax.set_aspect("equal", "datalim")
        embed_ax.set_xticks([])
        embed_ax.set_yticks([])
        embed_ax.figure.canvas.draw()

    def draw_spec(self, spec, which):
        _, spec_ax = self.current_ax if which == "current" else self.gen_ax

        ## Spectrogram
        # Draw the spectrogram
        spec_ax.clear()
        if spec is not None:
            spec_ax.imshow(spec, aspect="auto", interpolation="none")
            spec_ax.set_title("mel spectrogram")

        spec_ax.set_xticks([])
        spec_ax.set_yticks([])
        spec_ax.figure.canvas.draw()

        if which != "current":
            self.vocode_button.setDisabled(spec is None)

    def draw_umap_projections(self, utterances: Set[Utterance]):
        self.umap_ax.clear()

        speakers = np.unique([u.speaker_name for u in utterances])
        colors = {speaker_name: colormap[i] for i, speaker_name in enumerate(speakers)}
        embeds = [u.embed for u in utterances]

        # Display a message if there aren't enough points
        if len(utterances) < self.min_umap_points:
            self.umap_ax.text(.5, .5, "Add %d more points to\ngenerate the projections" %
                              (self.min_umap_points - len(utterances)),
                              horizontalalignment='center', fontsize=15)
            self.umap_ax.set_title("")

        # Compute the projections
        else:
            if not self.umap_hot:
                self.log(
                    "Drawing UMAP projections for the first time, this will take a few seconds.")
                self.umap_hot = True

            # The first positional argument of umap.UMAP is n_neighbors; ceil(sqrt(n)) keeps
            # the neighborhood size sensible for small numbers of utterances
            reducer = umap.UMAP(int(np.ceil(np.sqrt(len(embeds)))), metric="cosine")
            projections = reducer.fit_transform(embeds)

            speakers_done = set()
            for projection, utterance in zip(projections, utterances):
                color = colors[utterance.speaker_name]
                mark = "x" if "_gen_" in utterance.name else "o"
                label = None if utterance.speaker_name in speakers_done else utterance.speaker_name
                speakers_done.add(utterance.speaker_name)
                self.umap_ax.scatter(projection[0], projection[1], c=[color], marker=mark,
                                     label=label)
            self.umap_ax.legend(prop={'size': 10})

        # Draw the plot
        self.umap_ax.set_aspect("equal", "datalim")
        self.umap_ax.set_xticks([])
        self.umap_ax.set_yticks([])
        self.umap_ax.figure.canvas.draw()

    def save_audio_file(self, wav, sample_rate):
        dialog = QFileDialog()
        dialog.setDefaultSuffix(".wav")
        fpath, _ = dialog.getSaveFileName(
            parent=self,
            caption="Select a path to save the audio file",
            filter="Audio Files (*.flac *.wav)"
        )
        if fpath:
            # Default format is wav
            if Path(fpath).suffix == "":
                fpath += ".wav"
            sf.write(fpath, wav, sample_rate)

    def setup_audio_devices(self, sample_rate):
        input_devices = []
        output_devices = []
        for device in sd.query_devices():
            # Check if valid input
            try:
                sd.check_input_settings(device=device["name"], samplerate=sample_rate)
                input_devices.append(device["name"])
            except Exception:
                pass

            # Check if valid output
            try:
                sd.check_output_settings(device=device["name"], samplerate=sample_rate)
                output_devices.append(device["name"])
            except Exception as e:
                # Log a warning only if the device is not an input
                if device["name"] not in input_devices:
                    warn("Unsupported output device %s for the sample rate: %d \nError: %s"
                         % (device["name"], sample_rate, str(e)))

        if len(input_devices) == 0:
            self.log("No audio input device detected. Recording may not work.")
            self.audio_in_device = None
        else:
            self.audio_in_device = input_devices[0]

        if len(output_devices) == 0:
            self.log("No supported output audio devices were found! Audio output may not work.")
            self.audio_out_devices_cb.addItems(["None"])
            self.audio_out_devices_cb.setDisabled(True)
        else:
            self.audio_out_devices_cb.clear()
            self.audio_out_devices_cb.addItems(output_devices)
            self.audio_out_devices_cb.currentTextChanged.connect(self.set_audio_device)

        self.set_audio_device()

    def set_audio_device(self):
        output_device = self.audio_out_devices_cb.currentText()
        if output_device == "None":
            output_device = None

        # If None, sounddevice queries portaudio
        sd.default.device = (self.audio_in_device, output_device)

    def play(self, wav, sample_rate):
        try:
            sd.stop()
            sd.play(wav, sample_rate)
        except Exception as e:
            print(e)
            self.log("Error in audio playback. Try selecting a different audio output device.")
            self.log("Your device must be connected before you start the toolbox.")

    def stop(self):
        sd.stop()

    def record_one(self, sample_rate, duration):
        self.record_button.setText("Recording...")
        self.record_button.setDisabled(True)

        self.log("Recording %d seconds of audio" % duration)
        sd.stop()
        try:
            # sd.rec is non-blocking and needs an integer frame count; recording
            # proceeds in the background while the progress bar updates below
            wav = sd.rec(int(duration * sample_rate), sample_rate, 1)
        except Exception as e:
            print(e)
            self.log("Could not record anything. Is your recording device enabled?")
            self.log("Your device must be connected before you start the toolbox.")
            return None

        for i in np.arange(0, duration, 0.1):
            self.set_loading(i, duration)
            sleep(0.1)
        self.set_loading(duration, duration)
        sd.wait()

        self.log("Done recording.")
        self.record_button.setText("Record")
        self.record_button.setDisabled(False)

        return wav.squeeze()

    @property
    def current_dataset_name(self):
        return self.dataset_box.currentText()

    @property
    def current_speaker_name(self):
        return self.speaker_box.currentText()

    @property
    def current_utterance_name(self):
        return self.utterance_box.currentText()

    def browse_file(self):
        fpath = QFileDialog().getOpenFileName(
            parent=self,
            caption="Select an audio file",
            filter="Audio Files (*.mp3 *.flac *.wav *.m4a)"
        )
        return Path(fpath[0]) if fpath[0] != "" else ""

    @staticmethod
    def repopulate_box(box, items, random=False):
        """
        Resets a box and adds a list of items. Pass a list of (item, data) pairs instead to join
        data to the items.
        """
        box.blockSignals(True)
        box.clear()
        for item in items:
            item = list(item) if isinstance(item, tuple) else [item]
            box.addItem(str(item[0]), *item[1:])
        if len(items) > 0:
            box.setCurrentIndex(np.random.randint(len(items)) if random else 0)
        box.setDisabled(len(items) == 0)
        box.blockSignals(False)

    def populate_browser(self, datasets_root: Path, recognized_datasets: List, level: int,
                         random=True):
        # Select a random dataset
        if level <= 0:
            if datasets_root is not None:
                datasets = [datasets_root.joinpath(d) for d in recognized_datasets]
                datasets = [d.relative_to(datasets_root) for d in datasets if d.exists()]
                self.browser_load_button.setDisabled(len(datasets) == 0)
            if datasets_root is None or len(datasets) == 0:
                if datasets_root is None:
                    msg = "Warning: you did not pass a root directory for datasets as argument"
                else:
                    msg = "Warning: you do not have any of the recognized datasets in %s" % \
                          datasets_root
                self.log(msg)
                msg += ".\nThe recognized datasets are:\n\t%s\nFeel free to add your own. You " \
                       "can still use the toolbox by recording samples yourself." % \
                       ("\n\t".join(recognized_datasets))
                print(msg, file=sys.stderr)

                self.random_utterance_button.setDisabled(True)
                self.random_speaker_button.setDisabled(True)
                self.random_dataset_button.setDisabled(True)
                self.utterance_box.setDisabled(True)
                self.speaker_box.setDisabled(True)
                self.dataset_box.setDisabled(True)
                self.browser_load_button.setDisabled(True)
                self.auto_next_checkbox.setDisabled(True)
                return
            self.repopulate_box(self.dataset_box, datasets, random)

        # Select a random speaker
        if level <= 1:
            speakers_root = datasets_root.joinpath(self.current_dataset_name)
            speaker_names = [d.stem for d in speakers_root.glob("*") if d.is_dir()]
            self.repopulate_box(self.speaker_box, speaker_names, random)

        # Select a random utterance
        if level <= 2:
            utterances_root = datasets_root.joinpath(
                self.current_dataset_name,
                self.current_speaker_name
            )
            utterances = []
            for extension in ['mp3', 'flac', 'wav', 'm4a']:
                utterances.extend(Path(utterances_root).glob("**/*.%s" % extension))
            utterances = [fpath.relative_to(utterances_root) for fpath in utterances]
            self.repopulate_box(self.utterance_box, utterances, random)

    def browser_select_next(self):
        index = (self.utterance_box.currentIndex() + 1) % len(self.utterance_box)
        self.utterance_box.setCurrentIndex(index)

    @property
    def current_encoder_fpath(self):
        return self.encoder_box.itemData(self.encoder_box.currentIndex())

    @property
    def current_synthesizer_fpath(self):
        return self.synthesizer_box.itemData(self.synthesizer_box.currentIndex())

    @property
    def current_vocoder_fpath(self):
        return self.vocoder_box.itemData(self.vocoder_box.currentIndex())

    def populate_models(self, models_dir: Path):
        # Encoder
        encoder_fpaths = list(models_dir.glob("*/encoder.pt"))
        if len(encoder_fpaths) == 0:
            raise Exception("No encoder models found in %s" % models_dir)
        self.repopulate_box(self.encoder_box, [(f.parent.name, f) for f in encoder_fpaths])

        # Synthesizer
        synthesizer_fpaths = list(models_dir.glob("*/synthesizer.pt"))
        if len(synthesizer_fpaths) == 0:
            raise Exception("No synthesizer models found in %s" % models_dir)
        self.repopulate_box(self.synthesizer_box, [(f.parent.name, f) for f in synthesizer_fpaths])

        # Vocoder
        vocoder_fpaths = list(models_dir.glob("*/vocoder.pt"))
        vocoder_items = [(f.parent.name, f) for f in vocoder_fpaths] + [("Griffin-Lim", None)]
        self.repopulate_box(self.vocoder_box, vocoder_items)

    @property
    def selected_utterance(self):
        return self.utterance_history.itemData(self.utterance_history.currentIndex())

    def register_utterance(self, utterance: Utterance):
        self.utterance_history.blockSignals(True)
        self.utterance_history.insertItem(0, utterance.name, utterance)
        self.utterance_history.setCurrentIndex(0)
        self.utterance_history.blockSignals(False)

        if len(self.utterance_history) > self.max_saved_utterances:
            self.utterance_history.removeItem(self.max_saved_utterances)

        self.play_button.setDisabled(False)
        self.generate_button.setDisabled(False)
        self.synthesize_button.setDisabled(False)

    def log(self, line, mode="newline"):
        if mode == "newline":
            self.logs.append(line)
            if len(self.logs) > self.max_log_lines:
                del self.logs[0]
        elif mode == "append":
            self.logs[-1] += line
        elif mode == "overwrite":
            self.logs[-1] = line
        log_text = '\n'.join(self.logs)

        self.log_window.setText(log_text)
        self.app.processEvents()

    def set_loading(self, value, maximum=1):
        # QProgressBar expects ints; value may be a float (see record_one)
        self.loading_bar.setValue(int(value * 100))
        self.loading_bar.setMaximum(int(maximum * 100))
        self.loading_bar.setTextVisible(value != 0)
        self.app.processEvents()

    def populate_gen_options(self, seed, trim_silences):
        if seed is not None:
            self.random_seed_checkbox.setChecked(True)
            self.seed_textbox.setText(str(seed))
            self.seed_textbox.setEnabled(True)
        else:
            self.random_seed_checkbox.setChecked(False)
            self.seed_textbox.setText(str(0))
            self.seed_textbox.setEnabled(False)

        if not trim_silences:
            self.trim_silences_checkbox.setChecked(False)
            self.trim_silences_checkbox.setDisabled(True)

    def update_seed_textbox(self):
        self.seed_textbox.setEnabled(self.random_seed_checkbox.isChecked())

    def reset_interface(self):
        self.draw_embed(None, None, "current")
        self.draw_embed(None, None, "generated")
        self.draw_spec(None, "current")
        self.draw_spec(None, "generated")
        self.draw_umap_projections(set())
        self.set_loading(0)
        self.play_button.setDisabled(True)
        self.generate_button.setDisabled(True)
        self.synthesize_button.setDisabled(True)
        self.vocode_button.setDisabled(True)
        self.replay_wav_button.setDisabled(True)
        self.export_wav_button.setDisabled(True)

        # Flush the log window with blank lines
        for _ in range(self.max_log_lines):
            self.log("")

    def __init__(self):
        ## Initialize the application
        self.app = QApplication(sys.argv)
        super().__init__(None)
        self.setWindowTitle("SV2TTS toolbox")

        ## Main layouts
        # Root
        root_layout = QGridLayout()
        self.setLayout(root_layout)

        # Browser
        browser_layout = QGridLayout()
        root_layout.addLayout(browser_layout, 0, 0, 1, 2)

        # Generation
        gen_layout = QVBoxLayout()
        root_layout.addLayout(gen_layout, 0, 2, 1, 2)

        # Projections
        self.projections_layout = QVBoxLayout()
        root_layout.addLayout(self.projections_layout, 1, 0, 1, 1)

        # Visualizations
        vis_layout = QVBoxLayout()
        root_layout.addLayout(vis_layout, 1, 1, 1, 3)

        ## Projections
        # UMap
        fig, self.umap_ax = plt.subplots(figsize=(3, 3), facecolor="#F0F0F0")
        fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.98)
        self.projections_layout.addWidget(FigureCanvas(fig))
        self.umap_hot = False
        self.clear_button = QPushButton("Clear")
        self.projections_layout.addWidget(self.clear_button)

        ## Browser
        # Dataset, speaker and utterance selection
        i = 0
        self.dataset_box = QComboBox()
        browser_layout.addWidget(QLabel("<b>Dataset</b>"), i, 0)
        browser_layout.addWidget(self.dataset_box, i + 1, 0)
        self.speaker_box = QComboBox()
        browser_layout.addWidget(QLabel("<b>Speaker</b>"), i, 1)
        browser_layout.addWidget(self.speaker_box, i + 1, 1)
        self.utterance_box = QComboBox()
        browser_layout.addWidget(QLabel("<b>Utterance</b>"), i, 2)
        browser_layout.addWidget(self.utterance_box, i + 1, 2)
        self.browser_load_button = QPushButton("Load")
        browser_layout.addWidget(self.browser_load_button, i + 1, 3)
        i += 2

        # Random buttons
        self.random_dataset_button = QPushButton("Random")
        browser_layout.addWidget(self.random_dataset_button, i, 0)
        self.random_speaker_button = QPushButton("Random")
        browser_layout.addWidget(self.random_speaker_button, i, 1)
        self.random_utterance_button = QPushButton("Random")
        browser_layout.addWidget(self.random_utterance_button, i, 2)
        self.auto_next_checkbox = QCheckBox("Auto select next")
        self.auto_next_checkbox.setChecked(True)
        browser_layout.addWidget(self.auto_next_checkbox, i, 3)
        i += 1

        # Utterance box
        browser_layout.addWidget(QLabel("<b>Use embedding from:</b>"), i, 0)
        self.utterance_history = QComboBox()
        browser_layout.addWidget(self.utterance_history, i, 1, 1, 3)
        i += 1

        # Browse, record, play and stop buttons
        self.browser_browse_button = QPushButton("Browse")
        browser_layout.addWidget(self.browser_browse_button, i, 0)
        self.record_button = QPushButton("Record")
        browser_layout.addWidget(self.record_button, i, 1)
        self.play_button = QPushButton("Play")
        browser_layout.addWidget(self.play_button, i, 2)
        self.stop_button = QPushButton("Stop")
        browser_layout.addWidget(self.stop_button, i, 3)
        i += 1

        # Model and audio output selection
        self.encoder_box = QComboBox()
        browser_layout.addWidget(QLabel("<b>Encoder</b>"), i, 0)
        browser_layout.addWidget(self.encoder_box, i + 1, 0)
        self.synthesizer_box = QComboBox()
        browser_layout.addWidget(QLabel("<b>Synthesizer</b>"), i, 1)
        browser_layout.addWidget(self.synthesizer_box, i + 1, 1)
        self.vocoder_box = QComboBox()
        browser_layout.addWidget(QLabel("<b>Vocoder</b>"), i, 2)
        browser_layout.addWidget(self.vocoder_box, i + 1, 2)
        self.audio_out_devices_cb = QComboBox()
        browser_layout.addWidget(QLabel("<b>Audio Output</b>"), i, 3)
        browser_layout.addWidget(self.audio_out_devices_cb, i + 1, 3)
        i += 2

        # Replay & save audio
        browser_layout.addWidget(QLabel("<b>Toolbox Output:</b>"), i, 0)
        self.waves_cb = QComboBox()
        self.waves_cb_model = QStringListModel()
        self.waves_cb.setModel(self.waves_cb_model)
        self.waves_cb.setToolTip("Select one of the last generated waves in this section for "
                                 "replaying or exporting")
        browser_layout.addWidget(self.waves_cb, i, 1)
        self.replay_wav_button = QPushButton("Replay")
        self.replay_wav_button.setToolTip("Replay the last generated vocoder output")
        browser_layout.addWidget(self.replay_wav_button, i, 2)
        self.export_wav_button = QPushButton("Export")
        self.export_wav_button.setToolTip("Save the last generated vocoder audio to the "
                                          "filesystem as a wav file")
        browser_layout.addWidget(self.export_wav_button, i, 3)
        i += 1

        ## Embed & spectrograms
        vis_layout.addStretch()

        gridspec_kw = {"width_ratios": [1, 4]}
        fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
                                            gridspec_kw=gridspec_kw)
        fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
        vis_layout.addWidget(FigureCanvas(fig))

        fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
                                        gridspec_kw=gridspec_kw)
        fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
        vis_layout.addWidget(FigureCanvas(fig))

        for ax in self.current_ax.tolist() + self.gen_ax.tolist():
            ax.set_facecolor("#F0F0F0")
            for side in ["top", "right", "bottom", "left"]:
                ax.spines[side].set_visible(False)

        ## Generation
        self.text_prompt = QPlainTextEdit(default_text)
        gen_layout.addWidget(self.text_prompt, stretch=1)

        self.generate_button = QPushButton("Synthesize and vocode")
        gen_layout.addWidget(self.generate_button)

        layout = QHBoxLayout()
        self.synthesize_button = QPushButton("Synthesize only")
        layout.addWidget(self.synthesize_button)
        self.vocode_button = QPushButton("Vocode only")
        layout.addWidget(self.vocode_button)
        gen_layout.addLayout(layout)

        layout_seed = QGridLayout()
        self.random_seed_checkbox = QCheckBox("Random seed:")
        self.random_seed_checkbox.setToolTip("When checked, makes the synthesizer and vocoder "
                                             "deterministic.")
        layout_seed.addWidget(self.random_seed_checkbox, 0, 0)
        self.seed_textbox = QLineEdit()
        self.seed_textbox.setMaximumWidth(80)
        layout_seed.addWidget(self.seed_textbox, 0, 1)
        self.trim_silences_checkbox = QCheckBox("Enhance vocoder output")
        self.trim_silences_checkbox.setToolTip("When checked, trims excess silence in vocoder "
                                               "output. This feature requires `webrtcvad` to be "
                                               "installed.")
        layout_seed.addWidget(self.trim_silences_checkbox, 0, 2, 1, 2)
        gen_layout.addLayout(layout_seed)

        self.loading_bar = QProgressBar()
        gen_layout.addWidget(self.loading_bar)

        self.log_window = QLabel()
        self.log_window.setAlignment(Qt.AlignBottom | Qt.AlignLeft)
        gen_layout.addWidget(self.log_window)
        self.logs = []
        gen_layout.addStretch()

        ## Set the size of the window and of the elements
        max_size = QDesktopWidget().availableGeometry(self).size() * 0.8
        self.resize(max_size)

        ## Finalize the display
        self.reset_interface()
        self.show()

    def start(self):
        self.app.exec_()
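

# Minimal standalone launch sketch. Assumption for illustration: in the real
# project, the toolbox controller instantiates UI and connects the widget
# signals, so run on its own this only displays the window layout and most
# controls stay inert.
if __name__ == "__main__":
    ui = UI()
    ui.start()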