inference_gui.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. import sys
  2. from PyQt5.QtCore import QEvent
  3. from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushButton, QTextEdit
  4. from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox
  5. import soundfile as sf
  6. from tools.i18n.i18n import I18nAuto
  7. i18n = I18nAuto()
  8. from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
  9. class GPTSoVITSGUI(QMainWindow):
  10. def __init__(self):
  11. super().__init__()
  12. self.init_ui()
  13. def init_ui(self):
  14. self.setWindowTitle('GPT-SoVITS GUI')
  15. self.setGeometry(800, 450, 950, 850)
  16. self.setStyleSheet("""
  17. QWidget {
  18. background-color: #a3d3b1;
  19. }
  20. QTabWidget::pane {
  21. background-color: #a3d3b1;
  22. }
  23. QTabWidget::tab-bar {
  24. alignment: left;
  25. }
  26. QTabBar::tab {
  27. background: #8da4bf;
  28. color: #ffffff;
  29. padding: 8px;
  30. }
  31. QTabBar::tab:selected {
  32. background: #2a3f54;
  33. }
  34. QLabel {
  35. color: #000000;
  36. }
  37. QPushButton {
  38. background-color: #4CAF50;
  39. color: white;
  40. padding: 8px;
  41. border: 1px solid #4CAF50;
  42. border-radius: 4px;
  43. }
  44. QPushButton:hover {
  45. background-color: #45a049;
  46. border: 1px solid #45a049;
  47. box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.1);
  48. }
  49. """)
  50. license_text = (
  51. "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. "
  52. "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
  53. license_label = QLabel(license_text)
  54. license_label.setWordWrap(True)
  55. self.GPT_model_label = QLabel("选择GPT模型:")
  56. self.GPT_model_input = QLineEdit()
  57. self.GPT_model_input.setPlaceholderText("拖拽或选择文件")
  58. self.GPT_model_input.setReadOnly(True)
  59. self.GPT_model_button = QPushButton("选择GPT模型文件")
  60. self.GPT_model_button.clicked.connect(self.select_GPT_model)
  61. self.SoVITS_model_label = QLabel("选择SoVITS模型:")
  62. self.SoVITS_model_input = QLineEdit()
  63. self.SoVITS_model_input.setPlaceholderText("拖拽或选择文件")
  64. self.SoVITS_model_input.setReadOnly(True)
  65. self.SoVITS_model_button = QPushButton("选择SoVITS模型文件")
  66. self.SoVITS_model_button.clicked.connect(self.select_SoVITS_model)
  67. self.ref_audio_label = QLabel("上传参考音频:")
  68. self.ref_audio_input = QLineEdit()
  69. self.ref_audio_input.setPlaceholderText("拖拽或选择文件")
  70. self.ref_audio_input.setReadOnly(True)
  71. self.ref_audio_button = QPushButton("选择音频文件")
  72. self.ref_audio_button.clicked.connect(self.select_ref_audio)
  73. self.ref_text_label = QLabel("参考音频文本:")
  74. self.ref_text_input = QLineEdit()
  75. self.ref_text_input.setPlaceholderText("拖拽或选择文件")
  76. self.ref_text_input.setReadOnly(True)
  77. self.ref_text_button = QPushButton("上传文本")
  78. self.ref_text_button.clicked.connect(self.upload_ref_text)
  79. self.language_label = QLabel("参考音频语言:")
  80. self.language_combobox = QComboBox()
  81. self.language_combobox.addItems(["中文", "英文", "日文"])
  82. self.target_text_label = QLabel("合成目标文本:")
  83. self.target_text_input = QLineEdit()
  84. self.target_text_input.setPlaceholderText("拖拽或选择文件")
  85. self.target_text_input.setReadOnly(True)
  86. self.target_text_button = QPushButton("上传文本")
  87. self.target_text_button.clicked.connect(self.upload_target_text)
  88. self.language_label_02 = QLabel("合成音频语言:")
  89. self.language_combobox_02 = QComboBox()
  90. self.language_combobox_02.addItems(["中文", "英文", "日文"])
  91. self.output_label = QLabel("输出音频路径:")
  92. self.output_input = QLineEdit()
  93. self.output_input.setPlaceholderText("拖拽或选择文件")
  94. self.output_input.setReadOnly(True)
  95. self.output_button = QPushButton("选择文件夹")
  96. self.output_button.clicked.connect(self.select_output_path)
  97. self.output_text = QTextEdit()
  98. self.output_text.setReadOnly(True)
  99. self.add_drag_drop_events([
  100. self.GPT_model_input,
  101. self.SoVITS_model_input,
  102. self.ref_audio_input,
  103. self.ref_text_input,
  104. self.target_text_input,
  105. self.output_input,
  106. ])
  107. self.synthesize_button = QPushButton("合成")
  108. self.synthesize_button.clicked.connect(self.synthesize)
  109. self.clear_output_button = QPushButton("清空输出")
  110. self.clear_output_button.clicked.connect(self.clear_output)
  111. self.status_bar = QStatusBar()
  112. main_layout = QVBoxLayout()
  113. input_layout = QGridLayout()
  114. input_layout.setSpacing(10)
  115. self.setLayout(input_layout)
  116. input_layout.addWidget(license_label, 0, 0, 1, 3)
  117. input_layout.addWidget(self.GPT_model_label, 1, 0)
  118. input_layout.addWidget(self.GPT_model_input, 2, 0, 1, 2)
  119. input_layout.addWidget(self.GPT_model_button, 2, 2)
  120. input_layout.addWidget(self.SoVITS_model_label, 3, 0)
  121. input_layout.addWidget(self.SoVITS_model_input, 4, 0, 1, 2)
  122. input_layout.addWidget(self.SoVITS_model_button, 4, 2)
  123. input_layout.addWidget(self.ref_audio_label, 5, 0)
  124. input_layout.addWidget(self.ref_audio_input, 6, 0, 1, 2)
  125. input_layout.addWidget(self.ref_audio_button, 6, 2)
  126. input_layout.addWidget(self.language_label, 7, 0)
  127. input_layout.addWidget(self.language_combobox, 8, 0, 1, 1)
  128. input_layout.addWidget(self.ref_text_label, 9, 0)
  129. input_layout.addWidget(self.ref_text_input, 10, 0, 1, 2)
  130. input_layout.addWidget(self.ref_text_button, 10, 2)
  131. input_layout.addWidget(self.language_label_02, 11, 0)
  132. input_layout.addWidget(self.language_combobox_02, 12, 0, 1, 1)
  133. input_layout.addWidget(self.target_text_label, 13, 0)
  134. input_layout.addWidget(self.target_text_input, 14, 0, 1, 2)
  135. input_layout.addWidget(self.target_text_button, 14, 2)
  136. input_layout.addWidget(self.output_label, 15, 0)
  137. input_layout.addWidget(self.output_input, 16, 0, 1, 2)
  138. input_layout.addWidget(self.output_button, 16, 2)
  139. main_layout.addLayout(input_layout)
  140. output_layout = QVBoxLayout()
  141. output_layout.addWidget(self.output_text)
  142. main_layout.addLayout(output_layout)
  143. main_layout.addWidget(self.synthesize_button)
  144. main_layout.addWidget(self.clear_output_button)
  145. main_layout.addWidget(self.status_bar)
  146. self.central_widget = QWidget()
  147. self.central_widget.setLayout(main_layout)
  148. self.setCentralWidget(self.central_widget)
  149. def dragEnterEvent(self, event):
  150. if event.mimeData().hasUrls():
  151. event.acceptProposedAction()
  152. def dropEvent(self, event):
  153. if event.mimeData().hasUrls():
  154. file_paths = [url.toLocalFile() for url in event.mimeData().urls()]
  155. if len(file_paths) == 1:
  156. self.update_ref_audio(file_paths[0])
  157. self.update_input_paths(self.ref_audio_input, file_paths[0])
  158. else:
  159. self.update_ref_audio(", ".join(file_paths))
  160. def add_drag_drop_events(self, widgets):
  161. for widget in widgets:
  162. widget.setAcceptDrops(True)
  163. widget.installEventFilter(self)
  164. def eventFilter(self, obj, event):
  165. if event.type() == QEvent.DragEnter:
  166. mime_data = event.mimeData()
  167. if mime_data.hasUrls():
  168. event.acceptProposedAction()
  169. elif event.type() == QEvent.Drop:
  170. mime_data = event.mimeData()
  171. if mime_data.hasUrls():
  172. file_paths = [url.toLocalFile() for url in mime_data.urls()]
  173. if len(file_paths) == 1:
  174. self.update_input_paths(obj, file_paths[0])
  175. else:
  176. self.update_input_paths(obj, ", ".join(file_paths))
  177. event.acceptProposedAction()
  178. return super().eventFilter(obj, event)
  179. def select_GPT_model(self):
  180. file_path, _ = QFileDialog.getOpenFileName(self, "选择GPT模型文件", "", "GPT Files (*.ckpt)")
  181. if file_path:
  182. self.GPT_model_input.setText(file_path)
  183. def select_SoVITS_model(self):
  184. file_path, _ = QFileDialog.getOpenFileName(self, "选择SoVITS模型文件", "", "SoVITS Files (*.pth)")
  185. if file_path:
  186. self.SoVITS_model_input.setText(file_path)
  187. def select_ref_audio(self):
  188. options = QFileDialog.Options()
  189. options |= QFileDialog.DontUseNativeDialog
  190. options |= QFileDialog.ShowDirsOnly
  191. file_dialog = QFileDialog()
  192. file_dialog.setOptions(options)
  193. file_dialog.setFileMode(QFileDialog.AnyFile)
  194. file_dialog.setNameFilter("Audio Files (*.wav *.mp3)")
  195. if file_dialog.exec_():
  196. file_paths = file_dialog.selectedFiles()
  197. if len(file_paths) == 1:
  198. self.update_ref_audio(file_paths[0])
  199. self.update_input_paths(self.ref_audio_input, file_paths[0])
  200. else:
  201. self.update_ref_audio(", ".join(file_paths))
  202. def upload_ref_text(self):
  203. file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
  204. if file_path:
  205. with open(file_path, 'r', encoding='utf-8') as file:
  206. content = file.read()
  207. self.ref_text_input.setText(content)
  208. self.update_input_paths(self.ref_text_input, file_path)
  209. def upload_target_text(self):
  210. file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
  211. if file_path:
  212. with open(file_path, 'r', encoding='utf-8') as file:
  213. content = file.read()
  214. self.target_text_input.setText(content)
  215. self.update_input_paths(self.target_text_input, file_path)
  216. def select_output_path(self):
  217. options = QFileDialog.Options()
  218. options |= QFileDialog.DontUseNativeDialog
  219. options |= QFileDialog.ShowDirsOnly
  220. folder_dialog = QFileDialog()
  221. folder_dialog.setOptions(options)
  222. folder_dialog.setFileMode(QFileDialog.Directory)
  223. if folder_dialog.exec_():
  224. folder_path = folder_dialog.selectedFiles()[0]
  225. self.output_input.setText(folder_path)
  226. def update_ref_audio(self, file_path):
  227. self.ref_audio_input.setText(file_path)
  228. def update_input_paths(self, input_box, file_path):
  229. input_box.setText(file_path)
  230. def clear_output(self):
  231. self.output_text.clear()
  232. def synthesize(self):
  233. GPT_model_path = self.GPT_model_input.text()
  234. SoVITS_model_path = self.SoVITS_model_input.text()
  235. ref_audio_path = self.ref_audio_input.text()
  236. language_combobox = self.language_combobox.currentText()
  237. language_combobox = i18n(language_combobox)
  238. ref_text = self.ref_text_input.text()
  239. language_combobox_02 = self.language_combobox_02.currentText()
  240. language_combobox_02 = i18n(language_combobox_02)
  241. target_text = self.target_text_input.text()
  242. output_path = self.output_input.text()
  243. change_gpt_weights(gpt_path=GPT_model_path)
  244. change_sovits_weights(sovits_path=SoVITS_model_path)
  245. synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path,
  246. prompt_text=ref_text,
  247. prompt_language=language_combobox,
  248. text=target_text,
  249. text_language=language_combobox_02)
  250. result_list = list(synthesis_result)
  251. if result_list:
  252. last_sampling_rate, last_audio_data = result_list[-1]
  253. output_wav_path = os.path.join(output_path, "output.wav")
  254. sf.write(output_wav_path, last_audio_data, last_sampling_rate)
  255. result = "Audio saved to " + output_wav_path
  256. self.status_bar.showMessage("合成完成!输出路径:" + output_wav_path, 5000)
  257. self.output_text.append("处理结果:\n" + result)
  258. def main():
  259. app = QApplication(sys.argv)
  260. mainWin = GPTSoVITSGUI()
  261. mainWin.show()
  262. sys.exit(app.exec_())
# Launch the GUI only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()