subfix_webui.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. import argparse,os
  2. import copy
  3. import json
  4. import os
  5. import uuid
  6. import librosa
  7. import gradio as gr
  8. import numpy as np
  9. import soundfile
  10. g_json_key_text = ""
  11. g_json_key_path = ""
  12. g_load_file = ""
  13. g_load_format = ""
  14. g_max_json_index = 0
  15. g_index = 0
  16. g_batch = 10
  17. g_text_list = []
  18. g_audio_list = []
  19. g_checkbox_list = []
  20. g_data_json = []
  21. def reload_data(index, batch):
  22. global g_index
  23. g_index = index
  24. global g_batch
  25. g_batch = batch
  26. datas = g_data_json[index:index+batch]
  27. output = []
  28. for d in datas:
  29. output.append(
  30. {
  31. g_json_key_text: d[g_json_key_text],
  32. g_json_key_path: d[g_json_key_path]
  33. }
  34. )
  35. return output
  36. def b_change_index(index, batch):
  37. global g_index, g_batch
  38. g_index, g_batch = index, batch
  39. datas = reload_data(index, batch)
  40. output = []
  41. for i , _ in enumerate(datas):
  42. output.append(
  43. # gr.Textbox(
  44. # label=f"Text {i+index}",
  45. # value=_[g_json_key_text]#text
  46. # )
  47. {
  48. "__type__":"update",
  49. "label":f"Text {i+index}",
  50. "value":_[g_json_key_text]
  51. }
  52. )
  53. for _ in range(g_batch - len(datas)):
  54. output.append(
  55. # gr.Textbox(
  56. # label=f"Text",
  57. # value=""
  58. # )
  59. {
  60. "__type__": "update",
  61. "label": f"Text",
  62. "value": ""
  63. }
  64. )
  65. for _ in datas:
  66. output.append(_[g_json_key_path])
  67. for _ in range(g_batch - len(datas)):
  68. output.append(None)
  69. for _ in range(g_batch):
  70. output.append(False)
  71. return output
  72. def b_next_index(index, batch):
  73. b_save_file()
  74. if (index + batch) <= g_max_json_index:
  75. return index + batch , *b_change_index(index + batch, batch)
  76. else:
  77. return index, *b_change_index(index, batch)
  78. def b_previous_index(index, batch):
  79. b_save_file()
  80. if (index - batch) >= 0:
  81. return index - batch , *b_change_index(index - batch, batch)
  82. else:
  83. return 0, *b_change_index(0, batch)
  84. def b_submit_change(*text_list):
  85. global g_data_json
  86. change = False
  87. for i, new_text in enumerate(text_list):
  88. if g_index + i <= g_max_json_index:
  89. new_text = new_text.strip()+' '
  90. if (g_data_json[g_index + i][g_json_key_text] != new_text):
  91. g_data_json[g_index + i][g_json_key_text] = new_text
  92. change = True
  93. if change:
  94. b_save_file()
  95. return g_index, *b_change_index(g_index, g_batch)
  96. def b_delete_audio(*checkbox_list):
  97. global g_data_json, g_index, g_max_json_index
  98. b_save_file()
  99. change = False
  100. for i, checkbox in reversed(list(enumerate(checkbox_list))):
  101. if g_index + i < len(g_data_json):
  102. if (checkbox == True):
  103. g_data_json.pop(g_index + i)
  104. change = True
  105. g_max_json_index = len(g_data_json)-1
  106. if g_index > g_max_json_index:
  107. g_index = g_max_json_index
  108. g_index = g_index if g_index >= 0 else 0
  109. if change:
  110. b_save_file()
  111. # return gr.Slider(value=g_index, maximum=(g_max_json_index if g_max_json_index>=0 else 0)), *b_change_index(g_index, g_batch)
  112. return {"value":g_index,"__type__":"update","maximum":(g_max_json_index if g_max_json_index>=0 else 0)},*b_change_index(g_index, g_batch)
  113. def b_invert_selection(*checkbox_list):
  114. new_list = [not item if item is True else True for item in checkbox_list]
  115. return new_list
  116. def get_next_path(filename):
  117. base_dir = os.path.dirname(filename)
  118. base_name = os.path.splitext(os.path.basename(filename))[0]
  119. for i in range(100):
  120. new_path = os.path.join(base_dir, f"{base_name}_{str(i).zfill(2)}.wav")
  121. if not os.path.exists(new_path) :
  122. return new_path
  123. return os.path.join(base_dir, f'{str(uuid.uuid4())}.wav')
  124. def b_audio_split(audio_breakpoint, *checkbox_list):
  125. global g_data_json , g_max_json_index
  126. checked_index = []
  127. for i, checkbox in enumerate(checkbox_list):
  128. if (checkbox == True and g_index+i < len(g_data_json)):
  129. checked_index.append(g_index + i)
  130. if len(checked_index) == 1 :
  131. index = checked_index[0]
  132. audio_json = copy.deepcopy(g_data_json[index])
  133. path = audio_json[g_json_key_path]
  134. data, sample_rate = librosa.load(path, sr=None, mono=True)
  135. audio_maxframe = len(data)
  136. break_frame = int(audio_breakpoint * sample_rate)
  137. if (break_frame >= 1 and break_frame < audio_maxframe):
  138. audio_first = data[0:break_frame]
  139. audio_second = data[break_frame:]
  140. nextpath = get_next_path(path)
  141. soundfile.write(nextpath, audio_second, sample_rate)
  142. soundfile.write(path, audio_first, sample_rate)
  143. g_data_json.insert(index + 1, audio_json)
  144. g_data_json[index + 1][g_json_key_path] = nextpath
  145. b_save_file()
  146. g_max_json_index = len(g_data_json) - 1
  147. # return gr.Slider(value=g_index, maximum=g_max_json_index), *b_change_index(g_index, g_batch)
  148. return {"value":g_index,"maximum":g_max_json_index,"__type__":"update"}, *b_change_index(g_index, g_batch)
  149. def b_merge_audio(interval_r, *checkbox_list):
  150. global g_data_json , g_max_json_index
  151. b_save_file()
  152. checked_index = []
  153. audios_path = []
  154. audios_text = []
  155. for i, checkbox in enumerate(checkbox_list):
  156. if (checkbox == True and g_index+i < len(g_data_json)):
  157. checked_index.append(g_index + i)
  158. if (len(checked_index)>1):
  159. for i in checked_index:
  160. audios_path.append(g_data_json[i][g_json_key_path])
  161. audios_text.append(g_data_json[i][g_json_key_text])
  162. for i in reversed(checked_index[1:]):
  163. g_data_json.pop(i)
  164. base_index = checked_index[0]
  165. base_path = audios_path[0]
  166. g_data_json[base_index][g_json_key_text] = "".join(audios_text)
  167. audio_list = []
  168. l_sample_rate = None
  169. for i, path in enumerate(audios_path):
  170. data, sample_rate = librosa.load(path, sr=l_sample_rate, mono=True)
  171. l_sample_rate = sample_rate
  172. if (i > 0):
  173. silence = np.zeros(int(l_sample_rate * interval_r))
  174. audio_list.append(silence)
  175. audio_list.append(data)
  176. audio_concat = np.concatenate(audio_list)
  177. soundfile.write(base_path, audio_concat, l_sample_rate)
  178. b_save_file()
  179. g_max_json_index = len(g_data_json) - 1
  180. # return gr.Slider(value=g_index, maximum=g_max_json_index), *b_change_index(g_index, g_batch)
  181. return {"value":g_index,"maximum":g_max_json_index,"__type__":"update"}, *b_change_index(g_index, g_batch)
  182. def b_save_json():
  183. with open(g_load_file,'w', encoding="utf-8") as file:
  184. for data in g_data_json:
  185. file.write(f'{json.dumps(data, ensure_ascii = False)}\n')
  186. def b_save_list():
  187. with open(g_load_file,'w', encoding="utf-8") as file:
  188. for data in g_data_json:
  189. wav_path = data["wav_path"]
  190. speaker_name = data["speaker_name"]
  191. language = data["language"]
  192. text = data["text"]
  193. file.write(f"{wav_path}|{speaker_name}|{language}|{text}".strip()+'\n')
  194. def b_load_json():
  195. global g_data_json, g_max_json_index
  196. with open(g_load_file, 'r', encoding="utf-8") as file:
  197. g_data_json = file.readlines()
  198. g_data_json = [json.loads(line) for line in g_data_json]
  199. g_max_json_index = len(g_data_json) - 1
  200. def b_load_list():
  201. global g_data_json, g_max_json_index
  202. with open(g_load_file, 'r', encoding="utf-8") as source:
  203. data_list = source.readlines()
  204. for _ in data_list:
  205. data = _.split('|')
  206. if (len(data) == 4):
  207. wav_path, speaker_name, language, text = data
  208. g_data_json.append(
  209. {
  210. 'wav_path':wav_path,
  211. 'speaker_name':speaker_name,
  212. 'language':language,
  213. 'text':text.strip()
  214. }
  215. )
  216. else:
  217. print("error line:", data)
  218. g_max_json_index = len(g_data_json) - 1
  219. def b_save_file():
  220. if g_load_format == "json":
  221. b_save_json()
  222. elif g_load_format == "list":
  223. b_save_list()
  224. def b_load_file():
  225. if g_load_format == "json":
  226. b_load_json()
  227. elif g_load_format == "list":
  228. b_load_list()
  229. def set_global(load_json, load_list, json_key_text, json_key_path, batch):
  230. global g_json_key_text, g_json_key_path, g_load_file, g_load_format, g_batch
  231. g_batch = int(batch)
  232. if (load_json != "None"):
  233. g_load_format = "json"
  234. g_load_file = load_json
  235. elif (load_list != "None"):
  236. g_load_format = "list"
  237. g_load_file = load_list
  238. else:
  239. g_load_format = "list"
  240. g_load_file = "demo.list"
  241. g_json_key_text = json_key_text
  242. g_json_key_path = json_key_path
  243. b_load_file()
  244. if __name__ == "__main__":
  245. parser = argparse.ArgumentParser(description='Process some integers.')
  246. parser.add_argument('--load_json', default="None", help='source file, like demo.json')
  247. parser.add_argument('--is_share', default="False", help='whether webui is_share=True')
  248. parser.add_argument('--load_list', default="None", help='source file, like demo.list')
  249. parser.add_argument('--webui_port_subfix', default=9871, help='source file, like demo.list')
  250. parser.add_argument('--json_key_text', default="text", help='the text key name in json, Default: text')
  251. parser.add_argument('--json_key_path', default="wav_path", help='the path key name in json, Default: wav_path')
  252. parser.add_argument('--g_batch', default=10, help='max number g_batch wav to display, Default: 10')
  253. args = parser.parse_args()
  254. set_global(args.load_json, args.load_list, args.json_key_text, args.json_key_path, args.g_batch)
  255. with gr.Blocks() as demo:
  256. with gr.Row():
  257. btn_change_index = gr.Button("Change Index")
  258. btn_submit_change = gr.Button("Submit Text")
  259. btn_merge_audio = gr.Button("Merge Audio")
  260. btn_delete_audio = gr.Button("Delete Audio")
  261. btn_previous_index = gr.Button("Previous Index")
  262. btn_next_index = gr.Button("Next Index")
  263. with gr.Row():
  264. index_slider = gr.Slider(
  265. minimum=0, maximum=g_max_json_index, value=g_index, step=1, label="Index", scale=3
  266. )
  267. splitpoint_slider = gr.Slider(
  268. minimum=0, maximum=120.0, value=0, step=0.1, label="Audio Split Point(s)", scale=3
  269. )
  270. btn_audio_split = gr.Button("Split Audio", scale=1)
  271. btn_save_json = gr.Button("Save File", visible=True, scale=1)
  272. btn_invert_selection = gr.Button("Invert Selection", scale=1)
  273. with gr.Row():
  274. with gr.Column():
  275. for _ in range(0,g_batch):
  276. with gr.Row():
  277. text = gr.Textbox(
  278. label = "Text",
  279. visible = True,
  280. scale=5
  281. )
  282. audio_output = gr.Audio(
  283. label="Output Audio",
  284. visible = True,
  285. scale=5
  286. )
  287. audio_check = gr.Checkbox(
  288. label="Yes",
  289. show_label = True,
  290. info = "Choose Audio",
  291. scale=1
  292. )
  293. g_text_list.append(text)
  294. g_audio_list.append(audio_output)
  295. g_checkbox_list.append(audio_check)
  296. with gr.Row():
  297. batchsize_slider = gr.Slider(
  298. minimum=1, maximum=g_batch, value=g_batch, step=1, label="Batch Size", scale=3, interactive=False
  299. )
  300. interval_slider = gr.Slider(
  301. minimum=0, maximum=2, value=0, step=0.01, label="Interval", scale=3
  302. )
  303. btn_theme_dark = gr.Button("Light Theme", link="?__theme=light", scale=1)
  304. btn_theme_light = gr.Button("Dark Theme", link="?__theme=dark", scale=1)
  305. btn_change_index.click(
  306. b_change_index,
  307. inputs=[
  308. index_slider,
  309. batchsize_slider,
  310. ],
  311. outputs=[
  312. *g_text_list,
  313. *g_audio_list,
  314. *g_checkbox_list
  315. ],
  316. )
  317. btn_submit_change.click(
  318. b_submit_change,
  319. inputs=[
  320. *g_text_list,
  321. ],
  322. outputs=[
  323. index_slider,
  324. *g_text_list,
  325. *g_audio_list,
  326. *g_checkbox_list
  327. ],
  328. )
  329. btn_previous_index.click(
  330. b_previous_index,
  331. inputs=[
  332. index_slider,
  333. batchsize_slider,
  334. ],
  335. outputs=[
  336. index_slider,
  337. *g_text_list,
  338. *g_audio_list,
  339. *g_checkbox_list
  340. ],
  341. )
  342. btn_next_index.click(
  343. b_next_index,
  344. inputs=[
  345. index_slider,
  346. batchsize_slider,
  347. ],
  348. outputs=[
  349. index_slider,
  350. *g_text_list,
  351. *g_audio_list,
  352. *g_checkbox_list
  353. ],
  354. )
  355. btn_delete_audio.click(
  356. b_delete_audio,
  357. inputs=[
  358. *g_checkbox_list
  359. ],
  360. outputs=[
  361. index_slider,
  362. *g_text_list,
  363. *g_audio_list,
  364. *g_checkbox_list
  365. ]
  366. )
  367. btn_merge_audio.click(
  368. b_merge_audio,
  369. inputs=[
  370. interval_slider,
  371. *g_checkbox_list
  372. ],
  373. outputs=[
  374. index_slider,
  375. *g_text_list,
  376. *g_audio_list,
  377. *g_checkbox_list
  378. ]
  379. )
  380. btn_audio_split.click(
  381. b_audio_split,
  382. inputs=[
  383. splitpoint_slider,
  384. *g_checkbox_list
  385. ],
  386. outputs=[
  387. index_slider,
  388. *g_text_list,
  389. *g_audio_list,
  390. *g_checkbox_list
  391. ]
  392. )
  393. btn_invert_selection.click(
  394. b_invert_selection,
  395. inputs=[
  396. *g_checkbox_list
  397. ],
  398. outputs=[
  399. *g_checkbox_list
  400. ]
  401. )
  402. btn_save_json.click(
  403. b_save_file
  404. )
  405. demo.load(
  406. b_change_index,
  407. inputs=[
  408. index_slider,
  409. batchsize_slider,
  410. ],
  411. outputs=[
  412. *g_text_list,
  413. *g_audio_list,
  414. *g_checkbox_list
  415. ],
  416. )
  417. demo.launch(
  418. server_name="0.0.0.0",
  419. inbrowser=True,
  420. quiet=True,
  421. share=eval(args.is_share),
  422. server_port=int(args.webui_port_subfix)
  423. )