Import.html 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. <!DOCTYPE html>
  2. <html dir="ltr">
  3. <head>
  4. <meta http-equiv="content-type" content="text/html;charset=UTF-8" />
  5. <title>Options/Import - XOWA</title>
  6. <link rel="shortcut icon" href="https://gnosygnu.github.io/xowa/xowa_logo.png" />
  7. <link rel="stylesheet" href="https://gnosygnu.github.io/xowa/xowa_common.css" type="text/css">
  8. <style> /* form-control styling for the options tables */
  9. .options_row_label {font-weight:bold; vertical-align:text-top; text-align:right; width:150px;}
  10. .options_table {border: 1px solid #AAAAAA; border-collapse: collapse; color: black; margin: 1em 0; background-color: #F9F9F9;}
  11. .options_table td {border: 1px solid #AAAAAA; padding: 0.2em;}
  12. .options_textbox {padding: 2px; border: 1px solid black;}
  13. .options_textbox_num {padding: 2px; border: 1px solid black; text-align:right;}
  14. .options_textarea {padding: 2px; border: 1px solid black;}
  15. .options_select {padding: 2px; border: 1px solid black;}
  16. .options_button {padding: 2px; border: 1px solid black; background: white; height: 22px; margin-left: 1px; vertical-align: top;}
  17. .options_checkbox {padding: 2px; border: 1px solid black;}
  18. .options_readonly {background-color:#F9F9F9;}
  19. </style>
  20. <style> /* option-category buttons, tab strip and tab content panel */
  21. .optionsButton
  22. {
  23. width: 100px;
  24. height: 40px;
  25. font-family: 'Segoe UI';
  26. font-size: 12px;
  27. margin-left: -5px;
  28. cursor: pointer;
  29. background-color: lightgray;
  30. text-align: center;
  31. }
  32. .optionsLink
  33. {
  34. color: black !important;
  35. text-decoration: none;
  36. }
  37. .optionsButton.active
  38. {
  39. width: 100px;
  40. height: 40px;
  41. font-family: 'Segoe UI';
  42. font-size: 12px;
  43. margin-left: -5px;
  44. cursor: pointer;
  45. background-color: dimgray;
  46. color: white !important;
  47. border: solid 1px dimgray;
  48. }
  49. .optionsButton.active a
  50. {
  51. color: white !important;
  52. }
  53. .tabs
  54. {
  55. width: 100%;
  56. margin-top: 10px;
  57. }
  58. .tab-links ul
  59. {
  60. margin-left: -40px;
  61. }
  62. .tab-links li
  63. {
  64. margin: 0px 0px 0px 0px;
  65. float: left;
  66. list-style: none;
  67. }
  68. .tab-links a
  69. {
  70. padding: 9px 15px 9px 15px;
  71. border-radius: 3px 3px 0px 0px;
  72. background: #f0f0f0;
  73. font-family: 'Segoe UI';
  74. font-size: 13px;
  75. color: black !important;
  76. text-decoration: none;
  77. border-radius: 3px;
  78. border-top: solid 1px gray;
  79. border-left: solid 1px gray;
  80. border-right: solid 1px gray;
  81. }
  82. .tab-links a:hover
  83. {
  84. background: #B0B0B0;
  85. text-decoration: none;
  86. }
  87. li.active a, li.active a:hover
  88. {
  89. background: #fff;
  90. color: #4c4c4c;
  91. font-weight: bold;
  92. }
  93. .tab-content
  94. {
  95. padding: 15px;
  96. border-radius: 3px;
  97. background: #fff;
  98. border-top: solid 1px gray;
  99. margin-top: 5px;
  100. min-height: 300px;
  101. }
  102. </style>
  198. </head>
  199. <body class="mediawiki ltr sitedir-ltr ns-0 ns-subject skin-vector action-submit vector-animateLayout" spellcheck="false">
  200. <div id="mw-page-base" class="noprint"></div>
  201. <div id="mw-head-base" class="noprint"></div>
  202. <div id="content" class="mw-body">
  203. <h1 id="firstHeading" class="firstHeading"><span>Options/Import</span></h1>
  204. <div id="bodyContent" class="mw-body-content">
  205. <div id="siteSub">From XOWA: the free, open-source, offline wiki application</div>
  206. <div id="contentSub"></div>
  207. <div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr">
  208. <div> <!-- option category switcher; the table is layout-only, with .optionsButton applied to its cells -->
  209. <table role="presentation">
  210. <tr>
  211. <td class="optionsButton">
  212. <a class="optionsLink" href="/wiki/Options/Window">Apps (basic)</a>
  213. </td>
  214. <td class="optionsButton">
  215. <a class="optionsLink" href="/wiki/Options/Security">Apps (advanced)</a>
  216. </td>
  217. <td class="optionsButton active">
  218. <a class="optionsLink" href="/wiki/Options/Import" aria-current="page">Wiki</a>
  219. </td>
  220. <td class="optionsButton">
  221. <a class="optionsLink" href="/wiki/Options/HTML">HTML</a>
  222. </td>
  223. <td class="optionsButton">
  224. <a class="optionsLink" href="/wiki/Options/Content_apps">External apps</a>
  225. </td>
  226. <td class="optionsButton">
  227. <a class="optionsLink" href="/wiki/Options/Search_suggest">Modules</a>
  228. </td>
  229. <td class="optionsButton">
  230. <a class="optionsLink" href="/wiki/Options/Math">Extensions</a>
  231. </td>
  232. <td class="optionsButton">
  233. <a class="optionsLink" href="/wiki/Options/Dev">Misc</a>
  234. </td>
  235. </tr>
  236. </table>
  237. </div>
  238. <div class="tabs">
  239. <div> <!-- sub-page tabs within the selected option category; the active tab is marked for assistive technology -->
  240. <ul class="tab-links" style="margin-left:0px;">
  241. <li class="active">
  242. <a href="/wiki/Options/Import" aria-current="page">Import</a>
  243. </li>
  244. <li>
  245. <a href="/wiki/Options/Files">Files</a>
  246. </li>
  247. <li>
  248. <a href="/wiki/Options/Category">Category</a>
  249. </li>
  250. <li>
  251. <a href="/wiki/Options/Import_Dansguardian">Import Dansguardian</a>
  252. </li>
  253. </ul>
  254. </div><br>
  255. <div class='tab-content'>
  256. <div id="toc" class="toc">
  257. <div id="toctitle">
  258. <h2>
  259. Contents
  260. </h2>
  261. </div>
  262. <ul>
  263. <li class="toclevel-1 tocsection-1">
  264. <a href="#Wiki_setup"><span class="tocnumber">1</span> <span class="toctext">Wiki setup</span></a>
  265. </li>
  266. <li class="toclevel-1 tocsection-2">
  267. <a href="#Import_process"><span class="tocnumber">2</span> <span class="toctext">Import process</span></a>
  268. </li>
  269. <li class="toclevel-1 tocsection-3">
  270. <a href="#PageRank"><span class="tocnumber">3</span> <span class="toctext">PageRank</span></a>
  271. </li>
  272. <li class="toclevel-1 tocsection-4">
  273. <a href="#Database_layout"><span class="tocnumber">4</span> <span class="toctext">Database layout</span></a>
  274. </li>
  275. <li class="toclevel-1 tocsection-5">
  276. <a href="#Decompression_apps"><span class="tocnumber">5</span> <span class="toctext">Decompression apps</span></a>
  277. </li>
  278. <li class="toclevel-1 tocsection-6">
  279. <a href="#Notes"><span class="tocnumber">6</span> <span class="toctext">Notes</span></a>
  280. </li>
  281. </ul>
  282. </div>
  283. <div style="float:right; margin-right:10px; margin-top:5px;"> <!-- save command link; the icon is decorative because the link text already names the action -->
  284. <p>
  285. <a href="xowa-cmd:app.user.prefs.save();" accesskey="s"><img src="file:///C:/xowa/bin/any/xowa/file/app.window/save.png" alt="" width="16" height="16">Save</a>
  286. </p>
  287. </div><br>
  288. <h2>
  289. <span class="mw-headline" id="Wiki_setup">Wiki setup</span>
  290. </h2>
  291. <table class='options_table'>
  292. <tr>
  293. <td class='options_row_label'>
  294. Search version: <sup id="cite_ref-search_version_0-0" class="reference"><a href="#cite_note-search_version-0">[1]</a></sup>
  295. </td>
  296. <td>
  297. <select xowa_prop="app.setup.dumps.search_version" xowa_prop_list="app.setup.dumps.search_version_list" class="options_textbox" size='2' id='xowa_prop_0'>
  298. <option value='1'>
  299. 1
  300. </option>
  301. <option value='2' selected='selected'>
  302. 2
  303. </option>
  304. </select>
  305. </td>
  306. </tr>
  307. <tr>
  308. <td class='options_row_label'>
  309. Page storage format: <sup id="cite_ref-data_storage_format_1-0" class="reference"><a href="#cite_note-data_storage_format-1">[2]</a></sup>
  310. </td>
  311. <td>
  312. <select xowa_prop="xowa.api.bldr.wiki.import.zip_tid_text" xowa_prop_list="xowa.api.bldr.wiki.import.zip_tid_list" class="options_textbox" size='3' id='xowa_prop_1'>
  313. <option value='raw'>
  314. text
  315. </option>
  316. <option value='gzip' selected='selected'>
  317. gzip
  318. </option>
  319. <option value='bzip2'>
  320. bzip2
  321. </option>
  322. </select>
  323. </td>
  324. </tr>
  325. </table>
  326. <h2>
  327. <span class="mw-headline" id="Import_process">Import process</span>
  328. </h2>
  329. <table class='options_table'>
  330. <tr>
  331. <td class='options_row_label'>
  332. Dump servers: <sup id="cite_ref-dump_server_urls_2-0" class="reference"><a href="#cite_note-dump_server_urls-2">[3]</a></sup>
  333. </td>
  334. <td>
  335. <textarea xowa_prop="app.setup.dumps.server_urls" class="options_textarea" style="width: 400px; height:72px;" id='xowa_prop_2'>
  336. https://dumps.wikimedia.org/,
  337. http://dumps.wikimedia.your.org/,
  338. http://wikipedia.c3sl.ufpr.br/,
  339. http://ftp.fi.muni.cz/pub/wikimedia/
  340. </textarea>
  341. </td>
  342. </tr>
  343. <tr>
  344. <td class='options_row_label'>
  345. Import bz2 by stdout: <sup id="cite_ref-import_bz2_by_stdout_3-0" class="reference"><a href="#cite_note-import_bz2_by_stdout-3">[4]</a></sup>
  346. </td>
  347. <td>
  348. <input xowa_prop="app.setup.dumps.import_bz2_by_stdout" type="checkbox" class="options_checkbox" id='xowa_prop_3' checked='checked'>
  349. </td>
  350. </tr>
  351. <tr>
  352. <td class='options_row_label'>
  353. Import bz2 by stdout process:<sup id="cite_ref-4" class="reference"><a href="#cite_note-4">[5]</a></sup>
  354. </td>
  355. <td>
  356. <input xowa_prop="app.fsys.apps.decompress_bz2_by_stdout.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id='xowa_prop_4' value='C:\xowa\bin\windows_64\7-zip\7za'><button id='xowa_prop_4_io' class='options_button' onclick='xowa_io_select("file", "xowa_prop_4", "Please select a file.");'>...</button>
  357. <p>
  358. <input xowa_prop="app.fsys.apps.decompress_bz2_by_stdout.args" class="options_textbox" style="width: 380px;" id='xowa_prop_5' value='x -so "~{src}"'>
  359. </p>
  360. </td>
  361. </tr>
  362. <tr>
  363. <td class='options_row_label'>
  364. Custom wiki commands: <sup id="cite_ref-custom_cmds_5-0" class="reference"><a href="#cite_note-custom_cmds-5">[6]</a></sup>
  365. </td>
  366. <td>
  367. <input xowa_prop="app.setup.dumps.custom_cmds" class="options_textbox" style="width: 400px;" id='xowa_prop_6' value='wiki.download,wiki.import'>
  368. </td>
  369. </tr>
  370. <tr>
  371. <td class='options_row_label'>
  372. Download xowa_common.css: <sup id="cite_ref-download_xowa_common_css_6-0" class="reference"><a href="#cite_note-download_xowa_common_css-6">[7]</a></sup>
  373. </td>
  374. <td>
  375. <input xowa_prop="app.setup.dumps.css_commons_download" type="checkbox" class="options_checkbox" id='xowa_prop_7' checked='checked'>
  376. </td>
  377. </tr>
  378. <tr>
  379. <td class='options_row_label'>
  380. Delete xml file after import: <sup id="cite_ref-delete_xml_file_7-0" class="reference"><a href="#cite_note-delete_xml_file-7">[8]</a></sup>
  381. </td>
  382. <td>
  383. <input xowa_prop="app.setup.dumps.delete_xml_file" type="checkbox" class="options_checkbox" id='xowa_prop_8' checked='checked'>
  384. </td>
  385. </tr>
  386. </table>
  387. <h2>
  388. <span class="mw-headline" id="PageRank">PageRank</span>
  389. </h2>
  390. <table class='options_table'>
  391. <tr>
  392. <td class='options_row_label'>
  393. PageRank iteration max: <sup id="cite_ref-page_rank-iteration_max_8-0" class="reference"><a href="#cite_note-page_rank-iteration_max-8">[9]</a></sup>
  394. </td>
  395. <td>
  396. <input xowa_prop="xowa.api.bldr.wiki.import.page_rank.iteration_max" class="options_textbox" style="width: 400px;" id='xowa_prop_9' value='0'>
  397. </td>
  398. </tr>
  399. </table>
  400. <h2>
  401. <span class="mw-headline" id="Database_layout">Database layout</span>
  402. </h2>
  403. <table class='options_table'>
  404. <tr>
  405. <td class='options_row_label'>
  406. Max file size for single text database: <sup id="cite_ref-layout_text_max_9-0" class="reference"><a href="#cite_note-layout_text_max-9">[10]</a></sup>
  407. </td>
  408. <td>
  409. <input xowa_prop="xowa.api.bldr.wiki.import.layout_text_max" class="options_textbox" style="width: 400px;" id='xowa_prop_10' value='1500'>
  410. </td>
  411. </tr>
  412. </table>
  413. <h2>
  414. <span class="mw-headline" id="Decompression_apps">Decompression apps</span>
  415. </h2>
  416. <table class="options_table">
  417. <tr>
  418. <td class='options_row_label'>
  419. Decompress bz2 file
  420. <p>
  421. <sup id="cite_ref-10" class="reference"><a href="#cite_note-10">[11]</a></sup>
  422. </p>
  423. </td>
  424. <td>
  425. <input xowa_prop="app.fsys.apps.decompress_bz2.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id='xowa_prop_11' value='C:\xowa\bin\windows_64\7-zip\7za'><button id='xowa_prop_11_io' class='options_button' onclick='xowa_io_select("file", "xowa_prop_11", "Please select a file.");'>...</button>
  426. <p>
  427. <input xowa_prop="app.fsys.apps.decompress_bz2.args" class="options_textbox" style="width: 380px;" id='xowa_prop_12' value='x -y -r "~{src}" -o"~{trg_dir}"'>
  428. </p>
  429. </td>
  430. </tr>
  431. <tr> <!-- zip settings bind to their own decompress_zip.* properties, parallel to the bz2 and gz rows -->
  432. <td class='options_row_label'>
  433. Decompress zip file
  434. <p>
  435. <sup id="cite_ref-11" class="reference"><a href="#cite_note-11">[12]</a></sup>
  436. </p>
  437. </td>
  438. <td>
  439. <input xowa_prop="app.fsys.apps.decompress_zip.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id='xowa_prop_13' value='C:\xowa\bin\windows_64\7-zip\7za'><button id='xowa_prop_13_io' class='options_button' onclick='xowa_io_select("file", "xowa_prop_13", "Please select a file.");'>...</button>
  440. <p>
  441. <input xowa_prop="app.fsys.apps.decompress_zip.args" class="options_textbox" style="width: 380px;" id='xowa_prop_14' value='x -y -r "~{src}" -o"~{trg_dir}"'>
  442. </p>
  443. </td>
  444. </tr>
  445. <tr>
  446. <td class='options_row_label'>
  447. Decompress gz file
  448. <p>
  449. <sup id="cite_ref-12" class="reference"><a href="#cite_note-12">[13]</a></sup>
  450. </p>
  451. </td>
  452. <td>
  453. <input xowa_prop="app.fsys.apps.decompress_gz.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id='xowa_prop_15' value='C:\xowa\bin\windows_64\7-zip\7za'><button id='xowa_prop_15_io' class='options_button' onclick='xowa_io_select("file", "xowa_prop_15", "Please select a file.");'>...</button>
  454. <p>
  455. <input xowa_prop="app.fsys.apps.decompress_gz.args" class="options_textbox" style="width: 380px;" id='xowa_prop_16' value='x -y -r "~{src}" -o"~{trg_dir}"'>
  456. </p>
  457. </td>
  458. </tr>
  459. </table>
  460. <h2>
  461. <span class="mw-headline" id="Notes">Notes</span>
  462. </h2>
  463. <ol class="references">
  464. <li id="cite_note-search_version-0">
  465. <span class="mw-cite-backlink"><a href="#cite_ref-search_version_0-0">^</a></span> <span class="reference-text">Choose one of the following: (default is <code>2</code>)</span>
  466. <ul>
  467. <li>
  468. <span class="reference-text"><b>1</b> : search uses page title data. searches are less accurate, but less space is used</span>
  469. </li>
  470. <li>
  471. <span class="reference-text"><b>2</b> : (default) search uses title word data. searches are more accurate, but more space is used and setup is longer. For English Wikipedia, another 1.8 GB and 38 min of time are needed.</span>
  472. </li>
  473. </ul>
  474. </li>
  475. <li id="cite_note-data_storage_format-1">
  476. <span class="mw-cite-backlink"><a href="#cite_ref-data_storage_format_1-0">^</a></span> <span class="reference-text">Choose one of the following: (default is <code>gzip</code>)</span>
  477. <ul>
  478. <li>
  479. <span class="reference-text"><b>text</b>: fastest for reading but has no compression. Simple Wikipedia will be 300 MB</span>
  480. </li>
  481. <li>
  482. <span class="reference-text"><b>gzip</b>: (default) fast for reading and has compression. Simple Wikipedia will be 100 MB</span>
  483. </li>
  484. <li>
  485. <span class="reference-text"><b>bzip2</b>: very slow for reading but has best compression. Simple Wikipedia will be 85 MB (Note: The performance difference is very noticeable. Please try this with Simple Wikipedia first before using on a large wiki.)</span>
  486. </li>
  487. </ul>
  488. </li>
  489. <li id="cite_note-dump_server_urls-2">
  490. <span class="mw-cite-backlink"><a href="#cite_ref-dump_server_urls_2-0">^</a></span> <span class="reference-text">Enter a list of server urls separated by a comma and newline.</span>
  491. <ul>
  492. <li>
  493. <span class="reference-text">The default value is:</span>
  494. </li>
  495. </ul>
  496. <pre>
  497. <span class="reference-text">http://dumps.wikimedia.your.org/,
  498. http://dumps.wikimedia.org/,
  499. http://wikipedia.c3sl.ufpr.br/,
  500. http://ftp.fi.muni.cz/pub/wikimedia/
  501. </span>
  502. </pre>
  503. <ul>
  504. <li>
  505. <span class="reference-text">Note that servers are prioritized from left-to-right. In the default example, <b>your.org</b> will be tried first. If it is offline, then the next server -- <b>dumps.wikimedia.org</b> -- will be tried, etc.</span>
  506. </li>
  507. <li>
  508. <span class="reference-text">See <a href="http://xowa.org/wiki/home/page/App/Import/Download/Dump_servers.html" id="xolnki_2" title="App/Import/Download/Dump servers">App/Import/Download/Dump_servers</a> for more info</span>
  509. </li>
  510. </ul>
  511. </li>
  512. <li id="cite_note-import_bz2_by_stdout-3">
  513. <span class="mw-cite-backlink"><a href="#cite_ref-import_bz2_by_stdout_3-0">^</a></span> <span class="reference-text"><b>NOTE 1: this option only applies if the "Custom wiki commands" option is <code>wiki.download,wiki.import</code> (wiki.unzip must be removed)</b><br>
  514. Select the method for importing a wiki dump bz2 file. (default is <code>checked</code>)</span>
  515. <ul>
  516. <li>
  517. <span class="reference-text"><b>checked</b> : import through a native process's stdout. This will be faster, but may not work on all Operating Systems. A 95 MB file takes 85 seconds</span>
  518. </li>
  519. <li>
  520. <span class="reference-text"><b>unchecked</b>: import through Apache Commons' Java bz2 compression library. This will be slower, but will work on all Operating Systems. A 95 MB file takes 215 seconds.</span>
  521. </li>
  522. </ul><span class="reference-text"><b>NOTE 2: lbzip2 (Many thanks to Anselm for making this suggestion, as well as compiling the data to support it. See <a href="http://sourceforge.net/p/xowa/tickets/263/?limit=10&amp;page=6#f2fb/dcb6" rel="nofollow" class="external free">http://sourceforge.net/p/xowa/tickets/263/?limit=10&amp;page=6#f2fb/dcb6</a>)</b> Linux users should consider using lbzip2, as lbzip2 has significant performance differences (30% in many cases).</span>
  523. <ul>
  524. <li>
  525. <span class="reference-text">install lbzip2</span>
  526. <ul>
  527. <li>
  528. <span class="reference-text">(Debian) <code>sudo apt-get install lbzip2</code></span>
  529. </li>
  530. </ul>
  531. </li>
  532. <li>
  533. <span class="reference-text">change "Import bz2 by stdout process" to</span>
  534. <ul>
  535. <li>
  536. <span class="reference-text"><code>lbzip2</code></span>
  537. </li>
  538. <li>
  539. <span class="reference-text"><code>-dkc "~{src}"</code></span>
  540. </li>
  541. </ul>
  542. </li>
  543. </ul>
  544. </li>
  545. <li id="cite_note-4">
  546. <span class="mw-cite-backlink"><a href="#cite_ref-4">^</a></span> <span class="reference-text">Process used to decompress bz2 by stdout. Recommended: Operating System default</span>
  547. </li>
  548. <li id="cite_note-custom_cmds-5">
  549. <span class="mw-cite-backlink"><a href="#cite_ref-custom_cmds_5-0">^</a></span> <span class="reference-text">Select custom commands: (default is <code>wiki.download,wiki.unzip,wiki.import</code>)<br>
  550. <b>Short version:</b></span>
  551. <ul>
  552. <li>
  553. <span class="reference-text">For fast imports, but high disk space usage, use <code>wiki.download,wiki.unzip,wiki.import</code></span>
  554. </li>
  555. <li>
  556. <span class="reference-text">For slow imports, but low disk space usage, use <code>wiki.download,wiki.import</code></span>
  557. </li>
  558. </ul><span class="reference-text"><b>Long version:</b> Enter a list of commands separated by a comma. Valid commands are listed below. Note that simple.wikipedia.org is used for all examples, but the commands apply to any wiki.</span>
  559. <ul>
  560. <li>
  561. <span class="reference-text"><code>wiki.download</code>: downloads the wiki data dump from the dump server</span>
  562. </li>
  563. </ul>
  564. <dl>
  565. <dd>
  566. <span class="reference-text">A file will be generated in "/xowa/wiki/simple.wikipedia.org/simplewiki-latest-pages-articles.xml.bz2"</span>
  567. </dd>
  568. </dl>
  569. <ul>
  570. <li>
  571. <span class="reference-text"><code>wiki.unzip</code>: unzips an xml file from the wiki data dump</span>
  572. </li>
  573. </ul>
  574. <dl>
  575. <dd>
  576. <span class="reference-text">A file will be created for "/xowa/wiki/simple.wikipedia.org/simplewiki-latest-pages-articles.xml" (assuming the corresponding .xml.bz2 exists)</span>
  577. </dd>
  578. <dd>
  579. <span class="reference-text">If this step is omitted, then XOWA will read directly from the .bz2 file. Although this will use less space (no .xml file to unzip), it will be significantly slower. <b>Also, due to a program limitation, the progress percentage will not be accurate. It may hover at 99.99% for several minutes</b></span>
  580. </dd>
  581. </dl>
  582. <ul>
  583. <li>
  584. <span class="reference-text"><code>wiki.import</code>: imports the xml file</span>
  585. </li>
  586. </ul>
  587. <dl>
  588. <dd>
  589. <span class="reference-text">A wiki will be imported from "/xowa/wiki/simple.wikipedia.org/simplewiki-latest-pages-articles.xml"</span>
  590. </dd>
  591. </dl><span class="reference-text">The following lists possible combinations:</span>
  592. <ul>
  593. <li>
  594. <span class="reference-text"><code>wiki.download,wiki.unzip,wiki.import</code> AKA: <b>fastest</b></span>
  595. </li>
  596. </ul>
  597. <dl>
  598. <dd>
  599. <span class="reference-text">This is the default. Note that this will be the fastest to set up, but will take more space. For example, English Wikipedia will set up in 5 hours and require at least 45 GB of temp space</span>
  600. </dd>
  601. </dl>
  602. <ul>
  603. <li>
  604. <span class="reference-text"><code>wiki.download,wiki.import</code> AKA: <b>smallest</b></span>
  605. </li>
  606. </ul>
  607. <dl>
  608. <dd>
  609. <span class="reference-text">This will read directly from the bz2 file. Note that this will use the least disk space, but will take more time. For example, English Wikipedia will set up in 8 hours but will only use 5 GB of temp space</span>
  610. </dd>
  611. </dl>
  612. </li>
  613. <li id="cite_note-download_xowa_common_css-6">
  614. <span class="mw-cite-backlink"><a href="#cite_ref-download_xowa_common_css_6-0">^</a></span> <span class="reference-text">Affects the xowa_common.css in /xowa/user/anonymous/wiki/wiki_name/html/. Occurs when importing a wiki. (default is <code>checked</code>)</span>
  615. <ul>
  616. <li>
  617. <span class="reference-text"><b>checked</b> : downloads xowa_common.css from the Wikimedia servers. Note that this stylesheet will be the latest copy but it may cause unexpected formatting in XOWA.</span>
  618. </li>
  619. <li>
  620. <span class="reference-text"><b>unchecked</b>: (default) copies xowa_common.css from /xowa/bin/any/html/html/import/. Note that this stylesheet is the one XOWA is coded against. It is the most stable, but will not have the latest logo</span>
  621. </li>
  622. </ul>
  623. </li>
  624. <li id="cite_note-delete_xml_file-7">
  625. <span class="mw-cite-backlink"><a href="#cite_ref-delete_xml_file_7-0">^</a></span> <span class="reference-text">(Only relevant for wiki.unzip) Choose one of the following: (default is <code>checked</code>)</span>
  626. <ul>
  627. <li>
  628. <span class="reference-text"><b>checked</b> : (default) the .xml file is automatically deleted once the import process completes</span>
  629. </li>
  630. <li>
  631. <span class="reference-text"><b>unchecked</b>: the .xml file is untouched</span>
  632. </li>
  633. </ul>
  634. </li>
  635. <li id="cite_note-page_rank-iteration_max-8">
  636. <span class="mw-cite-backlink"><a href="#cite_ref-page_rank-iteration_max_8-0">^</a></span> <span class="reference-text">Specify one of the following: (default is <code>0</code>)</span>
  637. <ul>
  638. <li>
  639. <span class="reference-text"><b>0</b> : (default) page rank is disabled</span>
  640. </li>
  641. <li>
  642. <span class="reference-text"><b>(number greater than 1)</b>: page rank will be calculated until it is finished or the maximum number of iterations is reached. For more info, see <a href="http://xowa.org/wiki/home/page/Help/Features/Search/Build.html" id="xolnki_3" title="Help/Features/Search/Build" class="xowa-visited">Help/Features/Search/Build</a></span>
  643. </li>
  644. </ul>
  645. </li>
  646. <li id="cite_note-layout_text_max-9">
  647. <span class="mw-cite-backlink"><a href="#cite_ref-layout_text_max_9-0">^</a></span> <span class="reference-text">Enter a number in MB to represent the cutoff for multi-file wikis and single-file wikis (default is <code>1500</code>)</span>
  648. <ul>
  649. <li>
  650. <span class="reference-text">For example, 1500 means that a wiki with a dump file size of 1.5 GB or less will generate a single-file wiki (for example, Simple Wikipedia). Any wiki with a dump file size larger than 1.5 GB will generate a multiple-file wiki. (for example, English Wikipedia).</span>
  651. </li>
  652. <li>
  653. <span class="reference-text">If you always want to generate a single-file wiki, set the value to a large number like 999,999 (999 GB)</span>
  654. </li>
  655. <li>
  656. <span class="reference-text">If you always want to generate a multi-file wiki, set the value to 0.</span>
  657. </li>
  658. </ul>
  659. </li>
  660. <li id="cite_note-10">
  661. <span class="mw-cite-backlink"><a href="#cite_ref-10">^</a></span> <span class="reference-text">Decompress bz2 file (needed for importing dumps). Recommended: <a href="http://7-zip.org/" rel="nofollow" class="external text">7-zip</a></span>
  662. </li>
  663. <li id="cite_note-11">
  664. <span class="mw-cite-backlink"><a href="#cite_ref-11">^</a></span> <span class="reference-text">Decompress zip file (needed for importing dumps). Recommended: <a href="http://7-zip.org/" rel="nofollow" class="external text">7-zip</a></span>
  665. </li>
  666. <li id="cite_note-12">
  667. <span class="mw-cite-backlink"><a href="#cite_ref-12">^</a></span> <span class="reference-text">Decompress gz file (needed for importing dumps). Recommended: <a href="http://7-zip.org/" rel="nofollow" class="external text">7-zip</a></span>
  668. </li>
  669. </ol>
  670. </div>
  671. </div>
  672. </div>
  673. </div>
  674. </div>
  675. <div id="mw-head" class="noprint">
  676. <div id="left-navigation">
  677. <div id="p-namespaces" class="vectorTabs">
  678. <h3>Namespaces</h3>
  679. <ul>
  680. <li id="ca-nstab-main" class="selected"><span><a id="ca-nstab-main-href" href="index.html">Page</a></span></li>
  681. </ul>
  682. </div>
  683. </div>
  684. </div>
  685. <div id='mw-panel' class='noprint'>
  686. <div id='p-logo'>
  687. <a style="background-image: url(https://gnosygnu.github.io/xowa/xowa_logo.png);" href="http://xowa.org/" title="Visit the main page"></a>
  688. </div>
  689. <div class="portal" id='xowa-portal-home'>
  690. <h3>XOWA</h3>
  691. <div class="body">
  692. <ul>
  693. <li><a href="http://xowa.org/index.html" title='Visit the main page'>Main page</a></li>
  694. <li><a href="http://xowa.org/screenshots.html" title='See screenshots of XOWA'>Screenshots</a></li>
  695. <li><a href="http://xowa.org/wiki/home/page/Help/Download_XOWA.html" title='Download the XOWA application'>Download XOWA</a></li>
  696. <li><a href="http://xowa.org/wiki/home/page/Dashboard/Image_databases.html" title='Download offline wikis and image databases'>Download wikis</a></li>
  697. </ul>
  698. </div>
  699. </div>
  700. <div class="portal" id='xowa-portal-stargin'>
  701. <h3>Getting started</h3>
  702. <div class="body">
  703. <ul>
  704. <li><a href="http://xowa.org/wiki/home/page/App/Setup/System_requirements.html" title='Get XOWA&apos;s system requirements'>Requirements</a></li>
  705. <li><a href="http://xowa.org/wiki/home/page/App/Setup/Installation.html" title='Get instructions for installing XOWA'>Installation</a></li>
  706. <li><a href="http://xowa.org/wiki/home/page/App/Import/Simple_Wikipedia.html" title='Learn how to set up Simple Wikipedia'>Simple Wikipedia</a></li>
  707. <li><a href="http://xowa.org/wiki/home/page/App/Import/English_Wikipedia.html" title='Learn how to set up English Wikipedia'>English Wikipedia</a></li>
  708. <li><a href="http://xowa.org/wiki/home/page/App/Import/Other_wikis.html" title='Learn how to set up other Wikipedias'>Other Wikipedias</a></li>
  709. </ul>
  710. </div>
  711. </div>
  712. <div class="portal" id='xowa-portal-help'>
  713. <h3>Help</h3>
  714. <div class="body">
  715. <ul>
  716. <li><a href="http://xowa.org/wiki/home/page/Help/About.html" title='Get more information about XOWA'>About</a></li>
  717. <li><a href="http://xowa.org/wiki/home/page/Help/Contents.html" title='View a list of help topics'>Contents</a></li>
  718. <li><a href="http://xowa.org/wiki/home/page/Help/Media.html" title='Read what others have written about XOWA'>Media</a></li>
  719. <li><a href="http://xowa.org/wiki/home/page/Help/Feedback.html" title='Questions? Comments? Leave feedback for XOWA'>Feedback</a></li>
  720. </ul>
  721. </div>
  722. </div>
  723. <div class="portal" id='xowa-portal-blog'>
  724. <h3>Blog</h3>
  725. <div class="body">
  726. <ul>
  727. <li><a href="http://xowa.org/wiki/home/page/Blog.html" title='Follow XOWA&apos;s development process'>Current</a></li>
  728. </ul>
  729. </div>
  730. </div>
  731. <div class="portal" id='xowa-portal-links'>
  732. <h3>Links</h3>
  733. <div class="body">
  734. <ul>
  735. <li><a href="http://dumps.wikimedia.org/backup-index.html" title="Get wiki database dumps directly from Wikimedia">Wikimedia dumps</a></li>
  736. <li><a href="https://archive.org/search.php?query=xowa" title="Search archive.org for XOWA files">XOWA @ archive.org</a></li>
  737. <li><a href="http://en.wikipedia.org" title="Visit Wikipedia (and compare to XOWA!)">English Wikipedia</a></li>
  738. </ul>
  739. </div>
  740. </div>
  741. <div class="portal" id='xowa-portal-donate'>
  742. <h3>Donate</h3>
  743. <div class="body">
  744. <ul>
  745. <li><a href="https://archive.org/donate/index.php" title="Support archive.org!">archive.org</a></li><!-- listed first due to recent fire damages: http://blog.archive.org/2013/11/06/scanning-center-fire-please-help-rebuild/ -->
  746. <li><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector" title="Support Wikipedia!">Wikipedia</a></li>
  747. <!-- <li><a href="" title="Support XOWA! (but only after you've supported archive.org and Wikipedia)">XOWA</a></li> -->
  748. </ul>
  749. </div>
  750. </div>
  751. </div>
  752. </body>
  753. </html>