瀏覽代碼

Full-text search: Specify namespaces for indexing

gnosygnu 8 年之前
父節點
當前提交
a4380b6d48

+ 1 - 1
140_dbs/src/gplx/dbs/Db_conn.java

@@ -105,7 +105,7 @@ public class Db_conn {
 	public int					Exec_sql_args(String sql, Object... args)	{return this.Exec_qry(Db_qry_sql.dml_(String_.Format(sql, args)));}
 	public int					Exec_sql_plog_ntx(String msg, String sql) {return Exec_sql_plog(Bool_.N, msg, sql);}
 	public int					Exec_sql_plog_txn(String msg, String sql) {return Exec_sql_plog(Bool_.Y, msg, sql);}
-	public int					Exec_sql_plog(boolean txn, String msg, String sql) {
+	public int					Exec_sql_plog(boolean txn, String msg, String sql) {			
 		Gfo_usr_dlg_.Instance.Plog_many("", "", msg);
 		if (txn) this.Txn_bgn(msg);
 		int rv = Exec_sql(sql);

+ 55 - 0
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java

@@ -0,0 +1,55 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
+import gplx.xowa.wikis.nss.*;
+/** Arguments for a full-text indexing run: which wikis to index and which namespaces to include. */
+public class Xofulltext_indexer_args implements Gfo_invk {
+	/** Wikis to index; when null, {@link #Init_by_wiki} sets it to the current wiki's domain. */
+	public byte[] wikis;
+	/**
+	 * Namespace ids to index, pipe-delimited (EX: "0|4"). null is defaulted to "0" and "*" is
+	 * expanded to every non-negative namespace id by {@link #Init_by_wiki}.
+	 * NOTE(review): Xofulltext_indexer_mgr.Exec splices this value into a SQL "IN (...)" list after
+	 * replacing "|" with ","; the value can come from JSON / gfs input and is not validated here.
+	 */
+	public String ns_ids;
+	/**
+	 * Fills in defaults for any argument that was not supplied, using the given wiki.
+	 * @param wiki wiki supplying the default domain and the namespace list for the "*" expansion
+	 */
+	public void Init_by_wiki(Xowe_wiki wiki) {
+		// wikis: null 
+		if (wikis == null)
+			wikis = wiki.Domain_bry();
+
+		// ns: null / *
+		if (ns_ids == null)
+			ns_ids = "0";
+		else if (String_.Eq(ns_ids, "*")) {
+			Xow_ns[] ns_ary = wiki.Ns_mgr().Ords_ary();
+			int len = ns_ary.length;
+			Bry_bfr bfr = Bry_bfr_.New();
+			boolean delim_needed = false;
+			for (int i = 0; i < len; i++) {
+				Xow_ns ns = ns_ary[i];
+				int ns_id = ns.Id();
+				if (ns_id < 0) continue; // ignore media, special 
+				// delimiter depends on whether an id was already written, not on the loop index;
+				// otherwise a skipped negative ns at index 0 produces a leading "|" (and later "IN (,0,...)")
+				if (delim_needed) bfr.Add_byte(Byte_ascii.Pipe);
+				bfr.Add_int_variable(ns_id);
+				delim_needed = true;
+			}
+			ns_ids = bfr.To_str_and_clear();
+		}
+	}
+	/**
+	 * Gfs dispatch: "wikis_" assigns {@link #wikis} and "ns_ids" assigns {@link #ns_ids}, each read
+	 * from message arg "v" with null as the fallback. Any other key returns Gfo_invk_.Rv_unhandled.
+	 * @return this on a handled key
+	 */
+	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
+		if      (ctx.Match(k, "wikis_"))            this.wikis = m.ReadBryOr("v", null);
+		else if	(ctx.Match(k, "ns_ids"))            this.ns_ids = m.ReadStrOr("v", null);
+		else	return Gfo_invk_.Rv_unhandled;
+		return this;
+	}
+	/**
+	 * Builds args from a JSON node by reading its "wikis" and "ns_ids" members.
+	 * Defaults are not applied here; call {@link #Init_by_wiki} before use.
+	 */
+	public static Xofulltext_indexer_args New_by_json(gplx.langs.jsons.Json_nde args) {
+		Xofulltext_indexer_args rv = new Xofulltext_indexer_args();
+		rv.wikis = args.Get_as_bry("wikis");
+		rv.ns_ids = args.Get_as_str("ns_ids");
+		return rv;
+	}
+}

+ 6 - 1
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_cmd.java

@@ -16,10 +16,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
 package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
 import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
 public class Xofulltext_indexer_cmd extends Xob_cmd__base {
+	private final    Xofulltext_indexer_args args = new Xofulltext_indexer_args();
 	public Xofulltext_indexer_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
 	@Override public void Cmd_run() {
 		wiki.Init_assert();
-		new Xofulltext_indexer_mgr().Exec(wiki, null);
+		new Xofulltext_indexer_mgr().Exec(wiki, null, args);
+	}
+	@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
+		if      (ctx.Match(k, "args"))              return args;
+		else	return Gfo_invk_.Rv_unhandled;
 	}
 
 	@Override public String Cmd_key() {return "search.index";}

+ 19 - 6
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java

@@ -20,19 +20,28 @@ import gplx.xowa.wikis.data.*;
 import gplx.xowa.htmls.core.dbs.*;
 import gplx.xowa.addons.wikis.fulltexts.indexers.svcs.*;
 public class Xofulltext_indexer_mgr {
-	public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui) {
+	public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui, Xofulltext_indexer_args args) {
+		// init indexer
+		Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr();
+		indexer.Init(wiki);
+
+		// get page tbl
 		Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
 		gplx.xowa.wikis.data.tbls.Xowd_page_tbl page_tbl = core_db.Tbl__page();
 
+		// init args
+		args.Init_by_wiki(wiki);
+		int count = 0;
 		Xoh_page hpg = new Xoh_page();
 
-		Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr();
-		indexer.Init(wiki);
-
+		// get rdr and loop
 		Db_conn conn = page_tbl.Conn();
-		Db_rdr rdr = conn.Exec_rdr("SELECT page_id, page_score, page_namespace, page_title, page_html_db_id FROM page WHERE page_namespace = 0;");
-		int count = 0;
+		Db_rdr rdr = conn.Exec_rdr(Db_sql_.Make_by_fmt(String_.Ary
+		( "SELECT  page_id, page_score, page_namespace, page_title, page_html_db_id"
+		, "FROM    page"
+		, "WHERE   page_namespace IN ({0});"), String_.Replace(args.ns_ids, "|", ",")));
 		while (rdr.Move_next()) {
+			// read vars
 			int page_namespace = rdr.Read_int("page_namespace");
 			byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
 			int page_id = rdr.Read_int("page_id");
@@ -52,7 +61,10 @@ public class Xofulltext_indexer_mgr {
 					continue;
 				byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
 
+				// run index
 				indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
+
+				// notify
 				if ((++count % 10000) == 0) {
 					Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
 					if (ui != null)
@@ -63,6 +75,7 @@ public class Xofulltext_indexer_mgr {
 			}
 		}
 
+		// term indexer
 		indexer.Term();
 	}
 }

+ 6 - 4
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_doc.java

@@ -16,14 +16,16 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
 package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
 import gplx.langs.mustaches.*;
 public class Xofulltext_indexer_doc implements Mustache_doc_itm {
-	private final    byte[] wikis_bry;
-	public Xofulltext_indexer_doc
-		( byte[] wikis_bry) {
+	private final    String wikis_bry, ns_ids;
+	public Xofulltext_indexer_doc(String wikis_bry, String ns_ids) {
 		this.wikis_bry = wikis_bry;
+		this.ns_ids = ns_ids;
 	}
 	public boolean Mustache__write(String key, Mustache_bfr bfr) {
 		if		(String_.Eq(key, "wikis"))
-			bfr.Add_bry(wikis_bry);
+			bfr.Add_str_u8(wikis_bry);
+		else if	(String_.Eq(key, "ns_ids"))
+			bfr.Add_str_u8(ns_ids);
 		else
 			return false;
 		return true;

+ 4 - 4
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_html.java

@@ -17,15 +17,15 @@ package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; impor
 import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
 import gplx.dbs.*;
 class Xofulltext_indexer_html extends Xow_special_wtr__base {
-	private final    byte[] wikis_bry;
-	public Xofulltext_indexer_html
-		( byte[] wikis_bry) {
+	private final    String wikis_bry, ns_ids;
+	public Xofulltext_indexer_html(String wikis_bry, String ns_ids) {
 		this.wikis_bry = wikis_bry;
+		this.ns_ids = ns_ids;
 	}
 	@Override protected Io_url Get_addon_dir(Xoa_app app)			{return Addon_dir(app);}
 	@Override protected Io_url Get_mustache_fil(Io_url addon_dir)	{return addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.template.html");}
 	@Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
-		return new Xofulltext_indexer_doc(wikis_bry);
+		return new Xofulltext_indexer_doc(wikis_bry, ns_ids);
 	}
 	@Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
 		Xopg_tag_mgr head_tags = page_data.Head_tags();

+ 2 - 2
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_special.java

@@ -20,12 +20,12 @@ public class Xofulltext_indexer_special implements Xow_special_page {
 	public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
 		// get qry if any
 		Gfo_qarg_mgr url_args = new Gfo_qarg_mgr().Init(url.Qargs_ary());
-		byte[] wikis_bry = url_args.Read_bry_or("wikis", Bry_.Empty);
 
 		// get options and create page
 		// Xocfg_mgr cfg_mgr = wiki.App().Cfg();
 		new Xofulltext_indexer_html
-		( wikis_bry
+		( url_args.Read_str_or("wikis", wiki.Domain_str())
+		, url_args.Read_str_or("ns_ids", "0")
 		).Bld_page_by_mustache(wiki.App(), page, this);
 	}
 	Xofulltext_indexer_special(Xow_special_meta special__meta) {this.special__meta = special__meta;}

+ 6 - 9
400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/svcs/Xofulltext_indexer_svc.java

@@ -29,8 +29,7 @@ class Xofulltext_indexer_svc implements Gfo_invk {
 	}
 	public void Index(Json_nde args) {
 		// create args
-		byte[] wikis_bry = args.Get_as_bry("wikis");
-		Xofulltext_indexer_args indexer_args = new Xofulltext_indexer_args(wikis_bry);
+		Xofulltext_indexer_args indexer_args = Xofulltext_indexer_args.New_by_json(args);
 
 		// launch thread
 		gplx.core.threads.Thread_adp_.Start_by_val("index", Cancelable_.Never, this, Invk__index, indexer_args);
@@ -47,6 +46,7 @@ class Xofulltext_indexer_svc implements Gfo_invk {
 				continue;
 			}
 
+			// check if dir exists
 			wiki.Init_by_wiki();
 			Io_url search_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
 			if (Io_mgr.Instance.ExistsDir(search_dir)) {
@@ -55,11 +55,14 @@ class Xofulltext_indexer_svc implements Gfo_invk {
 				continue;
 			}
 
+			// notify bgn
 			app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
 				.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index started: " + String_.new_u8(domain)));
 
-			new Xofulltext_indexer_mgr().Exec((Xowe_wiki)wiki, new Xofulltext_indexer_ui(app.Gui__cbk_mgr(), cbk_trg));
+			// run index
+			new Xofulltext_indexer_mgr().Exec((Xowe_wiki)wiki, new Xofulltext_indexer_ui(app.Gui__cbk_mgr(), cbk_trg), args);
 
+			// notify end
 			app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
 				.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index ended: " + String_.new_u8(domain)));
 		}
@@ -72,9 +75,3 @@ class Xofulltext_indexer_svc implements Gfo_invk {
 	}  
 	private static final String Invk__index = "index";
 }
-class Xofulltext_indexer_args {
-	public byte[] wikis;
-	public Xofulltext_indexer_args(byte[] wikis) {
-		this.wikis = wikis;
-	}
-}

+ 4 - 4
gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java

@@ -63,13 +63,13 @@ public class Gflucene_searcher_mgr {
 			IndexReader reader = DirectoryReader.open(index);
 			IndexSearcher searcher = new IndexSearcher(reader);
 
-			Query query = new QueryParser("body", analyzer).parse(data.query);
-//			Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
+//			Query query = new QueryParser("body", analyzer).parse(data.query);
+			Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
 			
 //			Query body_query = new QueryParser("body", analyzer).parse(data.query);
 //			Query title_query = new QueryParser("title", analyzer).parse(data.query);
-//			FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));			
-//			CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
+			FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));			
+			CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
  
 //			TopDocs docs = searcher.search(query, reader.maxDoc());
 			TopDocs docs = searcher.search(query, data.match_max);