private void Tkns_add_word(byte tid, int src_bgn, int src_end) { if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and" Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last(); if (last_tkn.Tid() == Xow_search_tkn.Tid_word) // previous tkn is word; auto "AND" words; EX: A B -> A AND B tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and)); } if (tid == Xow_search_tkn .Tid_word) { // if word has symbol, convert to quoted; EX: a-b should become "a-b"; // otherwise searcher would search for a single word a-b byte[] cur_word = Bry_.Mid(src, src_bgn, src_end); byte[][] words = gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words( null, tmp_list, tmp_bfr, cur_word); int words_len = words.length; if (words_len == 1 // only one word && !Bry_.Eq(words[0], cur_word) // split word not same as raw && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk ) { tkns.Add(Xow_search_tkn.new_bry(tid, words[0])); return; } if (words.length > 1) // multiple words; add as quoted-term; EX: "a-b" tid = Xow_search_tkn.Tid_word_quoted; } tkns.Add(new_tkn(tid, src_bgn, src_end)); }
public void Special__gen(Xoa_app app, Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) { int slash_pos = Bry_find_.Find_fwd(ttl.Page_txt_wo_qargs(), Xoa_ttl.Subpage_spr); // check for slash byte[] special_name = slash_pos == Bry_find_.Not_found ? ttl .Base_txt_wo_qarg() // no slash found; use base_txt; ignore qry args and just get // page_names; EX: Search/Earth?fulltext=y; // Allpages?from=Earth... : Bry_.Mid( ttl.Page_txt_wo_qargs(), 0, slash_pos); // slash found; use root page; EX: Special:ItemByTitle/enwiki/Earth special_name = Xoa_ttl.Replace_spaces(special_name); // handle spaces; EX:Spezial:Zufällige_Seite Xow_special_page special = (Xow_special_page) hash.Get_by_bry(special_name); if (special != null) { // special found; generate it; special = special.Special__clone(); page.Db().Page().Modified_on_(Datetime_now.Get()); try { special.Special__gen(wiki, page, url, ttl); } catch (Exception e) { Gfo_log_.Instance.Warn( "failed to generate special page", "url", url.To_str(), "err", Err_.Message_gplx_log(e)); } } }
public void Special_gen(Xowe_wiki wiki, Xoae_page page, Xoa_url url, Xoa_ttl ttl) { if (cfg == null) cfg = (Wdata_itemByTitle_cfg) wiki.Appe().Special_mgr().Get_or_null(Wdata_itemByTitle_cfg.Key); // Special:ItemByTitle/enwiki/Earth -> www.wikidata.org/wiki/Q2 Gfo_usr_dlg usr_dlg = wiki.Appe().Usr_dlg(); byte[] site_bry = cfg.Site_default(); byte[] page_bry = Bry_.Empty; byte[] raw_bry = ttl.Full_txt_wo_qarg(); // EX: enwiki/Earth int args_len = url.Args().length; if (args_len > 0) { arg_hash.Load(url); site_bry = arg_hash.Get_val_bry_or(Arg_site, Bry_.Empty); page_bry = arg_hash.Get_val_bry_or(Arg_page, Bry_.Empty); } int site_bgn = Bry_finder.Find_fwd(raw_bry, Xoa_ttl.Subpage_spr); if (site_bgn != Bry_.NotFound) { // leaf arg is available int page_bgn = Bry_finder.Find_fwd(raw_bry, Xoa_ttl.Subpage_spr, site_bgn + 1); int raw_bry_len = raw_bry.length; if (page_bgn != Bry_.NotFound && page_bgn < raw_bry_len) { // pipe is found and not last char (EX: "enwiki/" is invalid site_bry = Bry_.Mid(raw_bry, site_bgn + 1, page_bgn); page_bry = Bry_.Mid(raw_bry, page_bgn + 1, raw_bry_len); } } Xoae_app app = wiki.Appe(); if (Bry_.Len_gt_0(site_bry) && Bry_.Len_gt_0(page_bry)) if (Navigate(usr_dlg, app, app.Wiki_mgr().Wdata_mgr(), page, site_bry, page_bry)) return; Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_k004(); html_fmtr.Bld_bfr_many( tmp_bfr, "Search for items by site and title", "Site", site_bry, "Page", page_bry, "Search"); page.Data_raw_(tmp_bfr.To_bry_and_rls()); page.Html_data().Html_restricted_n_(); // [[Special:]] pages allow all HTML }
public void Get_by_url1(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) { int slash_pos = Bry_find_.Find_fwd(ttl.Page_txt_wo_qargs(), Xoa_ttl.Subpage_spr); // check for slash byte[] special_name = slash_pos == Bry_find_.Not_found ? ttl.Base_txt_wo_qarg() // no slash found; use base_txt; ignore qry args and just get // page_names; EX: Search/Earth?fulltext=y; // Allpages?from=Earth... : Bry_.Mid( ttl.Page_txt_wo_qargs(), 0, slash_pos); // slash found; use root page; EX: Special:ItemByTitle/enwiki/Earth Object o = hash.Get_by_bry(special_name); if (o == null) { Xol_specials_itm special_itm = wiki.Lang().Specials_mgr().Get_by_alias(special_name); if (special_itm != null) o = hash.Get_by_bry(special_itm.Special()); } if (o != null) { // Xow_special_page special = (Xow_special_page)o; // page.Revision_data().Modified_on_(Datetime_now.Get()); // special.Special__gen(wiki, page, url, ttl); } }
class Xow_search_scanner { private final List_adp tkns = List_adp_.new_(); private byte[] src; private int src_len, pos, txt_bgn; private final Ordered_hash tmp_list = Ordered_hash_.new_(); private final Bry_bfr tmp_bfr = Bry_bfr.new_(); public Xow_search_tkn[] Scan(byte[] src) { this.src = src; this.src_len = src.length; tkns.Clear(); pos = 0; txt_bgn = -1; while (pos < src_len) { byte cur_b = src[pos]; Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len); if (cur_obj == null) { // text character if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it ++pos; } else { // AND, OR, (, ), -, \s, " int pos_end = trie.Match_pos(); byte cur_tid = ((Byte_obj_val) cur_obj).Val(); if (Cur_join_is_word(cur_tid, pos_end)) continue; // ignore words containing "and", "or"; EX: "random"; "for" if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } switch (cur_tid) { case Xow_search_tkn.Tid_space: // discard spaces pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space); break; case Xow_search_tkn.Tid_quote: // find end quote and add as word int quote_bgn = pos + 1; int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len); if (quote_end == Bry_.NotFound) throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src)); Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end); pos = quote_end + 1; // +1 to place after quote break; case Xow_search_tkn.Tid_not: Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end); pos = pos_end; break; case Xow_search_tkn.Tid_paren_bgn: case Xow_search_tkn.Tid_paren_end: case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: tkns.Add(new_tkn(cur_tid, pos, pos_end)); pos = pos_end; break; default: throw Err_.unhandled(cur_tid); } } } if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class); } private boolean Cur_join_is_word( byte cur_tid, int pos_end) { // extra logic to handle and / or occuring in unquoted strings; EX: "random"; // "for" switch (cur_tid) { default: return false; // only look at AND, OR, - case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: case Xow_search_tkn.Tid_not: break; } boolean join_is_word = true; if (txt_bgn == -1) { // no pending word; if (cur_tid == Xow_search_tkn.Tid_not) return false; // NOT is only operator if no pending tkn; EX: -abc -> NOT abc; a-b -> a-b byte nxt_b = pos_end < src_len ? src[pos_end] : Byte_ascii.Nil; Object nxt_obj = trie.Match_bgn_w_byte(nxt_b, src, pos_end, src_len); if (nxt_obj == null) // next tkn is text; join must be word join_is_word = true; else { // next tkn is tkn byte nxt_tid = ((Byte_obj_val) nxt_obj).Val(); switch (nxt_tid) { case Xow_search_tkn.Tid_space: case Xow_search_tkn.Tid_quote: case Xow_search_tkn.Tid_paren_bgn: case Xow_search_tkn.Tid_paren_end: join_is_word = false; // next tkn is sym; and/or is not word; EX: a AND ; a AND"b"; a AND(b) break; case Xow_search_tkn.Tid_not: case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: join_is_word = true; // next tkn is and or not; and/or is word; EX: andor; oror; or-abc; break; default: throw Err_.unhandled(cur_tid); } } } else { // pending word; cur join must be word; EX: "grand": "and" invoked and "gr" pending join_is_word = true; } if (join_is_word) { if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it pos = pos_end; return true; } if (txt_bgn != -1) { Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); // create word txt_bgn = -1; } return false; } private void Tkns_add_word(byte tid, int src_bgn, int src_end) { if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and" Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last(); if (last_tkn.Tid() == Xow_search_tkn.Tid_word) // previous tkn is word; auto "AND" words; EX: A B -> A AND B tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and)); } if (tid == Xow_search_tkn .Tid_word) { // if word has symbol, convert to quoted; EX: a-b should become "a-b"; // otherwise searcher would search for a single word a-b byte[] cur_word = Bry_.Mid(src, src_bgn, src_end); byte[][] words = gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words( null, tmp_list, tmp_bfr, cur_word); int words_len = words.length; if (words_len == 1 // only one word && !Bry_.Eq(words[0], cur_word) // split word not same as raw && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk ) { tkns.Add(Xow_search_tkn.new_bry(tid, words[0])); return; } if (words.length > 1) // multiple words; add as quoted-term; EX: "a-b" tid = Xow_search_tkn.Tid_word_quoted; } tkns.Add(new_tkn(tid, src_bgn, src_end)); } private Xow_search_tkn new_tkn(byte tid, int val_bgn, int val_end) { return Xow_search_tkn.new_pos(tid, val_bgn, val_end); } private static final byte[] Bry_and = Bry_.new_a7("AND"); private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:OR / AND only .Add_str_byte(" ", Xow_search_tkn.Tid_space) .Add_str_byte("\"", Xow_search_tkn.Tid_quote) .Add_str_byte("-", Xow_search_tkn.Tid_not) .Add_str_byte("(", Xow_search_tkn.Tid_paren_bgn) .Add_str_byte(")", Xow_search_tkn.Tid_paren_end) .Add_str_byte("or", Xow_search_tkn.Tid_or) .Add_str_byte("and", Xow_search_tkn.Tid_and); }
public class Wdata_itemByTitle_page implements Xows_page { private Xoa_url_arg_hash arg_hash = new Xoa_url_arg_hash(); private static final byte[] Arg_site = Bry_.new_a7("site"), Arg_page = Bry_.new_a7("page"); public Bry_fmtr Html_fmtr() { return html_fmtr; } private Wdata_itemByTitle_cfg cfg; public Xows_special_meta Special_meta() { return Xows_special_meta_.Itm__item_by_title; } public void Special_gen(Xowe_wiki wiki, Xoae_page page, Xoa_url url, Xoa_ttl ttl) { if (cfg == null) cfg = (Wdata_itemByTitle_cfg) wiki.Appe().Special_mgr().Get_or_null(Wdata_itemByTitle_cfg.Key); // Special:ItemByTitle/enwiki/Earth -> www.wikidata.org/wiki/Q2 Gfo_usr_dlg usr_dlg = wiki.Appe().Usr_dlg(); byte[] site_bry = cfg.Site_default(); byte[] page_bry = Bry_.Empty; byte[] raw_bry = ttl.Full_txt_wo_qarg(); // EX: enwiki/Earth int args_len = url.Args().length; if (args_len > 0) { arg_hash.Load(url); site_bry = arg_hash.Get_val_bry_or(Arg_site, Bry_.Empty); page_bry = arg_hash.Get_val_bry_or(Arg_page, Bry_.Empty); } int site_bgn = Bry_finder.Find_fwd(raw_bry, Xoa_ttl.Subpage_spr); if (site_bgn != Bry_.NotFound) { // leaf arg is available int page_bgn = Bry_finder.Find_fwd(raw_bry, Xoa_ttl.Subpage_spr, site_bgn + 1); int raw_bry_len = raw_bry.length; if (page_bgn != Bry_.NotFound && page_bgn < raw_bry_len) { // pipe is found and not last char (EX: "enwiki/" is invalid site_bry = Bry_.Mid(raw_bry, site_bgn + 1, page_bgn); page_bry = Bry_.Mid(raw_bry, page_bgn + 1, raw_bry_len); } } Xoae_app app = wiki.Appe(); if (Bry_.Len_gt_0(site_bry) && Bry_.Len_gt_0(page_bry)) if (Navigate(usr_dlg, app, app.Wiki_mgr().Wdata_mgr(), page, site_bry, page_bry)) return; Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_k004(); html_fmtr.Bld_bfr_many( tmp_bfr, "Search for items by site and title", "Site", site_bry, "Page", page_bry, "Search"); page.Data_raw_(tmp_bfr.To_bry_and_rls()); page.Html_data().Html_restricted_n_(); // [[Special:]] pages allow all HTML } private static boolean Navigate( Gfo_usr_dlg usr_dlg, Xoae_app app, Wdata_wiki_mgr wdata_mgr, Xoae_page page, byte[] site_bry, byte[] page_bry) { page_bry = Xoa_app_.Utl__encoder_mgr() .Http_url() .Decode(page_bry); // NOTE: space is converted to + on postback to url; decode byte[] wiki_domain = Xow_wiki_alias.Parse_wmf_key(site_bry); if (wiki_domain == null) { usr_dlg.Warn_many("", "", "site_bry parse failed; site_bry:~{0}", String_.new_u8(site_bry)); return false; } Xowe_wiki wiki = app.Wiki_mgr().Get_by_key_or_make(wiki_domain); if (wiki == null) { usr_dlg.Warn_many( "", "", "wiki_domain does not exist; wiki_domain:~{0}", String_.new_u8(wiki_domain)); return false; } Xoa_ttl wdata_ttl = Xoa_ttl.parse_(wiki, page_bry); if (wdata_ttl == null) { usr_dlg.Warn_many("", "", "ttl is invalid; ttl:~{0}", String_.new_u8(page_bry)); return false; } Wdata_doc doc = wdata_mgr.Pages_get(wiki, wdata_ttl); if (doc == null) { usr_dlg.Warn_many( "", "", "ttl cannot be found in wikidata; ttl:~{0}", String_.new_u8(wdata_ttl.Raw())); return false; } byte[] qid_bry = doc.Qid(); Xoae_page qid_page = wdata_mgr.Wdata_wiki().Data_mgr().Redirect(page, qid_bry); if (qid_page.Missing()) { usr_dlg.Warn_many( "", "", "qid cannot be found in wikidata; qid:~{0}", String_.new_u8(qid_bry)); return false; } return true; } private static Bry_fmtr html_fmtr = Bry_fmtr.new_( String_.Concat_lines_nl( "<div id=\"mw-content-text\">", "<form method=\"get\" action=\"//www.wikidata.org/wiki/Special:ItemByTitle\" name=\"itembytitle\" id=\"wb-itembytitle-form1\">", "<fieldset>", "<legend>~{legend}</legend>", "<label for=\"wb-itembytitle-sitename\">~{site_lbl}:</label>", "<input id=\"wb-itembytitle-sitename\" size=\"12\" name=\"site\" value=\"~{site_val}\" accesskey=\"s\" />", "", "<label for=\"pagename\">~{page_lbl}:</label>", "<input id=\"pagename\" size=\"36\" class=\"wb-input-text\" name=\"page\" value=\"~{page_val}\" accesskey=\"p\" />", "", "<input id=\"wb-itembytitle-submit\" class=\"wb-input-button\" type=\"submit\" value=\"~{search_lbl}\" name=\"submit\" />", "</fieldset>", "</form>", "</div>", "<br>To change the default site, see <a href='/site/home/wiki/Help:Options/Wikibase'>Help:Options/Wikibase</a>"), "legend", "site_lbl", "site_val", "page_lbl", "page_val", "search_lbl"); }