예제 #1
0
/**
 * Splits a raw search query into {@link Xow_search_tkn} tokens.
 *
 * <p>Recognized symbols (see {@link #trie}): space, double-quote, {@code -} (NOT), {@code (},
 * {@code )}, {@code or}, {@code and}. Everything else is accumulated as word text. Adjacent words
 * get an implicit AND inserted between them (see {@link #Tkns_add_word}).
 *
 * <p>The scanner keeps its position in instance fields, so one instance must not be used for two
 * scans at the same time. NOTE(review): the trie is static and exposes the last match through
 * {@code Match_pos()}; it looks like it carries per-match state shared by all instances -- confirm
 * before using scanners from multiple threads.
 */
class Xow_search_scanner {
  /** Tokens collected by the current scan. */
  private final List_adp tkns = List_adp_.new_();
  /** Query currently being scanned. */
  private byte[] src;
  /**
   * src_len: length of src; pos: current read position; txt_bgn: start of the pending (not yet
   * emitted) word, or -1 when no word is pending.
   */
  private int src_len, pos, txt_bgn;
  /** Scratch objects handed to Xob_search_base.Split_ttl_into_words. */
  private final Ordered_hash tmp_list = Ordered_hash_.new_();
  private final Bry_bfr tmp_bfr = Bry_bfr.new_();

  /**
   * Tokenizes {@code src}.
   *
   * @param src raw query bytes
   * @return tokens in query order; the internal token list is cleared afterwards
   * @throws RuntimeException (via Err_) if an opening quote has no closing quote, or the trie
   *     yields a token id this scanner does not handle
   */
  public Xow_search_tkn[] Scan(byte[] src) {
    this.src = src;
    this.src_len = src.length;
    tkns.Clear();
    pos = 0;
    txt_bgn = -1;
    while (pos < src_len) {
      byte cur_b = src[pos];
      Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len);
      if (cur_obj == null) { // text character
        if (txt_bgn == -1) {
          txt_bgn = pos; // 1st character of a new word
        }
        ++pos;
        continue;
      }
      // AND, OR, (, ), -, \s, "
      // capture the match end now; Cur_join_is_word may run another trie match
      int pos_end = trie.Match_pos();
      byte cur_tid = ((Byte_obj_val) cur_obj).Val();
      if (Cur_join_is_word(cur_tid, pos_end)) {
        continue; // ignore words containing "and", "or"; EX: "random"; "for"
      }
      Flush_pending_word();
      switch (cur_tid) {
        case Xow_search_tkn.Tid_space: // discard spaces
          pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space);
          break;
        case Xow_search_tkn.Tid_quote: // find end quote and add as word
          int quote_bgn = pos + 1;
          int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len);
          if (quote_end == Bry_.NotFound) {
            throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src));
          }
          Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end);
          pos = quote_end + 1; // +1 to place after quote
          break;
        case Xow_search_tkn.Tid_not:
          // goes through Tkns_add_word so that "a -b" gets the implicit AND: a AND NOT b
          Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end);
          pos = pos_end;
          break;
        case Xow_search_tkn.Tid_paren_bgn:
        case Xow_search_tkn.Tid_paren_end:
        case Xow_search_tkn.Tid_and:
        case Xow_search_tkn.Tid_or:
          tkns.Add(new_tkn(cur_tid, pos, pos_end));
          pos = pos_end;
          break;
        default:
          throw Err_.unhandled(cur_tid);
      }
    }
    Flush_pending_word(); // word running up to end of query
    return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class);
  }

  /** Emits the pending word {@code [txt_bgn, pos)} as a token, if there is one, and resets it. */
  private void Flush_pending_word() {
    if (txt_bgn == -1) {
      return;
    }
    Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
    txt_bgn = -1;
  }

  /**
   * Decides whether an {@code and} / {@code or} / {@code -} match is really part of a word
   * instead of an operator; EX: "random"; "for"; "a-b".
   *
   * <p>When it is part of a word, the match is consumed as text: {@code txt_bgn} is set if no word
   * was pending and {@code pos} is advanced to {@code pos_end}.
   *
   * @param cur_tid token id matched at {@code pos}
   * @param pos_end position just after the match
   * @return true if the match was consumed as word text (caller should continue scanning); false
   *     if it must be handled as a symbol
   */
  private boolean Cur_join_is_word(byte cur_tid, int pos_end) {
    switch (cur_tid) {
      case Xow_search_tkn.Tid_and:
      case Xow_search_tkn.Tid_or:
      case Xow_search_tkn.Tid_not:
        break;
      default:
        return false; // only look at AND, OR, -
    }
    // pending word: cur join must be word; EX: "grand": "and" invoked and "gr" pending
    boolean join_is_word = true;
    if (txt_bgn == -1) { // no pending word; decide by looking at what follows
      if (cur_tid == Xow_search_tkn.Tid_not) {
        return false; // NOT is only operator if no pending tkn; EX: -abc -> NOT abc; a-b -> a-b
      }
      byte nxt_b = pos_end < src_len ? src[pos_end] : Byte_ascii.Nil;
      Object nxt_obj = trie.Match_bgn_w_byte(nxt_b, src, pos_end, src_len);
      if (nxt_obj != null) { // next tkn is tkn; (null means next is text, so join is word)
        byte nxt_tid = ((Byte_obj_val) nxt_obj).Val();
        switch (nxt_tid) {
          case Xow_search_tkn.Tid_space:
          case Xow_search_tkn.Tid_quote:
          case Xow_search_tkn.Tid_paren_bgn:
          case Xow_search_tkn.Tid_paren_end:
            // next tkn is sym; and/or is not word; EX: a AND ; a AND"b"; a AND(b)
            join_is_word = false;
            break;
          case Xow_search_tkn.Tid_not:
          case Xow_search_tkn.Tid_and:
          case Xow_search_tkn.Tid_or:
            // next tkn is and or not; and/or is word; EX: andor; oror; or-abc;
            break;
          default:
            throw Err_.unhandled(nxt_tid); // report the id that was actually switched on
        }
      }
    }
    if (!join_is_word) {
      // only reachable with no pending word, so there is nothing to flush here
      return false;
    }
    if (txt_bgn == -1) {
      txt_bgn = pos; // 1st character not set; set it
    }
    pos = pos_end;
    return true;
  }

  /**
   * Appends a word-like token covering {@code src[src_bgn, src_end)}.
   *
   * <p>If the previous token is a plain word, an AND token is inserted first; EX: A B -> A AND B.
   * For plain words the text is also run through {@code Split_ttl_into_words}: a single split
   * word that differs from the raw text (and has no asterisk) replaces the raw text; multiple
   * split words turn the token into a quoted word.
   *
   * @param tid token id to add (Tid_word, Tid_word_quoted or Tid_not in this class)
   * @param src_bgn start of the token text in src (inclusive)
   * @param src_end end of the token text in src (exclusive)
   */
  private void Tkns_add_word(byte tid, int src_bgn, int src_end) {
    if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and"
      Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last();
      if (last_tkn.Tid() == Xow_search_tkn.Tid_word) {
        // previous tkn is word; auto "AND" words; EX: A B -> A AND B
        tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and));
      }
    }
    if (tid == Xow_search_tkn.Tid_word) {
      // if word has symbol, convert to quoted; EX: a-b should become "a-b";
      // otherwise searcher would search for a single word a-b
      byte[] cur_word = Bry_.Mid(src, src_bgn, src_end);
      byte[][] words =
          gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words(
              null, tmp_list, tmp_bfr, cur_word);
      int words_len = words.length;
      if (words_len == 1 // only one word
          && !Bry_.Eq(words[0], cur_word) // split word not same as raw
          && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk
      ) {
        tkns.Add(Xow_search_tkn.new_bry(tid, words[0])); // use the split form, not the raw text
        return;
      }
      if (words_len > 1) { // multiple words; add as quoted-term; EX: "a-b"
        tid = Xow_search_tkn.Tid_word_quoted;
      }
    }
    tkns.Add(new_tkn(tid, src_bgn, src_end));
  }

  /** Creates a token that refers to {@code [val_bgn, val_end)} by position. */
  private Xow_search_tkn new_tkn(byte tid, int val_bgn, int val_end) {
    return Xow_search_tkn.new_pos(tid, val_bgn, val_end);
  }

  /** Text of the implicit AND token inserted between adjacent words. */
  private static final byte[] Bry_and = Bry_.new_a7("AND");
  /** Maps symbol text to token ids; built with the ascii case-insensitive factory. */
  private static final Btrie_slim_mgr trie =
      Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:OR / AND only
          .Add_str_byte(" ", Xow_search_tkn.Tid_space)
          .Add_str_byte("\"", Xow_search_tkn.Tid_quote)
          .Add_str_byte("-", Xow_search_tkn.Tid_not)
          .Add_str_byte("(", Xow_search_tkn.Tid_paren_bgn)
          .Add_str_byte(")", Xow_search_tkn.Tid_paren_end)
          .Add_str_byte("or", Xow_search_tkn.Tid_or)
          .Add_str_byte("and", Xow_search_tkn.Tid_and);
}
예제 #2
0
/**
 * Special page "ItemByTitle": resolves a (site, page) pair to its wikidata item and redirects to
 * it; EX: Special:ItemByTitle/enwiki/Earth -> www.wikidata.org/wiki/Q2. When the pair is missing
 * or cannot be resolved, a small search form is rendered instead.
 */
public class Wdata_itemByTitle_page implements Xows_page {
  /** Reused holder for the query args of the url being processed. */
  private Xoa_url_arg_hash arg_hash = new Xoa_url_arg_hash();
  private static final byte[] Arg_site = Bry_.new_a7("site"), Arg_page = Bry_.new_a7("page");

  /** Exposes the formatter used to render the search form. */
  public Bry_fmtr Html_fmtr() {
    return html_fmtr;
  }

  /** Looked up on first use in Special_gen; may stay null if the lookup finds nothing. */
  private Wdata_itemByTitle_cfg cfg;

  public Xows_special_meta Special_meta() {
    return Xows_special_meta_.Itm__item_by_title;
  }

  /**
   * Generates the page: either redirects {@code page} to the matching wikidata item or fills it
   * with the search form.
   *
   * <p>Site and page are taken from the query args ("site", "page") and then, if the title has
   * two separators, overridden by the title's trailing segments.
   *
   * @param wiki wiki the special page is requested on
   * @param page page to fill or redirect
   * @param url requested url; its args may carry "site" and "page"
   * @param ttl requested title
   */
  public void Special_gen(Xowe_wiki wiki, Xoae_page page, Xoa_url url, Xoa_ttl ttl) {
    Xoae_app app = wiki.Appe();
    if (cfg == null) {
      cfg = (Wdata_itemByTitle_cfg) app.Special_mgr().Get_or_null(Wdata_itemByTitle_cfg.Key);
    }
    // Special:ItemByTitle/enwiki/Earth -> www.wikidata.org/wiki/Q2
    Gfo_usr_dlg usr_dlg = app.Usr_dlg();
    // Get_or_null may find no cfg; fall back to an empty default instead of failing with an NPE
    byte[] site_bry = cfg == null ? Bry_.Empty : cfg.Site_default();
    if (site_bry == null) {
      site_bry = Bry_.Empty;
    }
    byte[] page_bry = Bry_.Empty;
    // NOTE(review): the parsing below needs two separators, so this presumably still contains
    // the special-page name; EX: ItemByTitle/enwiki/Earth -- confirm
    byte[] raw_bry = ttl.Full_txt_wo_qarg();
    int args_len = url.Args().length;
    if (args_len > 0) {
      arg_hash.Load(url);
      // keep the configured default when the url has args but no "site" arg
      site_bry = arg_hash.Get_val_bry_or(Arg_site, site_bry);
      page_bry = arg_hash.Get_val_bry_or(Arg_page, Bry_.Empty);
    }
    int site_bgn = Bry_finder.Find_fwd(raw_bry, Xoa_ttl.Subpage_spr);
    if (site_bgn != Bry_.NotFound) { // leaf arg is available
      int page_bgn = Bry_finder.Find_fwd(raw_bry, Xoa_ttl.Subpage_spr, site_bgn + 1);
      if (page_bgn != Bry_.NotFound) {
        // 2nd separator found: text between the separators is the site, the rest is the page.
        // A trailing separator (EX: "enwiki/") yields an empty page, which is rejected below.
        site_bry = Bry_.Mid(raw_bry, site_bgn + 1, page_bgn);
        page_bry = Bry_.Mid(raw_bry, page_bgn + 1, raw_bry.length);
      }
    }
    if (Bry_.Len_gt_0(site_bry) && Bry_.Len_gt_0(page_bry)) {
      if (Navigate(usr_dlg, app, app.Wiki_mgr().Wdata_mgr(), page, site_bry, page_bry)) {
        return; // redirected to the item; no form needed
      }
    }
    // nothing to navigate to (or navigation failed): render the search form
    // NOTE(review): site_bry / page_bry come from the url and are placed into value="..."
    // attributes as-is; check whether they need HTML-escaping before being written.
    Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_k004();
    html_fmtr.Bld_bfr_many(
        tmp_bfr,
        "Search for items by site and title",
        "Site",
        site_bry,
        "Page",
        page_bry,
        "Search");
    page.Data_raw_(tmp_bfr.To_bry_and_rls());
    page.Html_data().Html_restricted_n_(); // [[Special:]] pages allow all HTML
  }

  /**
   * Tries to redirect {@code page} to the wikidata item for {@code page_bry} on the wiki named
   * by {@code site_bry}. Each failure is logged as a warning through {@code usr_dlg}.
   *
   * @param usr_dlg receives a warning describing why navigation failed
   * @param app app used to look up the target wiki
   * @param wdata_mgr wikidata manager used to find the item and perform the redirect
   * @param page page to redirect
   * @param site_bry wmf site key; EX: enwiki
   * @param page_bry page title, still url-encoded
   * @return true if the page was redirected to an existing item; false on any failure
   */
  private static boolean Navigate(
      Gfo_usr_dlg usr_dlg,
      Xoae_app app,
      Wdata_wiki_mgr wdata_mgr,
      Xoae_page page,
      byte[] site_bry,
      byte[] page_bry) {
    // NOTE: space is converted to + on postback to url; decode
    page_bry = Xoa_app_.Utl__encoder_mgr().Http_url().Decode(page_bry);
    byte[] wiki_domain = Xow_wiki_alias.Parse_wmf_key(site_bry);
    if (wiki_domain == null) {
      usr_dlg.Warn_many("", "", "site_bry parse failed; site_bry:~{0}", String_.new_u8(site_bry));
      return false;
    }
    Xowe_wiki wiki = app.Wiki_mgr().Get_by_key_or_make(wiki_domain);
    if (wiki == null) {
      usr_dlg.Warn_many(
          "", "", "wiki_domain does not exist; wiki_domain:~{0}", String_.new_u8(wiki_domain));
      return false;
    }
    Xoa_ttl wdata_ttl = Xoa_ttl.parse_(wiki, page_bry);
    if (wdata_ttl == null) {
      usr_dlg.Warn_many("", "", "ttl is invalid; ttl:~{0}", String_.new_u8(page_bry));
      return false;
    }
    Wdata_doc doc = wdata_mgr.Pages_get(wiki, wdata_ttl);
    if (doc == null) {
      usr_dlg.Warn_many(
          "", "", "ttl cannot be found in wikidata; ttl:~{0}", String_.new_u8(wdata_ttl.Raw()));
      return false;
    }
    byte[] qid_bry = doc.Qid();
    Xoae_page qid_page = wdata_mgr.Wdata_wiki().Data_mgr().Redirect(page, qid_bry);
    if (qid_page.Missing()) {
      usr_dlg.Warn_many(
          "", "", "qid cannot be found in wikidata; qid:~{0}", String_.new_u8(qid_bry));
      return false;
    }
    return true;
  }

  /**
   * Search form template. Keys, in the order Special_gen supplies them: legend, site_lbl,
   * site_val, page_lbl, page_val, search_lbl.
   */
  private static final Bry_fmtr html_fmtr =
      Bry_fmtr.new_(
          String_.Concat_lines_nl(
              "<div id=\"mw-content-text\">",
              "<form method=\"get\" action=\"//www.wikidata.org/wiki/Special:ItemByTitle\" name=\"itembytitle\" id=\"wb-itembytitle-form1\">",
              "<fieldset>",
              "<legend>~{legend}</legend>",
              "<label for=\"wb-itembytitle-sitename\">~{site_lbl}:</label>",
              "<input id=\"wb-itembytitle-sitename\" size=\"12\" name=\"site\" value=\"~{site_val}\" accesskey=\"s\" />",
              "",
              "<label for=\"pagename\">~{page_lbl}:</label>",
              "<input id=\"pagename\" size=\"36\" class=\"wb-input-text\" name=\"page\" value=\"~{page_val}\" accesskey=\"p\" />",
              "",
              "<input id=\"wb-itembytitle-submit\" class=\"wb-input-button\" type=\"submit\" value=\"~{search_lbl}\" name=\"submit\" />",
              "</fieldset>",
              "</form>",
              "</div>",
              "<br>To change the default site, see <a href='/site/home/wiki/Help:Options/Wikibase'>Help:Options/Wikibase</a>"),
          "legend",
          "site_lbl",
          "site_val",
          "page_lbl",
          "page_val",
          "search_lbl");
}