public Xow_search_tkn[] Scan(byte[] src) { this.src = src; this.src_len = src.length; tkns.Clear(); pos = 0; txt_bgn = -1; while (pos < src_len) { byte cur_b = src[pos]; Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len); if (cur_obj == null) { // text character if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it ++pos; } else { // AND, OR, (, ), -, \s, " int pos_end = trie.Match_pos(); byte cur_tid = ((Byte_obj_val) cur_obj).Val(); if (Cur_join_is_word(cur_tid, pos_end)) continue; // ignore words containing "and", "or"; EX: "random"; "for" if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } switch (cur_tid) { case Xow_search_tkn.Tid_space: // discard spaces pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space); break; case Xow_search_tkn.Tid_quote: // find end quote and add as word int quote_bgn = pos + 1; int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len); if (quote_end == Bry_.NotFound) throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src)); Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end); pos = quote_end + 1; // +1 to place after quote break; case Xow_search_tkn.Tid_not: Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end); pos = pos_end; break; case Xow_search_tkn.Tid_paren_bgn: case Xow_search_tkn.Tid_paren_end: case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: tkns.Add(new_tkn(cur_tid, pos, pos_end)); pos = pos_end; break; default: throw Err_.unhandled(cur_tid); } } } if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class); }
public void Special__gen(Xoa_app app, Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) { int slash_pos = Bry_find_.Find_fwd(ttl.Page_txt_wo_qargs(), Xoa_ttl.Subpage_spr); // check for slash byte[] special_name = slash_pos == Bry_find_.Not_found ? ttl .Base_txt_wo_qarg() // no slash found; use base_txt; ignore qry args and just get // page_names; EX: Search/Earth?fulltext=y; // Allpages?from=Earth... : Bry_.Mid( ttl.Page_txt_wo_qargs(), 0, slash_pos); // slash found; use root page; EX: Special:ItemByTitle/enwiki/Earth special_name = Xoa_ttl.Replace_spaces(special_name); // handle spaces; EX:Spezial:Zufällige_Seite Xow_special_page special = (Xow_special_page) hash.Get_by_bry(special_name); if (special != null) { // special found; generate it; special = special.Special__clone(); page.Db().Page().Modified_on_(Datetime_now.Get()); try { special.Special__gen(wiki, page, url, ttl); } catch (Exception e) { Gfo_log_.Instance.Warn( "failed to generate special page", "url", url.To_str(), "err", Err_.Message_gplx_log(e)); } } }
private boolean Cur_join_is_word( byte cur_tid, int pos_end) { // extra logic to handle and / or occuring in unquoted strings; EX: "random"; // "for" switch (cur_tid) { default: return false; // only look at AND, OR, - case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: case Xow_search_tkn.Tid_not: break; } boolean join_is_word = true; if (txt_bgn == -1) { // no pending word; if (cur_tid == Xow_search_tkn.Tid_not) return false; // NOT is only operator if no pending tkn; EX: -abc -> NOT abc; a-b -> a-b byte nxt_b = pos_end < src_len ? src[pos_end] : Byte_ascii.Nil; Object nxt_obj = trie.Match_bgn_w_byte(nxt_b, src, pos_end, src_len); if (nxt_obj == null) // next tkn is text; join must be word join_is_word = true; else { // next tkn is tkn byte nxt_tid = ((Byte_obj_val) nxt_obj).Val(); switch (nxt_tid) { case Xow_search_tkn.Tid_space: case Xow_search_tkn.Tid_quote: case Xow_search_tkn.Tid_paren_bgn: case Xow_search_tkn.Tid_paren_end: join_is_word = false; // next tkn is sym; and/or is not word; EX: a AND ; a AND"b"; a AND(b) break; case Xow_search_tkn.Tid_not: case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: join_is_word = true; // next tkn is and or not; and/or is word; EX: andor; oror; or-abc; break; default: throw Err_.unhandled(cur_tid); } } } else { // pending word; cur join must be word; EX: "grand": "and" invoked and "gr" pending join_is_word = true; } if (join_is_word) { if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it pos = pos_end; return true; } if (txt_bgn != -1) { Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); // create word txt_bgn = -1; } return false; }