private void Tkns_add_word(byte tid, int src_bgn, int src_end) { if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and" Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last(); if (last_tkn.Tid() == Xow_search_tkn.Tid_word) // previous tkn is word; auto "AND" words; EX: A B -> A AND B tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and)); } if (tid == Xow_search_tkn .Tid_word) { // if word has symbol, convert to quoted; EX: a-b should become "a-b"; // otherwise searcher would search for a single word a-b byte[] cur_word = Bry_.Mid(src, src_bgn, src_end); byte[][] words = gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words( null, tmp_list, tmp_bfr, cur_word); int words_len = words.length; if (words_len == 1 // only one word && !Bry_.Eq(words[0], cur_word) // split word not same as raw && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk ) { tkns.Add(Xow_search_tkn.new_bry(tid, words[0])); return; } if (words.length > 1) // multiple words; add as quoted-term; EX: "a-b" tid = Xow_search_tkn.Tid_word_quoted; } tkns.Add(new_tkn(tid, src_bgn, src_end)); }
public Xow_search_tkn[] Scan(byte[] src) { this.src = src; this.src_len = src.length; tkns.Clear(); pos = 0; txt_bgn = -1; while (pos < src_len) { byte cur_b = src[pos]; Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len); if (cur_obj == null) { // text character if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it ++pos; } else { // AND, OR, (, ), -, \s, " int pos_end = trie.Match_pos(); byte cur_tid = ((Byte_obj_val) cur_obj).Val(); if (Cur_join_is_word(cur_tid, pos_end)) continue; // ignore words containing "and", "or"; EX: "random"; "for" if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } switch (cur_tid) { case Xow_search_tkn.Tid_space: // discard spaces pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space); break; case Xow_search_tkn.Tid_quote: // find end quote and add as word int quote_bgn = pos + 1; int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len); if (quote_end == Bry_.NotFound) throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src)); Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end); pos = quote_end + 1; // +1 to place after quote break; case Xow_search_tkn.Tid_not: Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end); pos = pos_end; break; case Xow_search_tkn.Tid_paren_bgn: case Xow_search_tkn.Tid_paren_end: case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: tkns.Add(new_tkn(cur_tid, pos, pos_end)); pos = pos_end; break; default: throw Err_.unhandled(cur_tid); } } } if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class); }