예제 #1
0
 private void Tkns_add_word(byte tid, int src_bgn, int src_end) {
   if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and"
     Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last();
     if (last_tkn.Tid()
         == Xow_search_tkn.Tid_word) // previous tkn is word; auto "AND" words; EX: A B -> A AND B
     tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and));
   }
   if (tid
       == Xow_search_tkn
           .Tid_word) { // if word has symbol, convert to quoted; EX: a-b should become "a-b";
                        // otherwise searcher would search for a single word a-b
     byte[] cur_word = Bry_.Mid(src, src_bgn, src_end);
     byte[][] words =
         gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words(
             null, tmp_list, tmp_bfr, cur_word);
     int words_len = words.length;
     if (words_len == 1 // only one word
         && !Bry_.Eq(words[0], cur_word) // split word not same as raw
         && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk
     ) {
       tkns.Add(Xow_search_tkn.new_bry(tid, words[0]));
       return;
     }
     if (words.length > 1) // multiple words; add as quoted-term; EX: "a-b"
     tid = Xow_search_tkn.Tid_word_quoted;
   }
   tkns.Add(new_tkn(tid, src_bgn, src_end));
 }
예제 #2
0
 public Xow_search_tkn[] Scan(byte[] src) {
   this.src = src;
   this.src_len = src.length;
   tkns.Clear();
   pos = 0;
   txt_bgn = -1;
   while (pos < src_len) {
     byte cur_b = src[pos];
     Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len);
     if (cur_obj == null) { // text character
       if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it
       ++pos;
     } else { // AND, OR, (, ), -, \s, "
       int pos_end = trie.Match_pos();
       byte cur_tid = ((Byte_obj_val) cur_obj).Val();
       if (Cur_join_is_word(cur_tid, pos_end))
         continue; // ignore words containing "and", "or"; EX: "random"; "for"
       if (txt_bgn != -1) { // pending word; create
         Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
         txt_bgn = -1;
       }
       switch (cur_tid) {
         case Xow_search_tkn.Tid_space: // discard spaces
           pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space);
           break;
         case Xow_search_tkn.Tid_quote: // find end quote and add as word
           int quote_bgn = pos + 1;
           int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len);
           if (quote_end == Bry_.NotFound)
             throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src));
           Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end);
           pos = quote_end + 1; // +1 to place after quote
           break;
         case Xow_search_tkn.Tid_not:
           Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end);
           pos = pos_end;
           break;
         case Xow_search_tkn.Tid_paren_bgn:
         case Xow_search_tkn.Tid_paren_end:
         case Xow_search_tkn.Tid_and:
         case Xow_search_tkn.Tid_or:
           tkns.Add(new_tkn(cur_tid, pos, pos_end));
           pos = pos_end;
           break;
         default:
           throw Err_.unhandled(cur_tid);
       }
     }
   }
   if (txt_bgn != -1) { // pending word; create
     Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
     txt_bgn = -1;
   }
   return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class);
 }