Ejemplo n.º 1
0
 /**
  * Splits a raw search expression into an array of search tokens.
  *
  * <p>Bytes that the trie does not recognize accumulate into a pending word whose start is
  * tracked in {@code txt_bgn}. When the trie recognizes a symbol, any pending word is emitted
  * first and the symbol is then handled according to its token id. AND / OR / - matches that
  * {@code Cur_join_is_word} reports as part of a word are skipped here; that helper has already
  * advanced {@code pos}.
  *
  * <p>Fails via {@code Err_.new_fmt_} when an opening quote has no closing quote, and via
  * {@code Err_.unhandled} when the trie yields a token id this method does not handle.
  *
  * @param src raw bytes of the search expression
  * @return the scanned tokens, in order of appearance; the internal token list is emptied
  */
 public Xow_search_tkn[] Scan(byte[] src) {
   // reset scanner state for the new input
   this.src = src;
   this.src_len = src.length;
   tkns.Clear();
   pos = 0;
   txt_bgn = -1;

   while (pos < src_len) {
     Object match = trie.Match_bgn_w_byte(src[pos], src, pos, src_len);
     if (match == null) {
       // plain text byte: open a pending word if none is open, then move on
       if (txt_bgn == -1) {
         txt_bgn = pos;
       }
       ++pos;
       continue;
     }

     // symbol: AND, OR, (, ), -, \s, "
     int match_end = trie.Match_pos(); // capture before the helper below queries the trie again
     byte tid = ((Byte_obj_val) match).Val();
     if (Cur_join_is_word(tid, match_end)) {
       continue; // "and" / "or" / "-" embedded in a word; EX: "random"; "for"
     }

     if (txt_bgn != -1) {
       // a word was pending before this symbol; emit it
       Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
       txt_bgn = -1;
     }

     if (tid == Xow_search_tkn.Tid_space) {
       // spaces produce no token; skip the whole run
       pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space);
     } else if (tid == Xow_search_tkn.Tid_quote) {
       // everything up to the closing quote becomes one quoted word
       int inner_bgn = pos + 1;
       int inner_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, inner_bgn, src_len);
       if (inner_end == Bry_.NotFound) {
         throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src));
       }
       Tkns_add_word(Xow_search_tkn.Tid_word_quoted, inner_bgn, inner_end);
       pos = inner_end + 1; // resume after the closing quote
     } else if (tid == Xow_search_tkn.Tid_not) {
       Tkns_add_word(Xow_search_tkn.Tid_not, pos, match_end);
       pos = match_end;
     } else if (tid == Xow_search_tkn.Tid_paren_bgn
         || tid == Xow_search_tkn.Tid_paren_end
         || tid == Xow_search_tkn.Tid_and
         || tid == Xow_search_tkn.Tid_or) {
       tkns.Add(new_tkn(tid, pos, match_end));
       pos = match_end;
     } else {
       throw Err_.unhandled(tid);
     }
   }

   if (txt_bgn != -1) {
     // input ended in the middle of a word; emit it
     Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
     txt_bgn = -1;
   }
   return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class);
 }
Ejemplo n.º 2
0
 /**
  * Decides whether a matched AND / OR / - (NOT) symbol is really part of an unquoted word rather
  * than an operator; EX: "random"; "for"; "a-b".
  *
  * <p>Rules, as implemented below:
  * <ul>
  *   <li>Any other token id is never a word; returns false immediately.
  *   <li>If a word is already pending ({@code txt_bgn != -1}), the match is part of that word;
  *       EX: "grand": "and" matched while "gr" is pending.
  *   <li>With no pending word, NOT is always an operator; EX: -abc -> NOT abc.
  *   <li>With no pending word, AND / OR is an operator only when the next token is a space,
  *       quote or paren; EX: a AND ; a AND"b"; a AND(b). Otherwise it starts a word;
  *       EX: andor; oror; or-abc.
  * </ul>
  *
  * <p>Side effect: when the match is a word, {@code txt_bgn} is set to {@code pos} if no word is
  * pending and {@code pos} is advanced to {@code pos_end}, so the caller can simply continue
  * scanning.
  *
  * @param cur_tid token id of the symbol matched at {@code pos}
  * @param pos_end position just past the matched symbol
  * @return true if the match was absorbed into a word; false if it should be handled as a token
  */
 private boolean Cur_join_is_word(byte cur_tid, int pos_end) {
   switch (cur_tid) {
     case Xow_search_tkn.Tid_and:
     case Xow_search_tkn.Tid_or:
     case Xow_search_tkn.Tid_not:
       break;
     default:
       return false; // only look at AND, OR, -
   }

   // a pending word always absorbs the match, so this only flips to false when nothing is pending
   boolean join_is_word = true;
   if (txt_bgn == -1) { // no pending word
     if (cur_tid == Xow_search_tkn.Tid_not)
       return false; // NOT is only operator if no pending tkn; EX: -abc -> NOT abc; a-b -> a-b
     byte nxt_b = pos_end < src_len ? src[pos_end] : Byte_ascii.Nil;
     Object nxt_obj = trie.Match_bgn_w_byte(nxt_b, src, pos_end, src_len);
     if (nxt_obj != null) { // next tkn is a symbol; if it were text, the join stays a word
       byte nxt_tid = ((Byte_obj_val) nxt_obj).Val();
       switch (nxt_tid) {
         case Xow_search_tkn.Tid_space:
         case Xow_search_tkn.Tid_quote:
         case Xow_search_tkn.Tid_paren_bgn:
         case Xow_search_tkn.Tid_paren_end:
           join_is_word = false; // next tkn is sym; and/or is not word
           break;
         case Xow_search_tkn.Tid_not:
         case Xow_search_tkn.Tid_and:
         case Xow_search_tkn.Tid_or:
           join_is_word = true; // next tkn is and / or / not; and/or is word
           break;
         default:
           // report the token id that was actually unexpected (the next one, not the current)
           throw Err_.unhandled(nxt_tid);
       }
     }
   }

   if (!join_is_word) {
     // operator; no word can be pending here, because a pending word forces join_is_word = true
     return false;
   }
   if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it
   pos = pos_end;
   return true;
 }