Beispiel #1
0
class Xow_search_scanner {
  private final List_adp tkns = List_adp_.new_();
  private byte[] src;
  private int src_len, pos, txt_bgn;
  private final Ordered_hash tmp_list = Ordered_hash_.new_();
  private final Bry_bfr tmp_bfr = Bry_bfr.new_();

  public Xow_search_tkn[] Scan(byte[] src) {
    this.src = src;
    this.src_len = src.length;
    tkns.Clear();
    pos = 0;
    txt_bgn = -1;
    while (pos < src_len) {
      byte cur_b = src[pos];
      Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len);
      if (cur_obj == null) { // text character
        if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it
        ++pos;
      } else { // AND, OR, (, ), -, \s, "
        int pos_end = trie.Match_pos();
        byte cur_tid = ((Byte_obj_val) cur_obj).Val();
        if (Cur_join_is_word(cur_tid, pos_end))
          continue; // ignore words containing "and", "or"; EX: "random"; "for"
        if (txt_bgn != -1) { // pending word; create
          Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
          txt_bgn = -1;
        }
        switch (cur_tid) {
          case Xow_search_tkn.Tid_space: // discard spaces
            pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space);
            break;
          case Xow_search_tkn.Tid_quote: // find end quote and add as word
            int quote_bgn = pos + 1;
            int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len);
            if (quote_end == Bry_.NotFound)
              throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src));
            Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end);
            pos = quote_end + 1; // +1 to place after quote
            break;
          case Xow_search_tkn.Tid_not:
            Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end);
            pos = pos_end;
            break;
          case Xow_search_tkn.Tid_paren_bgn:
          case Xow_search_tkn.Tid_paren_end:
          case Xow_search_tkn.Tid_and:
          case Xow_search_tkn.Tid_or:
            tkns.Add(new_tkn(cur_tid, pos, pos_end));
            pos = pos_end;
            break;
          default:
            throw Err_.unhandled(cur_tid);
        }
      }
    }
    if (txt_bgn != -1) { // pending word; create
      Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos);
      txt_bgn = -1;
    }
    return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class);
  }

  private boolean Cur_join_is_word(
      byte cur_tid,
      int
          pos_end) { // extra logic to handle and / or occuring in unquoted strings; EX: "random";
                     // "for"
    switch (cur_tid) {
      default:
        return false; // only look at AND, OR, -
      case Xow_search_tkn.Tid_and:
      case Xow_search_tkn.Tid_or:
      case Xow_search_tkn.Tid_not:
        break;
    }
    boolean join_is_word = true;
    if (txt_bgn == -1) { // no pending word;
      if (cur_tid == Xow_search_tkn.Tid_not)
        return false; // NOT is only operator if no pending tkn; EX: -abc -> NOT abc; a-b -> a-b
      byte nxt_b = pos_end < src_len ? src[pos_end] : Byte_ascii.Nil;
      Object nxt_obj = trie.Match_bgn_w_byte(nxt_b, src, pos_end, src_len);
      if (nxt_obj == null) // next tkn is text; join must be word
      join_is_word = true;
      else { // next tkn is tkn
        byte nxt_tid = ((Byte_obj_val) nxt_obj).Val();
        switch (nxt_tid) {
          case Xow_search_tkn.Tid_space:
          case Xow_search_tkn.Tid_quote:
          case Xow_search_tkn.Tid_paren_bgn:
          case Xow_search_tkn.Tid_paren_end:
            join_is_word =
                false; // next tkn is sym; and/or is not word; EX: a AND ; a AND"b"; a AND(b)
            break;
          case Xow_search_tkn.Tid_not:
          case Xow_search_tkn.Tid_and:
          case Xow_search_tkn.Tid_or:
            join_is_word = true; // next tkn is and or not; and/or is word; EX: andor; oror; or-abc;
            break;
          default:
            throw Err_.unhandled(cur_tid);
        }
      }
    } else { // pending word; cur join must be word; EX: "grand": "and" invoked and "gr" pending
      join_is_word = true;
    }
    if (join_is_word) {
      if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it
      pos = pos_end;
      return true;
    }
    if (txt_bgn != -1) {
      Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); // create word
      txt_bgn = -1;
    }
    return false;
  }

  private void Tkns_add_word(byte tid, int src_bgn, int src_end) {
    if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and"
      Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last();
      if (last_tkn.Tid()
          == Xow_search_tkn.Tid_word) // previous tkn is word; auto "AND" words; EX: A B -> A AND B
      tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and));
    }
    if (tid
        == Xow_search_tkn
            .Tid_word) { // if word has symbol, convert to quoted; EX: a-b should become "a-b";
                         // otherwise searcher would search for a single word a-b
      byte[] cur_word = Bry_.Mid(src, src_bgn, src_end);
      byte[][] words =
          gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words(
              null, tmp_list, tmp_bfr, cur_word);
      int words_len = words.length;
      if (words_len == 1 // only one word
          && !Bry_.Eq(words[0], cur_word) // split word not same as raw
          && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk
      ) {
        tkns.Add(Xow_search_tkn.new_bry(tid, words[0]));
        return;
      }
      if (words.length > 1) // multiple words; add as quoted-term; EX: "a-b"
      tid = Xow_search_tkn.Tid_word_quoted;
    }
    tkns.Add(new_tkn(tid, src_bgn, src_end));
  }

  private Xow_search_tkn new_tkn(byte tid, int val_bgn, int val_end) {
    return Xow_search_tkn.new_pos(tid, val_bgn, val_end);
  }

  private static final byte[] Bry_and = Bry_.new_a7("AND");
  private static final Btrie_slim_mgr trie =
      Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:OR / AND only
          .Add_str_byte(" ", Xow_search_tkn.Tid_space)
          .Add_str_byte("\"", Xow_search_tkn.Tid_quote)
          .Add_str_byte("-", Xow_search_tkn.Tid_not)
          .Add_str_byte("(", Xow_search_tkn.Tid_paren_bgn)
          .Add_str_byte(")", Xow_search_tkn.Tid_paren_end)
          .Add_str_byte("or", Xow_search_tkn.Tid_or)
          .Add_str_byte("and", Xow_search_tkn.Tid_and);
}
Beispiel #2
0
	/**
	 * Parses {@code fmt} into {@code itms}: literal runs become data items and
	 * escape-delimited arguments become argument items.
	 *
	 * Argument names made only of ASCII digits are treated as numeric indexes; other names are
	 * looked up in {@code keys}. A name missing from {@code keys} is written back to the literal
	 * output as escape + arg-begin + name + arg-end.
	 *
	 * @return this formatter, with {@code itms}, {@code itms_len} and {@code fmt_args_exist} updated
	 */
	public Bry_fmtr Compile() {
		synchronized (this) {	// THREAD: DATE:2015-04-29
			Bry_bfr key_bfr = Bry_bfr.new_(16);
			int fmt_len = fmt.length;
			int fmt_last = fmt_len - 1;
			int fmt_pos = 0;
			byte[] lit_bry = new byte[fmt_len];	// pending literal bytes
			int lit_len = 0;
			boolean in_arg = false;
			boolean arg_is_int = true;
			List_adp compiled = List_adp_.new_();
			fmt_args_exist = false;
			while (fmt_pos <= fmt_last) {
				byte cur_byte = fmt[fmt_pos];

				// inside an argument: collect name bytes until the closing char
				if (in_arg) {
					if (cur_byte != char_arg_end) {
						key_bfr.Add_byte(cur_byte);
						switch (cur_byte) {
							case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
							case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
								break;
							default:	// any non-digit makes this a named argument
								arg_is_int = false;
								break;
						}
					}
					else {
						if (arg_is_int) {
							compiled.Add(Bry_fmtr_itm.arg_(key_bfr.XtoInt(0) - baseInt));
						}
						else {
							byte[] key_fmt = key_bfr.Xto_bry();
							Object idx_ref = keys.Get_by(Bry_obj_ref.new_(key_fmt));
							if (idx_ref != null) {
								compiled.Add(Bry_fmtr_itm.arg_(((Int_obj_val)idx_ref).Val() - baseInt));
							}
							else {	// unknown key: re-emit the argument text as literal bytes
								int key_len = key_bfr.Len();
								byte[] key_raw = key_bfr.Bfr();
								lit_bry[lit_len++] = char_escape;
								lit_bry[lit_len++] = char_arg_bgn;
								for (int i = 0; i < key_len; i++) {
									lit_bry[lit_len++] = key_raw[i];
								}
								lit_bry[lit_len++] = char_arg_end;
							}
						}
						in_arg = false;
						key_bfr.Clear();
						fmt_args_exist = true;
					}
					fmt_pos += 1;
					continue;
				}

				// ordinary byte: copy to pending literal
				if (cur_byte != char_escape) {
					lit_bry[lit_len++] = cur_byte;
					fmt_pos += 1;
					continue;
				}

				// escape char is the last byte of fmt
				if (fmt_pos == fmt_last) {
					if (fail_when_invalid_escapes)
						throw Err_.new_("escape char encountered but no more chars left");
					// NOTE(review): lit_len is not advanced, so this byte is not part of the literal flushed below; confirm intent
					lit_bry[lit_len] = cur_byte;
					break;
				}

				byte nxt_byte = fmt[fmt_pos + 1];
				if (nxt_byte == char_arg_bgn) {
					if (lit_len > 0) {	// something pending; add it to list
						compiled.Add(Bry_fmtr_itm.dat_(lit_bry, lit_len));
						lit_len = 0;
					}
					int eval_lhs_bgn = fmt_pos + 2;
					if (eval_lhs_bgn < fmt_len && fmt[eval_lhs_bgn] == char_eval_bgn) {	// eval found
						fmt_pos = Compile_eval_cmd(fmt, fmt_len, eval_lhs_bgn, compiled);
						continue;
					}
					in_arg = true;
					arg_is_int = true;
				}
				else {	// ~{0}; ~~ -> ~; ~n -> newLine; ~t -> tab
					byte unescaped;
					if (nxt_byte == char_escape) {
						unescaped = char_escape;
					}
					else if (nxt_byte == char_escape_nl) {
						unescaped = Byte_ascii.Nl;
					}
					else if (nxt_byte == char_escape_tab) {
						unescaped = Byte_ascii.Tab;
					}
					else {
						if (fail_when_invalid_escapes)
							throw Err_.new_("unknown escape code").Add("code", Char_.XbyInt(nxt_byte)).Add("fmt_pos", fmt_pos + 1);
						unescaped = cur_byte;	// lenient mode: emit the escape char itself
					}
					lit_bry[lit_len++] = unescaped;
				}
				fmt_pos += 2;	// skip escape char and the byte after it
			}
			if (in_arg) throw Err_.new_("idx mode not closed");
			if (lit_len > 0) {	// flush trailing literal
				compiled.Add(Bry_fmtr_itm.dat_(lit_bry, lit_len));
			}
			itms = (Bry_fmtr_itm[])compiled.To_ary(Bry_fmtr_itm.class);
			itms_len = itms.length;
			return this;
		}
	}
Beispiel #3
0
	/**
	 * Formats {@code args} with the current format, then installs the formatted bytes as the
	 * new format and compiles it.
	 *
	 * @param args values passed to {@code Bld_bfr_many}
	 */
	public void Bld_bfr_many_and_set_fmt(Object... args) {
		Bry_bfr out_bfr = Bry_bfr.new_();
		Bld_bfr_many(out_bfr, args);
		Fmt_(out_bfr.Xto_bry_and_clear()).Compile();
	}
Beispiel #4
0
/**
 * Test fixture around {@link Io_line_rdr}: writes numbered lines to the reader's files and
 * checks what the reader returns. Every method returns the fixture so calls can be chained.
 */
class Io_line_rdr_fxt {
  Io_line_rdr rdr;
  List_adp lines = List_adp_.new_(); // lines collected by tst_Read_til_lines
  Bry_bfr tmp = Bry_bfr.new_(); // scratch buffer for building file contents

  /** Creates a reader over {@code urls} using the test user-dialog. */
  public Io_line_rdr_fxt(Io_url... urls) {
    rdr = new Io_line_rdr(Gfo_usr_dlg_.Test(), urls);
  }

  /** Sets the reader's load length to {@code v} lines of 3 bytes each. */
  public Io_line_rdr_fxt Load_len_lines_(int v) {
    return Load_len_(v * 3);
  } // 3: 2=##, 1=\n

  /** Sets the reader's load length to {@code v}. */
  public Io_line_rdr_fxt Load_len_(int v) {
    rdr.Load_len_(v);
    return this;
  }

  /** Writes lines for 0..count-1 (via {@code Add_int_fixed(i, 2)} + newline) to the first file. */
  public Io_line_rdr_fxt File_lines_(int count) {
    for (int i = 0; i < count; i++) tmp.Add_int_fixed(i, 2).Add_byte_nl();
    Io_mgr.I.SaveFilBry(rdr.Urls()[0], tmp.Xto_bry_and_clear());
    return this;
  }
  //	public Io_url[] Src_fils() {return src_fils;} public Io_line_rdr_fxt Src_fils_(Io_url[] v)
  // {src_fils = v; return this;} Io_url[] src_fils;

  /**
   * Runs {@code rdr.Match} on {@code match} with the first-pipe key generator and compares the
   * matched key text against {@code expd}; a failed match is compared as "".
   */
  public Io_line_rdr_fxt tst_Match(String match, String expd) {
    rdr.Key_gen_(Io_line_rdr_key_gen_.first_pipe);
    boolean match_v = rdr.Match(Bry_.new_u8(match));
    String actl = match_v ? String_.new_u8(rdr.Bfr(), rdr.Key_pos_bgn(), rdr.Key_pos_end()) : "";
    Tfds.Eq(expd, actl);
    return this;
  }

  /** Same as {@link #File_lines_(int)}, but each line has a pipe before the newline. */
  public Io_line_rdr_fxt File_lines_pipe_(int count) {
    for (int i = 0; i < count; i++) tmp.Add_int_fixed(i, 2).Add_byte(Byte_ascii.Pipe).Add_byte_nl();
    Io_mgr.I.SaveFilBry(rdr.Urls()[0], tmp.Xto_bry_and_clear());
    return this;
  }

  /** Writes lines for bgn..end-1 to the file at index {@code fil_idx}. */
  public Io_line_rdr_fxt File_lines_(int fil_idx, int bgn, int end) {
    for (int i = bgn; i < end; i++) tmp.Add_int_fixed(i, 2).Add_byte_nl();
    Io_mgr.I.SaveFilBry(rdr.Urls()[fil_idx], tmp.Xto_bry_and_clear());
    return this;
  }

  /** Clears the reader. */
  public Io_line_rdr_fxt Clear() {
    rdr.Clear();
    return this;
  }

  /**
   * Reads up to {@code count} items (stopping early when {@code Read_next} returns false) and
   * compares them with {@code expd}, where each expected line gets the Linux newline appended.
   *
   * @param count maximum number of reads
   * @param expd expected lines without trailing newline; not modified
   */
  public Io_line_rdr_fxt tst_Read_til_lines(int count, String... expd) {
    lines.Clear();
    // build a separate array so a caller-supplied array is not altered and can be reused
    String[] expd_lines = new String[expd.length];
    for (int i = 0; i < expd.length; i++) expd_lines[i] = expd[i] + Op_sys.Lnx.Nl_str();
    for (int i = 0; i < count; i++) {
      if (rdr.Read_next())
        lines.Add(String_.new_u8(rdr.Bfr(), rdr.Itm_pos_bgn(), rdr.Itm_pos_end()));
      else break;
    }
    Tfds.Eq_ary_str(expd_lines, lines.To_str_ary());
    return this;
  }
}