private void Tkns_add_word(byte tid, int src_bgn, int src_end) { if (tkns.Count() > 0) { // at least 1 tkn; check for "auto-and" Xow_search_tkn last_tkn = (Xow_search_tkn) tkns.Get_at_last(); if (last_tkn.Tid() == Xow_search_tkn.Tid_word) // previous tkn is word; auto "AND" words; EX: A B -> A AND B tkns.Add(Xow_search_tkn.new_bry(Xow_search_tkn.Tid_and, Bry_and)); } if (tid == Xow_search_tkn .Tid_word) { // if word has symbol, convert to quoted; EX: a-b should become "a-b"; // otherwise searcher would search for a single word a-b byte[] cur_word = Bry_.Mid(src, src_bgn, src_end); byte[][] words = gplx.xowa.bldrs.cmds.texts.Xob_search_base.Split_ttl_into_words( null, tmp_list, tmp_bfr, cur_word); int words_len = words.length; if (words_len == 1 // only one word && !Bry_.Eq(words[0], cur_word) // split word not same as raw && Bry_finder.Find_fwd(cur_word, Byte_ascii.Star) == -1 // no asterisk ) { tkns.Add(Xow_search_tkn.new_bry(tid, words[0])); return; } if (words.length > 1) // multiple words; add as quoted-term; EX: "a-b" tid = Xow_search_tkn.Tid_word_quoted; } tkns.Add(new_tkn(tid, src_bgn, src_end)); }
public static Keyval[] new_() { Ordered_hash translated = Ordered_hash_.New_bry(); List_adp untranslated = List_adp_.New(); Add_itm_many( translated, Xol_lang_stub_.Id_en, Xol_lang_stub_.Id_de, Xol_lang_stub_.Id_pl, Xol_lang_stub_.Id_zh_hans, Xol_lang_stub_ .Id_zh_hant); // add langs with translations first, so they alphabetize to top of list int len = Xol_lang_stub_.Id__max; for (int i = 0; i < len; i++) { // add rest of langs, but sort by code Xol_lang_stub itm = Xol_lang_stub_.Get_by_id(i); if (translated.Has(itm.Key())) continue; untranslated.Add(itm); } untranslated.Sort_by(Xol_lang_stub_.Comparer_key); Keyval[] rv = new Keyval[len]; int translated_max = translated.Count(); for (int i = 0; i < translated_max; i++) rv[i] = new_itm((Xol_lang_stub) translated.Get_at(i)); for (int i = translated_max; i < len; i++) rv[i] = new_itm((Xol_lang_stub) untranslated.Get_at(i - translated_max)); return rv; }
public List_adp Load( Xomp_mgr_db mgr_db, String machine_name, List_adp list, int list_idx, int list_len) { List_adp rv = List_adp_.New(); // add remaining pages from old pool to new_pool; for (int i = list_idx; i < list_len; ++i) { rv.Add((Xomp_page_itm) list.Get_at(i)); } // load pages into new pool Xomp_lock_mgr lock_mgr = mgr_db.Lock_mgr(); int uid_db = lock_mgr.Uid_prv__get(machine_name); if (uid_db == Xomp_lock_mgr__fsys.Uid__eos) return rv; // assert that uids must be incrementally larger; handle one machine reaching end, // and putting -1 in queue; int uid_new = 0; try { uid_new = this.Load_from_db(rv, uid_db); } finally { lock_mgr.Uid_prv__rls(machine_name, uid_new); } if (show_msg__fetched_pool) Gfo_usr_dlg_.Instance.Note_many( "", "", "fetched new pool: old=~{0} new=~{1}", uid_db, uid_new); return rv; }
public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) { rv.Clear(); List_adp includes = itm.Ctg_includes(); if (includes == null) return; int includes_len = includes.Count(); Ordered_hash old_regy = Ordered_hash_.New(), new_regy = Ordered_hash_.New(), cur_regy = Ordered_hash_.New(); Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr(); Xowd_page_itm tmp_page = new Xowd_page_itm(); Int_obj_ref tmp_id = Int_obj_ref.New_zero(); List_adp del_list = List_adp_.New(); int ns_filter = itm.Ns_filter(); Ordered_hash exclude_pages = Ordered_hash_.New(); Find_excludes(exclude_pages, wiki, load_mgr, tmp_page, tmp_id, itm.Ctg_excludes()); for (int i = 0; i < includes_len; i++) { // loop over includes byte[] include = (byte[]) includes.Get_at(i); cur_regy.Clear(); del_list.Clear(); Find_pages_in_ctg(cur_regy, wiki, load_mgr, tmp_page, tmp_id, include); Del_old_pages_not_in_cur(i, tmp_id, old_regy, cur_regy, del_list); Add_cur_pages_also_in_old(i, tmp_id, old_regy, cur_regy, new_regy, exclude_pages, ns_filter); old_regy = new_regy; new_regy = Ordered_hash_.New(); } int pages_len = old_regy.Count(); for (int i = 0; i < pages_len; i++) { // loop over old and create pages Int_obj_ref old_id = (Int_obj_ref) old_regy.Get_at(i); rv.Add(new Xowd_page_itm().Id_(old_id.Val())); } wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len); rv.Sort_by(Xowd_page_itm_sorter.IdAsc); }
/**
 * Selects every row of tbl_name and returns them as an array, one Xoud_site_row per row
 * (built by new_row). The reader is released whether or not reading succeeds.
 */
public Xoud_site_row[] Select_all() {
  List_adp rows = List_adp_.new_();
  Db_rdr rdr = conn.Stmt_select(tbl_name, flds).Exec_select__rls_auto();
  try {
    while (rdr.Move_next()) {
      rows.Add(new_row(rdr));
    }
  } finally {
    rdr.Rls();
  }
  return (Xoud_site_row[]) rows.To_ary_and_clear(Xoud_site_row.class);
}
public Xow_search_tkn[] Scan(byte[] src) { this.src = src; this.src_len = src.length; tkns.Clear(); pos = 0; txt_bgn = -1; while (pos < src_len) { byte cur_b = src[pos]; Object cur_obj = trie.Match_bgn_w_byte(cur_b, src, pos, src_len); if (cur_obj == null) { // text character if (txt_bgn == -1) txt_bgn = pos; // 1st character not set; set it ++pos; } else { // AND, OR, (, ), -, \s, " int pos_end = trie.Match_pos(); byte cur_tid = ((Byte_obj_val) cur_obj).Val(); if (Cur_join_is_word(cur_tid, pos_end)) continue; // ignore words containing "and", "or"; EX: "random"; "for" if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } switch (cur_tid) { case Xow_search_tkn.Tid_space: // discard spaces pos = Bry_finder.Find_fwd_while(src, pos, src_len, Byte_ascii.Space); break; case Xow_search_tkn.Tid_quote: // find end quote and add as word int quote_bgn = pos + 1; int quote_end = Bry_finder.Find_fwd(src, Byte_ascii.Quote, quote_bgn, src_len); if (quote_end == Bry_.NotFound) throw Err_.new_fmt_("could not find end quote: {0}", String_.new_u8(src)); Tkns_add_word(Xow_search_tkn.Tid_word_quoted, quote_bgn, quote_end); pos = quote_end + 1; // +1 to place after quote break; case Xow_search_tkn.Tid_not: Tkns_add_word(Xow_search_tkn.Tid_not, pos, pos_end); pos = pos_end; break; case Xow_search_tkn.Tid_paren_bgn: case Xow_search_tkn.Tid_paren_end: case Xow_search_tkn.Tid_and: case Xow_search_tkn.Tid_or: tkns.Add(new_tkn(cur_tid, pos, pos_end)); pos = pos_end; break; default: throw Err_.unhandled(cur_tid); } } } if (txt_bgn != -1) { // pending word; create Tkns_add_word(Xow_search_tkn.Tid_word, txt_bgn, pos); txt_bgn = -1; } return (Xow_search_tkn[]) tkns.To_ary_and_clear(Xow_search_tkn.class); }
/**
 * Collects the Id() of every item in every group of ctg, visiting groups by tid from 0 up
 * to (but excluding) Xoa_ctg_mgr.Tid__max. Tids for which ctg has no group are skipped.
 */
int[] Xto_int_ary(Xoctg_data_ctg ctg) {
  List_adp ids = List_adp_.new_();
  byte tid_end = Xoa_ctg_mgr.Tid__max;
  for (byte tid = 0; tid < tid_end; tid++) {
    Xoctg_idx_mgr grp = ctg.Grp_by_tid(tid);
    if (grp != null) {
      int itms_len = grp.Itms_len();
      for (int itm_idx = 0; itm_idx < itms_len; itm_idx++) {
        ids.Add(grp.Itms_get_at(itm_idx).Id());
      }
    }
  }
  return (int[]) ids.To_ary_and_clear(int.class);
}
private static void Del_old_pages_not_in_cur( int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) { if (i == 0) return; // skip logic for first ctg (which doesn't have a predecessor) int old_len = old_regy.Count(); for (int j = 0; j < old_len; j++) { // if cur is not in new, del it Int_obj_ref old_id = (Int_obj_ref) old_regy.Get_at(j); if (!cur_regy.Has(tmp_id.Val_(old_id.Val()))) // old_itm does not exist in cur_regy del_list.Add( old_id); // remove; EX: (A,B) in old; B only in cur; old should now be (A) only } int del_len = del_list.Count(); for (int j = 0; j < del_len; j++) { Int_obj_ref old_itm = (Int_obj_ref) del_list.Get_at(j); old_regy.Del(tmp_id.Val_(old_itm.Val())); } }
private int Load_from_db(List_adp list, int uid_prv) { // prepare for page_tbl String sql = String_.Format( String_.Concat_lines_nl_skip_last // ANSI.Y ( "SELECT mp.xomp_uid", ", pp.page_id", ", pp.page_namespace", ", pp.page_title", ", pp.page_text_db_id", "FROM xomp_page mp", " JOIN <page_db>page pp ON mp.page_id = pp.page_id", "WHERE mp.xomp_uid > {0}", "AND mp.page_status = 0", "LIMIT {1}"), uid_prv, num_pages_per_load); this.attach_mgr.Conn_links_( new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn())); sql = attach_mgr.Resolve_sql(sql); // run page_tbl int rv = -1; Xomp_text_db_loader text_db_loader = new Xomp_text_db_loader(wiki); attach_mgr.Attach(); Db_rdr rdr = make_conn.Stmt_sql(sql).Exec_select__rls_auto(); try { while (rdr.Move_next()) { rv = rdr.Read_int("xomp_uid"); int text_db_id = rdr.Read_int("page_text_db_id"); Xomp_page_itm ppg = new Xomp_page_itm(rdr.Read_int("page_id")); ppg.Init_by_page( rdr.Read_int("page_namespace"), rdr.Read_bry_by_str("page_title"), text_db_id); list.Add(ppg); text_db_loader.Add(text_db_id, ppg); } } finally { rdr.Rls(); } attach_mgr.Detach(); text_db_loader.Load(); return rv; }
/**
 * Sorts subs and appends one Xow_import_doc per entry to list.
 *
 * Each doc gets the id (length of list on entry + index of the entry in subs).
 * Directories get empty date and size fields. Files get their modified date formatted as
 * "yyyy-MM-dd", their size formatted with "#,###", and a flag computed by
 * Xow_db_file__core_.Is_core_fil_name from the owner dir name and the file name.
 */
private static void New_subs(Io_url owner_dir, List_adp list, IoItmList subs, byte[] dir_cmd) {
  subs.Sort();
  int subs_len = subs.Len();
  int id_base = list.Len(); // ids continue from whatever is already in list
  byte[] owner_dir_bry = owner_dir.RawBry();

  for (int sub_idx = 0; sub_idx < subs_len; ++sub_idx) {
    IoItm_base sub = (IoItm_base) subs.Get_at(sub_idx);
    Xow_import_doc doc;
    if (!sub.Type_dir()) {
      // file entry
      IoItmFil fil = (IoItmFil) sub;
      String size_str = Io_size_.To_str(fil.Size(), "#,###");
      boolean is_xowa_core =
          gplx.xowa.wikis.data.Xow_db_file__core_.Is_core_fil_name(
              owner_dir.NameOnly(), sub.Url().NameAndExt());
      doc =
          new Xow_import_doc(
              Bool_.N,
              is_xowa_core,
              id_base + sub_idx,
              owner_dir_bry,
              sub.Url().RawBry(),
              Bry_.new_u8(sub.Name()),
              Bry_.new_u8(fil.ModifiedTime().XtoStr_fmt("yyyy-MM-dd")),
              Bry_.new_u8(size_str),
              dir_cmd,
              Ary_empty);
    } else {
      // directory entry: no date, no size
      byte[] dir_url = sub.Url().RawBry();
      doc =
          new Xow_import_doc(
              Bool_.Y,
              Bool_.N,
              id_base + sub_idx,
              owner_dir_bry,
              dir_url,
              Bry_.new_u8(sub.Url().NameAndExt_noDirSpr()),
              Bry_.Empty,
              Bry_.Empty,
              dir_cmd,
              Ary_empty);
    }
    list.Add(doc);
  }
}
public static byte[] Write_wdata_links(List_adp slink_list, Xowe_wiki wiki, Xoa_ttl ttl, Wdata_external_lang_links_data external_links_mgr) { try { switch (wiki.Domain_tid()) { case Xow_domain_type_.Tid_home: // home will never be in wikidata case Xow_domain_type_.Tid_wikidata: // wikidata will never be in wikidata return Qid_null; } Wdata_wiki_mgr wdata_mgr = wiki.Appe().Wiki_mgr().Wdata_mgr(); Wdata_doc doc = wdata_mgr.Pages_get(wiki, ttl); if (doc == null) return Qid_null; // no links boolean external_links_mgr_enabled = external_links_mgr.Enabled(); Ordered_hash links = doc.Slink_list(); Bry_bfr tmp_bfr = wiki.Appe().Utl__bfr_mkr().Get_k004(); Xow_wiki_abrv wiki_abrv = new Xow_wiki_abrv(); int len = links.Count(); for (int i = 0; i < len; i++) { Wdata_sitelink_itm slink = (Wdata_sitelink_itm)links.Get_at(i); byte[] xwiki_key = slink.Site(); Xow_wiki_abrv_.parse_(wiki_abrv, xwiki_key, 0, xwiki_key.length); if (wiki_abrv.Domain_tid() == Xow_wiki_abrv_.Tid_null) { wiki.Appe().Usr_dlg().Warn_many("", "", "unknown wiki in wikidata: ttl=~{0} wiki=~{1}", ttl.Page_db_as_str(), String_.new_u8(xwiki_key)); continue; } if (wiki_abrv.Domain_tid() != wiki.Domain_tid()) continue; // ignore wikis in a different domain; EX: looking at enwiki:Earth, and wikidata has dewikiquote; ignore dewikiquote; DATE:2014-06-21 byte[] lang_key = wiki_abrv.Lang_itm().Key(); if (external_links_mgr_enabled && external_links_mgr.Langs_hide(lang_key, 0, lang_key.length)) continue; tmp_bfr.Add(lang_key); tmp_bfr.Add_byte(Byte_ascii.Colon); tmp_bfr.Add(slink.Name()); Xoa_ttl slink_ttl = Xoa_ttl.parse_(wiki, tmp_bfr.Xto_bry_and_clear()); if (slink_ttl == null) continue; // invalid ttl Xow_xwiki_itm xwiki_itm = slink_ttl.Wik_itm(); if ( xwiki_itm == null // not a known xwiki; EX: [[zzz:abc]] || Bry_.Eq(xwiki_itm.Domain_bry(), wiki.Domain_bry()) // skip if same as self; i.e.: do not include links to enwiki if already in enwiki ) continue; slink.Page_ttl_(slink_ttl); slink_list.Add(slink); } 
tmp_bfr.Mkr_rls(); if (external_links_mgr_enabled && external_links_mgr.Sort()) slink_list.Sort_by(Xoa_ttl_sorter._); return doc.Qid(); } catch (Exception e) {Err_.Noop(e); return Qid_null;} }
/**
 * Returns the import_step rows that are mapped to task_id through step_map, one
 * Xobc_import_step_itm per row (built by New_itm). The reader is always released.
 */
public Xobc_import_step_itm[] Select_by_task_id(int task_id) {
  String sql =
      Db_sql_.Make_by_fmt(
          String_.Ary(
              "SELECT s.*",
              "FROM import_step s",
              " JOIN step_map sm ON s.step_id = sm.step_id",
              "WHERE sm.task_id = {0}"),
          task_id);
  Db_rdr rdr = conn.Stmt_sql(sql).Exec_select__rls_auto();

  List_adp steps = List_adp_.New();
  try {
    while (rdr.Move_next()) steps.Add(New_itm(rdr));
  } finally {
    rdr.Rls();
  }
  return (Xobc_import_step_itm[]) steps.To_ary_and_clear(Xobc_import_step_itm.class);
}
public Xol_interval_itm[] Get_duration_intervals(long seconds, Xol_duration_itm[] intervals) { if (intervals == null) intervals = Xol_duration_itm_.Ary_default; Array_.Sort(intervals, Xol_duration_itm_sorter.Instance); int intervals_len = intervals.length; long val = seconds; List_adp rv = List_adp_.New(); for (int i = 0; i < intervals_len; i++) { Xol_duration_itm itm = intervals[i]; long itm_seconds = itm.Seconds(); val = seconds / itm_seconds; if (val > 0 || (i == intervals_len - 1 && rv.Count() == 0) // always add one seg; EX: 40 seconds, but minutes requested -> 0 minutes; // DATE:2014-05-10 ) { seconds -= val * itm_seconds; rv.Add(new Xol_interval_itm(itm, val)); } } return (Xol_interval_itm[]) rv.To_ary(Xol_interval_itm.class); }
/**
 * Returns the keyword-group id array for a wiki.
 *
 * Ary_wmf is returned when wmf is true and domain_itm is a non-null domain whose type is
 * not Xow_domain_tid_.Tid__home. In every other case Ary_nonwmf is returned; it is built on
 * first use as Ary_wmf followed by the strx ids and Id_new_window_link, then cached in the
 * static field.
 */
public static int[] Ary_get(Xow_domain_itm domain_itm, boolean wmf) {
  boolean use_wmf =
      wmf
          && domain_itm != null
          && domain_itm.Domain_type().Tid() != Xow_domain_tid_.Tid__home;
  if (use_wmf) return Ary_wmf;

  // non-wmf array already built by an earlier call
  if (Ary_nonwmf != null) return Ary_nonwmf;

  // first call: wmf ids first, then the extra ids
  List_adp ids = List_adp_.New();
  for (int wmf_id : Ary_wmf) {
    ids.Add(wmf_id);
  }
  ids.Add_many(
      Xol_kwd_grp_.Id_strx_len,
      Xol_kwd_grp_.Id_strx_pos,
      Xol_kwd_grp_.Id_strx_rpos,
      Xol_kwd_grp_.Id_strx_sub,
      Xol_kwd_grp_.Id_strx_count,
      Xol_kwd_grp_.Id_strx_replace,
      Xol_kwd_grp_.Id_strx_explode,
      Xol_kwd_grp_.Id_strx_urldecode,
      Xol_kwd_grp_.Id_new_window_link);
  Ary_nonwmf = (int[]) ids.To_ary_and_clear(int.class);
  return Ary_nonwmf;
}
private byte Select_fsdb_itms(List_adp list) { list.Clear(); boolean pages_found = false, links_found = false; DataRdr rdr = Xob_xfer_regy_tbl.Select_by_tier_page(bldr_conn, tier_id_val, page_id_val, select_interval); try { while (rdr.MoveNextPeer()) { pages_found = true; // at least one page found; set true Xodb_tbl_oimg_xfer_itm itm = Xodb_tbl_oimg_xfer_itm.new_rdr_(rdr); if (itm.Lnki_page_id() == page_id_val // same page_id && itm.Lnki_id() <= lnki_id_val // ... but lnki_id < last ) continue; // ... ignore; note that select is by page_id, not page_id + link_id; needed // else restarts would not resume exactly at same point; links_found = true; list.Add(itm); } } finally { rdr.Rls(); } if (pages_found && !links_found) return Select_rv_next_page; // pages found, but all links processed else if (!pages_found) return Select_rv_stop; // no more pages found else return Select_rv_process; // pages and links found }
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) { try { int src_len = src.length; int prv_pos = 0; Bry_bfr bfr = Bry_bfr_.New_w_size(src_len); Hash_adp img_hash = Hash_adp_bry.cs(); while (true) { int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos); if (url_pos == Bry_find_.Not_found) { bfr.Add_mid(src, prv_pos, src_len); break; } // no more "url("; exit; int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url(" byte bgn_byte = src[bgn_pos]; byte end_byte = Byte_ascii.Null; boolean quoted = true; switch (bgn_byte) { // find end_byte case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or " end_byte = bgn_byte; ++bgn_pos; break; default: // not quoted; end byte is ")" end_byte = Byte_ascii.Paren_end; quoted = false; break; } int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len); if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, src_len); break; } if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length; if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64 bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String prv_pos = end_pos; continue; } int import_url_end = Import_url_chk( rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag if (import_url_end != Bry_find_.Not_found) { prv_pos = import_url_end; continue; } byte[] 
img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len); if (img_cleaned == null) { // could not clean img usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } if (!img_hash.Has(img_cleaned)) { // only add unique items for download; img_hash.Add_as_key_and_val(img_cleaned); list.Add(String_.new_u8(img_cleaned)); } img_cleaned = Replace_invalid_chars( Bry_.Copy( img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change // *inside* hash bfr.Add_mid(src, prv_pos, bgn_pos); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); bfr.Add(img_cleaned); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); prv_pos = end_pos; } return bfr.To_bry_and_clear(); } catch (Exception e) { usr_dlg.Warn_many( "", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e)); return src; } }
/** Appends lnr to the list of tab-close listeners held in list. */
public void Add(Xog_tab_close_lnr lnr) {
  list.Add(lnr);
}