private static void Find_pages_in_ctg( Ordered_hash rv, Xowe_wiki wiki, Xodb_load_mgr load_mgr, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, byte[] ctg_ttl) { Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr() .Get_or_load_or_null( Xoctg_catpage_url.New__blank(), wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, ctg_ttl), -1); if (ctg == null) return; // loop grps to get grp for (byte ctg_tid = 0; ctg_tid < Xoa_ctg_mgr.Tid___max; ++ctg_tid) { Xoctg_catpage_grp ctg_grp = ctg.Grp_by_tid(ctg_tid); int itms_len = ctg_grp.Itms__len(); // loop itms in grp and add to hash for (int i = 0; i < itms_len; ++i) { Xoctg_catpage_itm ctg_itm = ctg_grp.Itms__get_at(i); int itm_page_id = ctg_itm.Page_id(); if (rv.Has(tmp_id.Val_(itm_page_id))) continue; rv.Add(Int_obj_ref.New(itm_page_id), ctg_itm); // DELETE: recurse subcategories; PAGE:en.b:XML DATE:2016-09-18 // if (ctg_tid == Xoa_ctg_mgr.Tid__subc) { // load_mgr.Load_by_id(tmp_page, itm_page_id); // Find_pages_in_ctg(rv, wiki, load_mgr, tmp_page, tmp_id, tmp_page.Ttl_page_db()); // } } } }
public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) { rv.Clear(); List_adp includes = itm.Ctg_includes(); if (includes == null) return; int includes_len = includes.Count(); Ordered_hash old_regy = Ordered_hash_.New(), new_regy = Ordered_hash_.New(), cur_regy = Ordered_hash_.New(); Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr(); Xowd_page_itm tmp_page = new Xowd_page_itm(); Int_obj_ref tmp_id = Int_obj_ref.New_zero(); List_adp del_list = List_adp_.New(); int ns_filter = itm.Ns_filter(); Ordered_hash exclude_pages = Ordered_hash_.New(); Find_excludes(exclude_pages, wiki, load_mgr, tmp_page, tmp_id, itm.Ctg_excludes()); for (int i = 0; i < includes_len; i++) { // loop over includes byte[] include = (byte[]) includes.Get_at(i); cur_regy.Clear(); del_list.Clear(); Find_pages_in_ctg(cur_regy, wiki, load_mgr, tmp_page, tmp_id, include); Del_old_pages_not_in_cur(i, tmp_id, old_regy, cur_regy, del_list); Add_cur_pages_also_in_old(i, tmp_id, old_regy, cur_regy, new_regy, exclude_pages, ns_filter); old_regy = new_regy; new_regy = Ordered_hash_.New(); } int pages_len = old_regy.Count(); for (int i = 0; i < pages_len; i++) { // loop over old and create pages Int_obj_ref old_id = (Int_obj_ref) old_regy.Get_at(i); rv.Add(new Xowd_page_itm().Id_(old_id.Val())); } wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len); rv.Sort_by(Xowd_page_itm_sorter.IdAsc); }
private static void Del_old_pages_not_in_cur( int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) { if (i == 0) return; // skip logic for first ctg (which doesn't have a predecessor) int old_len = old_regy.Count(); for (int j = 0; j < old_len; j++) { // if cur is not in new, del it Int_obj_ref old_id = (Int_obj_ref) old_regy.Get_at(j); if (!cur_regy.Has(tmp_id.Val_(old_id.Val()))) // old_itm does not exist in cur_regy del_list.Add( old_id); // remove; EX: (A,B) in old; B only in cur; old should now be (A) only } int del_len = del_list.Count(); for (int j = 0; j < del_len; j++) { Int_obj_ref old_itm = (Int_obj_ref) del_list.Get_at(j); old_regy.Del(tmp_id.Val_(old_itm.Val())); } }
public static byte[] Write_wdata_links(List_adp slink_list, Xowe_wiki wiki, Xoa_ttl ttl, Wdata_external_lang_links_data external_links_mgr) { try { switch (wiki.Domain_tid()) { case Xow_domain_type_.Tid_home: // home will never be in wikidata case Xow_domain_type_.Tid_wikidata: // wikidata will never be in wikidata return Qid_null; } Wdata_wiki_mgr wdata_mgr = wiki.Appe().Wiki_mgr().Wdata_mgr(); Wdata_doc doc = wdata_mgr.Pages_get(wiki, ttl); if (doc == null) return Qid_null; // no links boolean external_links_mgr_enabled = external_links_mgr.Enabled(); Ordered_hash links = doc.Slink_list(); Bry_bfr tmp_bfr = wiki.Appe().Utl__bfr_mkr().Get_k004(); Xow_wiki_abrv wiki_abrv = new Xow_wiki_abrv(); int len = links.Count(); for (int i = 0; i < len; i++) { Wdata_sitelink_itm slink = (Wdata_sitelink_itm)links.Get_at(i); byte[] xwiki_key = slink.Site(); Xow_wiki_abrv_.parse_(wiki_abrv, xwiki_key, 0, xwiki_key.length); if (wiki_abrv.Domain_tid() == Xow_wiki_abrv_.Tid_null) { wiki.Appe().Usr_dlg().Warn_many("", "", "unknown wiki in wikidata: ttl=~{0} wiki=~{1}", ttl.Page_db_as_str(), String_.new_u8(xwiki_key)); continue; } if (wiki_abrv.Domain_tid() != wiki.Domain_tid()) continue; // ignore wikis in a different domain; EX: looking at enwiki:Earth, and wikidata has dewikiquote; ignore dewikiquote; DATE:2014-06-21 byte[] lang_key = wiki_abrv.Lang_itm().Key(); if (external_links_mgr_enabled && external_links_mgr.Langs_hide(lang_key, 0, lang_key.length)) continue; tmp_bfr.Add(lang_key); tmp_bfr.Add_byte(Byte_ascii.Colon); tmp_bfr.Add(slink.Name()); Xoa_ttl slink_ttl = Xoa_ttl.parse_(wiki, tmp_bfr.Xto_bry_and_clear()); if (slink_ttl == null) continue; // invalid ttl Xow_xwiki_itm xwiki_itm = slink_ttl.Wik_itm(); if ( xwiki_itm == null // not a known xwiki; EX: [[zzz:abc]] || Bry_.Eq(xwiki_itm.Domain_bry(), wiki.Domain_bry()) // skip if same as self; i.e.: do not include links to enwiki if already in enwiki ) continue; slink.Page_ttl_(slink_ttl); slink_list.Add(slink); } 
tmp_bfr.Mkr_rls(); if (external_links_mgr_enabled && external_links_mgr.Sort()) slink_list.Sort_by(Xoa_ttl_sorter._); return doc.Qid(); } catch (Exception e) {Err_.Noop(e); return Qid_null;} }
private static void Add_cur_pages_also_in_old( int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, Ordered_hash new_regy, Ordered_hash exclude_pages, int ns_filter) { int found_len = cur_regy.Count(); for (int j = 0; j < found_len; j++) { // if new_page is in cur, add it Xoctg_catpage_itm cur_itm = (Xoctg_catpage_itm) cur_regy.Get_at(j); Xoa_ttl cur_ttl = cur_itm.Page_ttl(); if (cur_ttl == null) continue; if (ns_filter != Dpl_itm.Ns_filter_null && ns_filter != cur_ttl.Ns().Id()) continue; tmp_id.Val_(cur_itm.Page_id()); // set tmp_id, since it will be used at least once if (exclude_pages.Has(tmp_id)) continue; // ignore excluded pages if (i != 0) { // skip logic for first ctg (which doesn't have a predecessor) if (!old_regy.Has(tmp_id)) continue; // cur_itm not in old_regy; ignore } new_regy.Add_as_key_and_val(Int_obj_ref.New(cur_itm.Page_id())); } }
/**
 * Returns the claim group stored at position {@code i} of the backing hash.
 *
 * @param i position passed straight to {@code hash.Get_at}
 * @return the entry at that position, cast to Wbase_claim_grp
 */
public Wbase_claim_grp Get_at(int i) {
    Object entry = hash.Get_at(i);
    return (Wbase_claim_grp) entry;
}
/**
 * Returns the number of entries in the backing hash.
 *
 * @return the value of {@code hash.Count()}
 */
public int Len() {
    int count = hash.Count();
    return count;
}
/**
 * Stores a claim group in the backing hash, keyed by the group's id reference.
 *
 * @param itm claim group to store; its {@code Id_ref()} is used as the key
 */
public void Add(Wbase_claim_grp itm) {
    hash.Add(
        itm.Id_ref(),
        itm);
}