Beispiel #1
0
  /**
   * Fills {@code rv} with the pages that survive intersecting all categories listed in
   * {@code itm.Ctg_includes()}, then loads and sorts them.
   *
   * <p>{@code rv} is always cleared first; when the item has no include list the method returns
   * with {@code rv} empty. For each include category the current members are fetched, and only
   * those also present in the previous round (and not excluded / filtered out by namespace) are
   * carried forward. The surviving ids are wrapped in page items, loaded in one batch, and sorted
   * with {@code Xowd_page_itm_sorter.IdAsc}.
   *
   * @param rv output list; cleared and repopulated
   * @param wiki wiki whose load manager is used for lookups
   * @param itm DPL item supplying include/exclude categories and the namespace filter
   */
  public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) {
    rv.Clear();
    List_adp includes = itm.Ctg_includes();
    if (includes == null) {
      return;
    }
    int includes_len = includes.Count();

    // scratch objects shared by every round
    Ordered_hash cur_regy = Ordered_hash_.New();
    Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr();
    Xowd_page_itm tmp_page = new Xowd_page_itm();
    Int_obj_ref tmp_id = Int_obj_ref.New_zero();
    List_adp del_list = List_adp_.New();
    int ns_filter = itm.Ns_filter();

    // pages that must never appear in the result
    Ordered_hash exclude_pages = Ordered_hash_.New();
    Find_excludes(exclude_pages, wiki, load_mgr, tmp_page, tmp_id, itm.Ctg_excludes());

    // "survivors" holds the result of the previous round; starts empty
    Ordered_hash survivors = Ordered_hash_.New();
    for (int ctg_idx = 0; ctg_idx < includes_len; ctg_idx++) {
      byte[] include = (byte[]) includes.Get_at(ctg_idx);
      Ordered_hash next_survivors = Ordered_hash_.New();
      cur_regy.Clear();
      del_list.Clear();
      Find_pages_in_ctg(cur_regy, wiki, load_mgr, tmp_page, tmp_id, include);
      Del_old_pages_not_in_cur(ctg_idx, tmp_id, survivors, cur_regy, del_list);
      Add_cur_pages_also_in_old(
          ctg_idx, tmp_id, survivors, cur_regy, next_survivors, exclude_pages, ns_filter);
      survivors = next_survivors; // this round's output is the next round's input
    }

    // turn the surviving ids into page stubs, then load them in one batch
    int pages_len = survivors.Count();
    int page_idx = 0;
    while (page_idx < pages_len) {
      Int_obj_ref page_id = (Int_obj_ref) survivors.Get_at(page_idx);
      rv.Add(new Xowd_page_itm().Id_(page_id.Val()));
      page_idx++;
    }
    wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len);
    rv.Sort_by(Xowd_page_itm_sorter.IdAsc);
  }
Beispiel #2
0
 /**
  * Removes from {@code old_regy} every id that has no match in {@code cur_regy}.
  *
  * <p>Does nothing when {@code i == 0}, because the first category has no predecessor to prune.
  * Removal is done in two passes (collect into {@code del_list}, then delete) so that
  * {@code old_regy} is not modified while it is being walked by index.
  *
  * @param i index of the category being processed
  * @param tmp_id reusable key holder; its value is overwritten here
  * @param old_regy ids kept from the previous round; pruned in place
  * @param cur_regy members of the current category
  * @param del_list scratch list that receives the ids to delete; entries are appended, not cleared
  */
 private static void Del_old_pages_not_in_cur(
     int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) {
   if (i != 0) {
     // pass 1: collect ids present in old but missing from cur
     // EX: (A,B) in old; B only in cur; A is queued for removal
     int old_count = old_regy.Count();
     int idx = 0;
     while (idx < old_count) {
       Int_obj_ref candidate = (Int_obj_ref) old_regy.Get_at(idx);
       boolean in_cur = cur_regy.Has(tmp_id.Val_(candidate.Val()));
       if (!in_cur) {
         del_list.Add(candidate);
       }
       idx++;
     }

     // pass 2: remove everything that was queued
     int queued = del_list.Count();
     for (int k = 0; k < queued; k++) {
       Int_obj_ref doomed = (Int_obj_ref) del_list.Get_at(k);
       old_regy.Del(tmp_id.Val_(doomed.Val()));
     }
   }
 }
Beispiel #3
0
	/**
	 * Appends to {@code slink_list} the wikidata sitelinks of {@code ttl} that are usable as
	 * language links from {@code wiki}, and returns the qid of the backing wikidata document.
	 *
	 * <p>A sitelink is skipped when its site key cannot be parsed, when it belongs to a different
	 * domain type than {@code wiki}, when its language is hidden by {@code external_links_mgr},
	 * when the resulting title is invalid or has no known xwiki, or when it points back to
	 * {@code wiki} itself.
	 *
	 * <p>The temporary buffer is released in a {@code finally} block so that it is returned even
	 * when an exception interrupts the loop (the outer catch swallows the exception, so without
	 * the {@code finally} the buffer would never be released on that path).
	 *
	 * @param slink_list output list; accepted sitelinks are appended and optionally sorted
	 * @param wiki wiki being rendered
	 * @param ttl page title to look up in wikidata
	 * @param external_links_mgr supplies the enabled / hidden-language / sort settings
	 * @return the document's qid, or {@code Qid_null} for home/wikidata wikis, when no document
	 *     exists, or when any exception occurs
	 */
	public static byte[] Write_wdata_links(List_adp slink_list, Xowe_wiki wiki, Xoa_ttl ttl, Wdata_external_lang_links_data external_links_mgr) {
		try {
			switch (wiki.Domain_tid()) {
				case Xow_domain_type_.Tid_home:		// home will never be in wikidata
				case Xow_domain_type_.Tid_wikidata:	// wikidata will never be in wikidata
					return Qid_null;
			}
			Wdata_wiki_mgr wdata_mgr = wiki.Appe().Wiki_mgr().Wdata_mgr();
			Wdata_doc doc = wdata_mgr.Pages_get(wiki, ttl); if (doc == null) return Qid_null;	// no links
			boolean external_links_mgr_enabled = external_links_mgr.Enabled();
			Ordered_hash links = doc.Slink_list();
			Xow_wiki_abrv wiki_abrv = new Xow_wiki_abrv();
			Bry_bfr tmp_bfr = wiki.Appe().Utl__bfr_mkr().Get_k004();
			try {
				int len = links.Count();
				for (int i = 0; i < len; i++) {
					Wdata_sitelink_itm slink = (Wdata_sitelink_itm)links.Get_at(i);
					byte[] xwiki_key = slink.Site();
					Xow_wiki_abrv_.parse_(wiki_abrv, xwiki_key, 0, xwiki_key.length);
					if (wiki_abrv.Domain_tid() == Xow_wiki_abrv_.Tid_null) {
						wiki.Appe().Usr_dlg().Warn_many("", "", "unknown wiki in wikidata: ttl=~{0} wiki=~{1}", ttl.Page_db_as_str(), String_.new_u8(xwiki_key));
						continue;
					}
					if (wiki_abrv.Domain_tid() != wiki.Domain_tid()) continue;	// ignore wikis in a different domain; EX: looking at enwiki:Earth, and wikidata has dewikiquote; ignore dewikiquote; DATE:2014-06-21
					byte[] lang_key = wiki_abrv.Lang_itm().Key();
					if (external_links_mgr_enabled && external_links_mgr.Langs_hide(lang_key, 0, lang_key.length)) continue;
					// build "lang:name" and parse it as a title
					tmp_bfr.Add(lang_key);
					tmp_bfr.Add_byte(Byte_ascii.Colon);
					tmp_bfr.Add(slink.Name());
					Xoa_ttl slink_ttl = Xoa_ttl.parse_(wiki, tmp_bfr.Xto_bry_and_clear());
					if (slink_ttl == null) continue;								// invalid ttl
					Xow_xwiki_itm xwiki_itm = slink_ttl.Wik_itm();
					if (	xwiki_itm == null									// not a known xwiki; EX: [[zzz:abc]]
						||	Bry_.Eq(xwiki_itm.Domain_bry(), wiki.Domain_bry())	// skip if same as self; i.e.: do not include links to enwiki if already in enwiki
						) continue;
					slink.Page_ttl_(slink_ttl);
					slink_list.Add(slink);
				}
			} finally {
				tmp_bfr.Mkr_rls();	// always release the buffer, including when the loop throws
			}
			if (external_links_mgr_enabled && external_links_mgr.Sort())
				slink_list.Sort_by(Xoa_ttl_sorter._);
			return doc.Qid();
		} catch (Exception e) {Err_.Noop(e); return Qid_null;}
	}
Beispiel #4
0
 /**
  * Copies into {@code new_regy} the ids of the current category's members that pass all filters.
  *
  * <p>A member is kept when it has a title, matches {@code ns_filter} (unless the filter is
  * {@code Dpl_itm.Ns_filter_null}), is not in {@code exclude_pages}, and - for every category
  * after the first - is also present in {@code old_regy}.
  *
  * @param i index of the category being processed; 0 disables the {@code old_regy} check
  * @param tmp_id reusable key holder; its value is overwritten here
  * @param old_regy ids kept from the previous round
  * @param cur_regy members of the current category
  * @param new_regy receives a fresh {@code Int_obj_ref} for each kept member
  * @param exclude_pages ids that must be skipped
  * @param ns_filter namespace id to require, or {@code Dpl_itm.Ns_filter_null} for none
  */
 private static void Add_cur_pages_also_in_old(
     int i,
     Int_obj_ref tmp_id,
     Ordered_hash old_regy,
     Ordered_hash cur_regy,
     Ordered_hash new_regy,
     Ordered_hash exclude_pages,
     int ns_filter) {
   boolean is_first_ctg = i == 0; // first ctg has no predecessor to intersect with
   boolean ns_filter_active = ns_filter != Dpl_itm.Ns_filter_null;
   int cur_len = cur_regy.Count();
   for (int idx = 0; idx < cur_len; idx++) {
     Xoctg_catpage_itm member = (Xoctg_catpage_itm) cur_regy.Get_at(idx);
     Xoa_ttl member_ttl = member.Page_ttl();

     // title / namespace checks come first; tmp_id is only touched once they pass
     boolean keep =
         member_ttl != null && (!ns_filter_active || ns_filter == member_ttl.Ns().Id());
     if (keep) {
       tmp_id.Val_(member.Page_id());
       // must not be excluded, and must already be in old (except for the first ctg)
       keep = !exclude_pages.Has(tmp_id) && (is_first_ctg || old_regy.Has(tmp_id));
     }
     if (keep) {
       new_regy.Add_as_key_and_val(Int_obj_ref.New(member.Page_id()));
     }
   }
 }
 /** Returns the value of {@code hash.Count()}. */
 public int Len() {
   final int count = hash.Count();
   return count;
 }