Esempio n. 1
0
 /**
  * Records the bounds of the anchor's title attribute and classifies it into title_tid:
  * Title__missing (no end position), Title__href (same bytes as href), Title__capt (same bytes as
  * caption) or Title__diff (anything else).
  *
  * @param title_atr the title attribute; its Val_bgn() / Val_end() supply the title bounds in src
  */
 private void Parse_title(Gfh_atr title_atr) {
   title_bgn = title_atr.Val_bgn();
   title_end = title_atr.Val_end();

   // href carries a ns_name: try to strip the same prefix from the title
   if (href_ns_name != null) {
     int bgn_after_ns = title_bgn + href_ns_name_len;
     boolean title_has_ns = Bry_.Match(src, title_bgn, bgn_after_ns, href_ns_name);
     if (title_has_ns) {
       title_bgn = bgn_after_ns; // skip ns; EX: "Help:"
     } else {
       title_missing_ns = true;
     }
   }

   // an end position of -1 is treated as "no title"
   if (title_end == -1) {
     title_tid = Title__missing;
     return;
   }

   // NOTE: do not mark title=href if href omitted title;
   // PAGE:en.b:Wikibooks:WikiProject; DATE:2016-01-20
   boolean same_as_href = Bry_.Match(src, title_bgn, title_end, href_src, href_bgn, href_end);
   if (same_as_href && !title_missing_ns) {
     title_tid = Title__href;
     return;
   }

   boolean same_as_capt = Bry_.Match(src, title_bgn, title_end, src, capt_bgn, capt_end);
   if (same_as_capt) {
     title_tid = Title__capt;
     return;
   }

   title_tid = Title__diff;
   if (href_ns_name != null) {
     // since title is different, add back ns_name;
     // EX: "<a href='/wiki/Help:A_b#c' title='Help:A b'>a</a>"; title should be "Help:A b", not "A b"
     title_bgn = title_atr.Val_bgn();
   }
 }
 private int Import_url_chk(
     byte[] rel_url_prefix,
     byte[] src,
     int src_len,
     int old_pos,
     int find_bgn,
     byte[] url_raw,
     Bry_bfr bfr) {
   if (find_bgn < Bry_import_len) return Bry_find_.Not_found;
   if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import))
     return Bry_find_.Not_found;
   byte[] css_url = url_raw;
   int css_url_len = css_url.length;
   if (css_url_len > 0
       && css_url[0]
           == Byte_ascii
               .Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03
     if (css_url_len > 1
         && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page"
     css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css"
   }
   css_url =
       Bry_.Replace(
           css_url,
           Byte_ascii.Space,
           Byte_ascii
               .Underline); // NOTE: must replace spaces with underlines else download will fail;
                            // EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e
                            // Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
   byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url);
   String css_src_str = String_.new_u8(css_src_bry);
   download_wkr.Download_xrg()
       .Prog_fmt_hdr_(
           usr_dlg.Log_many(
               GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str));
   byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str);
   if (css_trg_bry == null) {
     usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str);
     return Bry_find_.Not_found; // css not found
   }
   bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
   bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
   if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1)
     css_trg_bry =
         Bry_.Replace(
             css_trg_bry,
             Wikisource_dynimg_find,
             Wikisource_dynimg_repl); // FreedImg hack;
                                      // PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
   bfr.Add(css_trg_bry).Add_byte_nl();
   bfr.Add_byte_nl();
   int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
   return semic_pos + Int_.Const_dlm_len;
 }
Esempio n. 3
0
  private void Parse_capt(Gfh_tag_rdr tag_rdr, Gfh_tag anch_head) {
    this.capt_bgn = anch_head.Src_end(); // capt starts after <a>
    Gfh_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a); // </a>
    this.capt_end = anch_tail.Src_bgn(); // get capt between "<a>" and "</a>
    this.src_end = anch_tail.Src_end();

    // skip ns in href / capt
    if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name;		
      int capt_bgn_wo_ns = capt_bgn + href_ns_name_len;
      href_bgn +=
          href_ns_name_len; // skip ns_name for href; EX: "Help:A" -> "A"; "Help" will be saved as
      // encoded number
      if (Bry_.Match(
          capt_src,
          capt_bgn,
          capt_bgn_wo_ns,
          href_ns_name)) { // capt matches ns_name; EX: <a href='/wiki/Help:A'>Help:A</a> ->
        // "Help:A" matches "Help:"
        capt_bgn = capt_bgn_wo_ns; // skip ns; "Help:"
        capt_has_ns = true;
      }
    }

    // get text splits
    this.text_tid =
        href_itm.Tid() == Xoh_anch_href_data.Tid__anch
            ? Xoh_anch_capt_itm.Tid__diff
            : capt_itm.Parse(
                rdr, capt_has_ns, href_src, href_bgn, href_end, src, capt_bgn, capt_end);
    int split_pos = capt_itm.Split_pos();
    this.text_0_src = href_src;
    this.text_0_bgn = href_bgn;
    this.text_0_end = href_end;
    this.text_1_src = capt_src;
    this.text_1_bgn = capt_bgn;
    this.text_1_end = capt_end;
    switch (text_tid) {
      case Xoh_anch_capt_itm.Tid__same:
        // case Xoh_anch_capt_itm.Tid__href_pipe:
      case Xoh_anch_capt_itm.Tid__diff: // nothing to do; href / capt already set above
        break;
      case Xoh_anch_capt_itm.Tid__more:
        this.text_1_bgn = split_pos;
        break;
      case Xoh_anch_capt_itm.Tid__less:
        this.text_0_end = split_pos;
        this.text_1_src = href_src;
        this.text_1_bgn = split_pos;
        this.text_1_end = href_end;
        break;
    }
  }
Esempio n. 4
0
 /**
  * Parses an image construct starting at {@code anch_head}, which may be a bare img tag, an a tag
  * wrapping an img, or a media div wrapping both, and stores the results in this object's fields.
  *
  * @param hdoc_wkr not referenced in this method
  * @param hctx context passed to the href / src parsers; also supplies the wiki domain
  * @param tag_rdr reader positioned at anch_head; advanced as tags are consumed
  * @param src html source that the parsed positions refer to
  * @param anch_head first tag of the construct
  * @param unused not referenced in this method
  * @return false when a div lacks the media-div class or when the href, anchor class, or img src
  *     parser rejects the tag; true otherwise
  */
 public boolean Init_by_parse(
     Xoh_hdoc_wkr hdoc_wkr,
     Xoh_hdoc_ctx hctx,
     Gfh_tag_rdr tag_rdr,
     byte[] src,
     Gfh_tag anch_head,
     Gfh_tag unused) {
   Gfh_tag img = anch_head; // replaced below when the construct starts with an anchor / div
   Bry_err_wkr errs = tag_rdr.Err_wkr();

   this.img_wo_anch = anch_head.Name_id() == Gfh_tag_.Id__img;
   if (img_wo_anch) {
     // bare <img>: title comes from data-xowa-title='A.png'
     Gfh_atr data_ttl =
         anch_head.Atrs__get_by_or_empty(Xoh_img_xoimg_data.Bry__data_xowa_title);
     anch_xo_ttl.Val_(data_ttl.Val());
   } else {
     boolean starts_with_div = anch_head.Name_id() == Gfh_tag_.Id__div;
     if (starts_with_div) { // video / audio
       boolean is_media_div =
           anch_head.Atrs__cls_eq(
               gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data.Atr__id__xowa_media_div);
       if (!is_media_div) {
         // handle fake-thumbs created through en.w:Template:Image_label_begin;
         // PAGE:en.w:Blackburnshire; DATE:2016-01-04
         return false;
       }
       img_is_vid = true;
       tag_rdr.Tag__move_fwd_head(); // next <div>
       // second head after the media div; treated as the anchor from here on
       anch_head = tag_rdr.Tag__move_fwd_head();
     }

     this.src_bgn = anch_head.Src_bgn(); // <a
     if (!anch_href.Parse(errs, hctx, src, anch_head)) { // href='/wiki/File:A.png'
       return false;
     }
     if (!anch_cls.Parse(errs, src, anch_head)) { // class='image'
       return false;
     }

     Gfh_atr title_atr = anch_head.Atrs__get_by_or_empty(Gfh_atr_.Bry__title); // title='abc'
     anch_title_bgn = title_atr.Val_bgn();
     anch_title_end = title_atr.Val_end();

     Gfh_atr xo_ttl_atr =
         anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png'
     if (xo_ttl_atr.Val_dat_exists()) {
       anch_xo_ttl.Val_(xo_ttl_atr.Val());
     }
     img = tag_rdr.Tag__move_fwd_head();
   }

   img.Chk_name_or_fail(Gfh_tag_.Id__img); // <img
   // handle pagebanner; EX: <img class="wpb-banner-image">
   if (img.Atrs__cls_has(gplx.xowa.xtns.pagebanners.Pgbnr_xtn_mgr.Bry__cls__wpb_banner_image)) {
     img_pgbnr.Init_by_parse(img);
   }
   img_xoimg.Parse(errs, src, img); // data-xoimg='...'

   // width='220' height='110'; Size__neg1 is the fallback when the attribute is absent
   this.img_w = img.Atrs__get_as_int_or(Gfh_atr_.Bry__width, Xof_img_size.Size__neg1);
   this.img_h = img.Atrs__get_as_int_or(Gfh_atr_.Bry__height, Xof_img_size.Size__neg1);

   Gfh_atr alt_atr = img.Atrs__get_by_or_empty(Gfh_atr_.Bry__alt); // alt='File:A.png'
   img_alt_bgn = alt_atr.Val_bgn();
   img_alt_end = alt_atr.Val_end();
   img_cls.Init_by_parse(errs, src, img); // class='thumbborder'

   // remember whether alt differs from the anchor title
   boolean alt_same_as_title =
       Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end);
   img_alt__diff_anch_title = !alt_same_as_title;

   if (!img_src.Parse(errs, hctx, hctx.Wiki__domain_bry(), img)) { // src='...'
     return false;
   }

   // title fallbacks: file title from img src, then page title from the href
   if (anch_xo_ttl.Val_is_empty()) {
     anch_xo_ttl.Val_(img_src.File_ttl_bry());
     if (anch_xo_ttl.Val_is_empty()) {
       anch_xo_ttl.Val_(anch_href.Ttl_page_db());
     }
   }
   this.img_imap_idx = Get_imap_idx(tag_rdr.Err_wkr(), img);

   // bare <img>: the construct ends with the img tag itself
   if (img_wo_anch) {
     src_end = img.Src_end();
     return true;
   }

   Gfh_tag anch_close = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a); // </a>
   this.src_end = anch_close.Src_end();
   if (anch_href.Site_exists()) {
     byte[] site_bry = Bry_.Mid(src, anch_href.Site_bgn(), anch_href.Site_end());
     Xow_domain_itm domain = Xow_domain_itm_.parse(site_bry);
     anch_rel_is_nofollow = domain.Domain_type_id() == Xow_domain_tid_.Tid__other;
   }

   // video / audio: consume the two closing divs; the construct ends after the second one
   if (img_is_vid) {
     tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div);
     anch_head = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div);
     src_end = anch_head.Src_end();
   }
   return true;
 }