/**
 * Resets the per-link parse state so this instance can process a new source buffer.
 *
 * @param src bytes being parsed; src, href_src and capt_src all start out pointing at it
 */
private void Init(byte[] src) {
  // all three buffers initially share the same array
  this.src = src;
  this.href_src = src;
  this.capt_src = src;

  // flags
  this.capt_has_ns = false;
  this.title_missing_ns = false;

  // namespace info defaults to the main namespace with no name recorded
  this.href_ns_id = Xow_ns_.Tid__main;
  this.href_ns_name = null;
  this.href_ns_name_len = 0;

  // every offset is marked as "not set"
  this.href_bgn = -1;
  this.href_end = -1;
  this.capt_bgn = -1;
  this.capt_end = -1;
  this.title_bgn = -1;
  this.title_end = -1;

  // type ids
  this.title_tid = Title__href;
  this.cls_tid = Xoh_anch_cls_.Tid__none;

  href_itm.Clear();
}
private void Parse_capt(Gfh_tag_rdr tag_rdr, Gfh_tag anch_head) { this.capt_bgn = anch_head.Src_end(); // capt starts after <a> Gfh_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a); // </a> this.capt_end = anch_tail.Src_bgn(); // get capt between "<a>" and "</a> this.src_end = anch_tail.Src_end(); // skip ns in href / capt if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name; int capt_bgn_wo_ns = capt_bgn + href_ns_name_len; href_bgn += href_ns_name_len; // skip ns_name for href; EX: "Help:A" -> "A"; "Help" will be saved as // encoded number if (Bry_.Match( capt_src, capt_bgn, capt_bgn_wo_ns, href_ns_name)) { // capt matches ns_name; EX: <a href='/wiki/Help:A'>Help:A</a> -> // "Help:A" matches "Help:" capt_bgn = capt_bgn_wo_ns; // skip ns; "Help:" capt_has_ns = true; } } // get text splits this.text_tid = href_itm.Tid() == Xoh_anch_href_data.Tid__anch ? Xoh_anch_capt_itm.Tid__diff : capt_itm.Parse( rdr, capt_has_ns, href_src, href_bgn, href_end, src, capt_bgn, capt_end); int split_pos = capt_itm.Split_pos(); this.text_0_src = href_src; this.text_0_bgn = href_bgn; this.text_0_end = href_end; this.text_1_src = capt_src; this.text_1_bgn = capt_bgn; this.text_1_end = capt_end; switch (text_tid) { case Xoh_anch_capt_itm.Tid__same: // case Xoh_anch_capt_itm.Tid__href_pipe: case Xoh_anch_capt_itm.Tid__diff: // nothing to do; href / capt already set above break; case Xoh_anch_capt_itm.Tid__more: this.text_1_bgn = split_pos; break; case Xoh_anch_capt_itm.Tid__less: this.text_0_end = split_pos; this.text_1_src = href_src; this.text_1_bgn = split_pos; this.text_1_end = href_end; break; } }
/**
 * Resets the image / anchor state held by this instance: boolean flags to false,
 * offsets and sizes to -1, and every sub-item via its own clear method.
 *
 * NOTE(review): anch_xo_ttl is not reset here, while Init_by_parse only overwrites it
 * conditionally on the anchor path -- confirm it is reset elsewhere.
 */
public void Clear() {
  // flags
  this.img_alt__diff_anch_title = false;
  this.anch_rel_is_nofollow = false;
  this.img_is_vid = false;
  this.img_wo_anch = false;
  this.img_is_gallery = false;

  // source span
  this.src_bgn = -1;
  this.src_end = -1;

  // anchor title span
  this.anch_title_bgn = -1;
  this.anch_title_end = -1;

  // image size and alt span
  this.img_w = -1;
  this.img_h = -1;
  this.img_alt_bgn = -1;
  this.img_alt_end = -1;

  this.img_imap_idx = -1;

  // sub-items
  anch_href.Clear();
  anch_cls.Clear();
  img_src.Clear();
  img_cls.Clear();
  img_xoimg.Clear();
  img_pgbnr.Clear_by_hdump();
}
private void Parse_href(Xoh_hdoc_ctx hctx, Gfh_tag anch_head) { href_itm.Parse(rdr.Err_wkr(), hctx, src, anch_head); this.href_bgn = href_itm.Ttl_bgn(); this.href_end = href_itm.Ttl_end(); switch (href_itm.Tid()) { case Xoh_anch_href_data.Tid__wiki: case Xoh_anch_href_data.Tid__site: this.href_ns_id = href_itm.Ttl_ns_id(); this.href_src = href_itm.Ttl_full_txt(); this.href_bgn = 0; this.href_end = href_src.length; if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name; int colon_pos = Bry_find_.Find_fwd(href_src, Byte_ascii.Colon, href_bgn, href_end); this.href_ns_name = Xoa_ttl.Replace_unders( Bry_.Mid(href_src, href_bgn, colon_pos + 1)); // EX: 11="Template talk:" this.href_ns_name_len = href_ns_name.length; } break; } }
public boolean Init_by_parse( Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Gfh_tag_rdr tag_rdr, byte[] src, Gfh_tag anch_head, Gfh_tag unused) { Gfh_tag img_tag = anch_head; Bry_err_wkr err_wkr = tag_rdr.Err_wkr(); this.img_wo_anch = anch_head.Name_id() == Gfh_tag_.Id__img; if (img_wo_anch) { Gfh_atr xowa_title = anch_head.Atrs__get_by_or_empty( Xoh_img_xoimg_data.Bry__data_xowa_title); // data-xowa-title='A.png' anch_xo_ttl.Val_(xowa_title.Val()); } else { if (anch_head.Name_id() == Gfh_tag_.Id__div) { // video / audio if (!anch_head.Atrs__cls_eq( gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data.Atr__id__xowa_media_div)) return false; // handle fake-thumbs created through en.w:Template:Image_label_begin; // PAGE:en.w:Blackburnshire; DATE:2016-01-04 img_is_vid = true; tag_rdr.Tag__move_fwd_head(); // next <div> anch_head = tag_rdr.Tag__move_fwd_head(); // next <div> } this.src_bgn = anch_head.Src_bgn(); // <a if (!anch_href.Parse(err_wkr, hctx, src, anch_head)) return false; // href='/wiki/File:A.png' if (!anch_cls.Parse(err_wkr, src, anch_head)) return false; // class='image' Gfh_atr anch_title = anch_head.Atrs__get_by_or_empty(Gfh_atr_.Bry__title); // title='abc' anch_title_bgn = anch_title.Val_bgn(); anch_title_end = anch_title.Val_end(); Gfh_atr xowa_title = anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png' if (xowa_title.Val_dat_exists()) anch_xo_ttl.Val_(xowa_title.Val()); img_tag = tag_rdr.Tag__move_fwd_head(); } img_tag.Chk_name_or_fail(Gfh_tag_.Id__img); // <img if (img_tag.Atrs__cls_has( gplx.xowa.xtns.pagebanners.Pgbnr_xtn_mgr .Bry__cls__wpb_banner_image)) { // handle pagebanner; EX: <img class="wpb-banner-image"> img_pgbnr.Init_by_parse(img_tag); } img_xoimg.Parse(err_wkr, src, img_tag); // data-xoimg='...' 
this.img_w = img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width, Xof_img_size.Size__neg1); // width='220' this.img_h = img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, Xof_img_size.Size__neg1); // height='110' Gfh_atr img_alt = img_tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__alt); // alt='File:A.png' img_alt_bgn = img_alt.Val_bgn(); img_alt_end = img_alt.Val_end(); img_cls.Init_by_parse(err_wkr, src, img_tag); // class='thumbborder' img_alt__diff_anch_title = !Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end); if (!img_src.Parse(err_wkr, hctx, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...' if (anch_xo_ttl.Val_is_empty()) { anch_xo_ttl.Val_(img_src.File_ttl_bry()); if (anch_xo_ttl.Val_is_empty()) anch_xo_ttl.Val_(anch_href.Ttl_page_db()); } this.img_imap_idx = Get_imap_idx(tag_rdr.Err_wkr(), img_tag); if (img_wo_anch) { src_end = img_tag.Src_end(); return true; } Gfh_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a); // </a> this.src_end = anch_tail.Src_end(); if (anch_href.Site_exists()) { Xow_domain_itm itm = Xow_domain_itm_.parse(Bry_.Mid(src, anch_href.Site_bgn(), anch_href.Site_end())); anch_rel_is_nofollow = itm.Domain_type_id() == Xow_domain_tid_.Tid__other; } if (img_is_vid) { tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div); anch_head = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div); src_end = anch_head.Src_end(); } return true; }