Example #1
0
 /**
  * Classifies the anchor's title attribute against the href and the caption.
  *
  * <p>Sets title_bgn / title_end from {@code title_atr}, then sets title_tid to one of
  * Title__missing, Title__href, Title__capt or Title__diff. When href_ns_name is set, a
  * matching ns_name prefix at the start of the title is skipped; if the title lacks that
  * prefix, title_missing_ns is set to true.
  *
  * @param title_atr title attribute of the anchor; a Val_end() of -1 is treated as "no title"
  */
 private void Parse_title(Gfh_atr title_atr) {
   title_bgn = title_atr.Val_bgn();
   title_end = title_atr.Val_end();

   // href has an ns_name: check whether the title starts with the same ns_name
   if (href_ns_name != null) {
     int bgn_after_ns = title_bgn + href_ns_name_len;
     if (Bry_.Match(src, title_bgn, bgn_after_ns, href_ns_name)) {
       title_bgn = bgn_after_ns; // skip ns; EX: "Help:"
     } else {
       title_missing_ns = true;
     }
   }

   // no title attribute at all
   if (title_end == -1) {
     title_tid = Title__missing;
     return;
   }

   // NOTE: title only counts as "same as href" when the title did not lack the ns_name;
   // PAGE:en.b:Wikibooks:WikiProject; DATE:2016-01-20
   if (Bry_.Match(src, title_bgn, title_end, href_src, href_bgn, href_end)
       && !title_missing_ns) {
     title_tid = Title__href;
   } else if (Bry_.Match(src, title_bgn, title_end, src, capt_bgn, capt_end)) {
     title_tid = Title__capt;
   } else {
     title_tid = Title__diff;
     // title differs from both, so restore the ns_name that was skipped above;
     // EX: "<a href='/wiki/Help:A_b#c' title='Help:A b'>a</a>"; title should be
     // "Help:A b", not "A b"
     if (href_ns_name != null) {
       title_bgn = title_atr.Val_bgn();
     }
   }
 }
Example #2
0
 /**
  * Reads the imagemap index from the usemap attribute of an img tag.
  *
  * <p>The number is parsed from the attribute value starting at the length of
  * Imap_xtn_mgr.Bry__usemap__prefix and running to the end of the value.
  *
  * @param err_wkr receives a warning when the value cannot be parsed as an int
  * @param img_tag tag whose usemap attribute is read
  * @return the parsed index, or -1 when the attribute is empty or not a valid int
  */
 private static int Get_imap_idx(Bry_err_wkr err_wkr, Gfh_tag img_tag) {
   byte[] usemap = img_tag.Atrs__get_as_bry(Imap_xtn_mgr.Bry__usemap__name);
   if (Bry_.Len_eq_0(usemap)) {
     return -1;
   }
   int prefix_len = Imap_xtn_mgr.Bry__usemap__prefix.length;
   int imap_idx = Bry_.To_int_or(usemap, prefix_len, usemap.length, -1);
   if (imap_idx == -1) {
     err_wkr.Warn("invalid imap_idx", "val", usemap);
   }
   return imap_idx;
 }
Example #3
0
  /**
   * Locates the caption between "<a ...>" and "</a>" and splits href / caption into the
   * text_0 / text_1 ranges according to the tid reported by capt_itm.
   *
   * <p>Side effects visible here: sets capt_bgn, capt_end, src_end, text_tid and the
   * text_0_* / text_1_* fields; advances href_bgn past the ns_name when the href is not in
   * the main namespace; may set capt_has_ns.
   *
   * @param tag_rdr reader positioned after the anchor head; moved forward to the "</a>" tail
   * @param anch_head the "<a ...>" head tag
   */
  private void Parse_capt(Gfh_tag_rdr tag_rdr, Gfh_tag anch_head) {
    // caption runs from the end of "<a>" to the start of "</a>"
    this.capt_bgn = anch_head.Src_end();
    Gfh_tag close_tag = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a);
    this.capt_end = close_tag.Src_bgn();
    this.src_end = close_tag.Src_end();

    // href is not in the main ns: skip the ns_name in the href and, if present, in the caption
    boolean href_in_main_ns = href_ns_id == Xow_ns_.Tid__main;
    if (!href_in_main_ns) {
      int capt_bgn_after_ns = capt_bgn + href_ns_name_len;
      // EX: "Help:A" -> "A"; "Help" will be saved as encoded number
      href_bgn += href_ns_name_len;
      boolean capt_starts_with_ns =
          Bry_.Match(capt_src, capt_bgn, capt_bgn_after_ns, href_ns_name);
      if (capt_starts_with_ns) {
        // EX: <a href='/wiki/Help:A'>Help:A</a> -> "Help:A" matches "Help:"
        capt_bgn = capt_bgn_after_ns;
        capt_has_ns = true;
      }
    }

    // anchor-only hrefs are always "diff"; everything else is compared by capt_itm
    if (href_itm.Tid() == Xoh_anch_href_data.Tid__anch) {
      this.text_tid = Xoh_anch_capt_itm.Tid__diff;
    } else {
      this.text_tid =
          capt_itm.Parse(rdr, capt_has_ns, href_src, href_bgn, href_end, src, capt_bgn, capt_end);
    }
    int split = capt_itm.Split_pos();

    // default: text_0 is the href range and text_1 is the caption range
    this.text_0_src = href_src;
    this.text_0_bgn = href_bgn;
    this.text_0_end = href_end;
    this.text_1_src = capt_src;
    this.text_1_bgn = capt_bgn;
    this.text_1_end = capt_end;

    // Tid__same / Tid__diff need no adjustment; only "more" and "less" move the split
    if (text_tid == Xoh_anch_capt_itm.Tid__more) {
      this.text_1_bgn = split;
    } else if (text_tid == Xoh_anch_capt_itm.Tid__less) {
      this.text_0_end = split;
      this.text_1_src = href_src;
      this.text_1_bgn = split;
      this.text_1_end = href_end;
    }
  }
Example #4
0
 /**
  * Parses the anchor's href through href_itm and records the title range.
  *
  * <p>For Tid__wiki and Tid__site hrefs the range is re-based onto href_itm.Ttl_full_txt()
  * (href_src / href_bgn=0 / href_end=length) and, when the ns is not main, href_ns_name and
  * href_ns_name_len are taken from the text up to and including the first colon, with
  * Xoa_ttl.Replace_unders applied.
  *
  * @param hctx parse context passed through to href_itm
  * @param anch_head the "<a ...>" head tag carrying the href
  */
 private void Parse_href(Xoh_hdoc_ctx hctx, Gfh_tag anch_head) {
   href_itm.Parse(rdr.Err_wkr(), hctx, src, anch_head);
   this.href_bgn = href_itm.Ttl_bgn();
   this.href_end = href_itm.Ttl_end();

   // only wiki / site hrefs carry a title that needs further processing
   int href_tid = href_itm.Tid();
   boolean href_has_ttl =
       href_tid == Xoh_anch_href_data.Tid__wiki || href_tid == Xoh_anch_href_data.Tid__site;
   if (!href_has_ttl) {
     return;
   }

   this.href_ns_id = href_itm.Ttl_ns_id();
   this.href_src = href_itm.Ttl_full_txt();
   this.href_bgn = 0;
   this.href_end = href_src.length;

   // main ns has no ns_name to extract
   if (href_ns_id == Xow_ns_.Tid__main) {
     return;
   }

   // ns_name is everything through the first colon; EX: 11="Template talk:"
   int colon_pos = Bry_find_.Find_fwd(href_src, Byte_ascii.Colon, href_bgn, href_end);
   byte[] ns_name_raw = Bry_.Mid(href_src, href_bgn, colon_pos + 1);
   this.href_ns_name = Xoa_ttl.Replace_unders(ns_name_raw);
   this.href_ns_name_len = href_ns_name.length;
 }
Example #5
0
 /**
  * Writes this item to {@code bfr} as an html element.
  *
  * <p>Output order: "<", node, a "data-source"/"xowa" attribute, every entry of atrs, ">".
  * Unless node equals Gfh_tag_.Bry__link, the optional body (surrounded by newlines) and a
  * closing "</node>" follow. A newline is always appended last.
  *
  * @param bfr buffer that receives the html
  */
 public void To_html(Bry_bfr bfr) {
   // head: "<node ...atrs>"
   bfr.Add_byte(Byte_ascii.Angle_bgn);
   bfr.Add(node);
   To_html_atr(bfr, "data-source", "xowa");
   for (Keyval atr : atrs) {
     To_html_atr(bfr, atr.Key(), atr.Val_to_str_or_empty());
   }
   bfr.Add_byte(Byte_ascii.Angle_end);

   // link nodes get neither body nor closing tag
   boolean is_link_node = Bry_.Eq(node, Gfh_tag_.Bry__link);
   if (!is_link_node) {
     if (body != null) {
       bfr.Add_byte_nl();
       bfr.Add(body);
       bfr.Add_byte_nl();
     }
     // tail: "</node>"
     bfr.Add_byte(Byte_ascii.Angle_bgn).Add_byte(Byte_ascii.Slash);
     bfr.Add(node);
     bfr.Add_byte(Byte_ascii.Angle_end);
   }

   bfr.Add_byte_nl();
 }
Example #6
0
 /**
  * Maps the anchor's class attribute to a cls_tid through Xoh_anch_cls_.Trie.
  *
  * <p>cls_tid is left untouched when the class attribute is empty; otherwise it becomes the
  * trie match, or Xoh_anch_cls_.Tid__unknown when nothing matches.
  *
  * @param anch_head the "<a ...>" head tag
  */
 private void Parse_cls(Gfh_tag anch_head) {
   byte[] cls = anch_head.Atrs__get_as_bry(Gfh_atr_.Bry__class);
   boolean cls_exists = !Bry_.Len_eq_0(cls);
   if (cls_exists) {
     this.cls_tid =
         Xoh_anch_cls_.Trie.Match_byte_or(cls, 0, cls.length, Xoh_anch_cls_.Tid__unknown);
   }
 }
Example #7
0
/**
 * Data item describing one image occurrence in html: an optional enclosing anchor (and, for
 * video / audio, enclosing divs) plus the img tag itself.
 *
 * <p>Instances are filled either by {@link #Init_by_parse} (reading tags from a
 * Gfh_tag_rdr) or by {@link #Init_by_decode} (values supplied directly), and are reset with
 * {@link #Clear}. Positions ("_bgn" / "_end") are int offsets; -1 is the reset value.
 */
public class Xoh_img_data implements Xoh_data_itm {
  /** Returns the dictionary tid for images (Xoh_hzip_dict_.Tid__img). */
  public int Tid() {
    return Xoh_hzip_dict_.Tid__img;
  }

  /** Start position of the whole item in src; set from the anchor head during parse. */
  public int Src_bgn() {
    return src_bgn;
  }

  private int src_bgn;

  /** End position of the whole item in src; set from the last tag consumed during parse. */
  public int Src_end() {
    return src_end;
  }

  private int src_end;

  /** Parsed href of the enclosing anchor. */
  public Xoh_anch_href_data Anch_href() {
    return anch_href;
  }

  private Xoh_anch_href_data anch_href = new Xoh_anch_href_data();

  /** Parsed class of the enclosing anchor. */
  public Xoh_anch_cls_data Anch_cls() {
    return anch_cls;
  }

  private Xoh_anch_cls_data anch_cls = new Xoh_anch_cls_data();

  /** True when parse found a site in the href whose domain type is Xow_domain_tid_.Tid__other. */
  public boolean Anch_rel_nofollow_exists() {
    return anch_rel_is_nofollow;
  }

  private boolean anch_rel_is_nofollow;

  /** Start of the anchor's title attribute value. */
  public int Anch_title_bgn() {
    return anch_title_bgn;
  }

  private int anch_title_bgn;

  /** End of the anchor's title attribute value; -1 means no title. */
  public int Anch_title_end() {
    return anch_title_end;
  }

  private int anch_title_end;

  /** True when anch_title_end is not -1. */
  public boolean Anch_title_exists() {
    return anch_title_end != -1;
  }

  /** Holder for the xowa title; filled from an attribute or from fallbacks in Init_by_parse. */
  public Bry_obj_ref Anch_xo_ttl() {
    return anch_xo_ttl;
  }

  private Bry_obj_ref anch_xo_ttl = Bry_obj_ref.New_empty();

  /** Parsed src attribute of the img tag. */
  public Xoh_img_src_data Img_src() {
    return img_src;
  }

  private final Xoh_img_src_data img_src = new Xoh_img_src_data();

  /** Parsed class attribute of the img tag. */
  public Xoh_img_cls_data Img_cls() {
    return img_cls;
  }

  private final Xoh_img_cls_data img_cls = new Xoh_img_cls_data();

  /** Parsed data-xoimg attribute of the img tag. */
  public Xoh_img_xoimg_data Img_xoimg() {
    return img_xoimg;
  }

  private final Xoh_img_xoimg_data img_xoimg = new Xoh_img_xoimg_data();

  /** Start of the img tag's alt attribute value. */
  public int Img_alt_bgn() {
    return img_alt_bgn;
  }

  private int img_alt_bgn;

  /** End of the img tag's alt attribute value. */
  public int Img_alt_end() {
    return img_alt_end;
  }

  private int img_alt_end;

  /** True when parse found that the alt text does not match the anchor title text. */
  public boolean Img_alt__diff__anch_title() {
    return img_alt__diff_anch_title;
  }

  private boolean img_alt__diff_anch_title;

  /** Value of the img tag's width attribute, or Xof_img_size.Size__neg1 when absent. */
  public int Img_w() {
    return img_w;
  }

  private int img_w;

  /** Value of the img tag's height attribute, or Xof_img_size.Size__neg1 when absent. */
  public int Img_h() {
    return img_h;
  }

  private int img_h;

  /** True when the file is not the original and img_w differs from img_src.File_w(). */
  public boolean Img_w__diff__file_w() {
    return !img_src.File_is_orig() && img_w != img_src.File_w();
  }

  /** True when parse started on a media div (video / audio). */
  public boolean Img_is_vid() {
    return img_is_vid;
  }

  private boolean img_is_vid;

  /** True when the item is a bare img tag with no enclosing anchor. */
  public boolean Img_wo_anch() {
    return img_wo_anch;
  }

  private boolean img_wo_anch;

  /** Imagemap index from the usemap attribute, or -1 when none / invalid. */
  public int Img_imap_idx() {
    return img_imap_idx;
  }

  private int img_imap_idx;

  /** Gallery flag; only changed through Img_is_gallery_ and Clear in this class. */
  public boolean Img_is_gallery() {
    return img_is_gallery;
  }

  private boolean img_is_gallery;

  /** Sets the gallery flag. */
  public void Img_is_gallery_(boolean v) {
    this.img_is_gallery = v;
  }

  /** Pagebanner data; initialized during parse when the img has the wpb-banner-image class. */
  public Pgbnr_itm Img_pgbnr() {
    return img_pgbnr;
  }

  private final Pgbnr_itm img_pgbnr = new Pgbnr_itm();

  /**
   * Resets the item: boolean flags to false, positions / sizes / imap index to -1, and clears
   * the nested href, cls, src, xoimg and pagebanner items.
   *
   * <p>NOTE(review): anch_xo_ttl is not reset here; Init_by_parse only overwrites it
   * unconditionally on the img-without-anchor path. Confirm whether a stale value can survive
   * reuse of an instance.
   */
  public void Clear() {
    this.img_alt__diff_anch_title =
        anch_rel_is_nofollow = img_is_vid = img_wo_anch = img_is_gallery = false;
    this.src_bgn =
        src_end = anch_title_bgn = anch_title_end = img_w = img_h = img_alt_bgn = img_alt_end = -1;
    this.img_imap_idx = -1;
    anch_href.Clear();
    anch_cls.Clear();
    img_src.Clear();
    img_cls.Clear();
    img_xoimg.Clear();
    img_pgbnr.Clear_by_hdump();
  }

  /**
   * Fills this item by reading tags from {@code tag_rdr}, starting at {@code anch_head}.
   *
   * <p>Three starting shapes are handled: a bare img tag, an anchor wrapping an img, and a
   * media div (video / audio) that wraps the anchor two div levels down. The reader is moved
   * forward as tags are consumed, so statement order matters.
   *
   * @param hdoc_wkr not used in this method
   * @param hctx parse context; also supplies the wiki domain for src parsing
   * @param tag_rdr forward-only tag reader; also supplies the error worker
   * @param src html source that the positions refer to
   * @param anch_head first tag of the item (img, a, or div)
   * @param unused not used in this method
   * @return false when the div is not a xowa media div or when href / cls / src parsing fails;
   *     true otherwise
   */
  public boolean Init_by_parse(
      Xoh_hdoc_wkr hdoc_wkr,
      Xoh_hdoc_ctx hctx,
      Gfh_tag_rdr tag_rdr,
      byte[] src,
      Gfh_tag anch_head,
      Gfh_tag unused) {
    Gfh_tag img_tag = anch_head;
    Bry_err_wkr err_wkr = tag_rdr.Err_wkr();
    this.img_wo_anch = anch_head.Name_id() == Gfh_tag_.Id__img;
    if (img_wo_anch) {
      // bare <img>: title comes from data-xowa-title on the img itself
      Gfh_atr xowa_title =
          anch_head.Atrs__get_by_or_empty(
              Xoh_img_xoimg_data.Bry__data_xowa_title); // data-xowa-title='A.png'
      anch_xo_ttl.Val_(xowa_title.Val());
    } else {
      if (anch_head.Name_id() == Gfh_tag_.Id__div) { // video / audio
        if (!anch_head.Atrs__cls_eq(
            gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data.Atr__id__xowa_media_div))
          return false; // handle fake-thumbs created through en.w:Template:Image_label_begin;
                        // PAGE:en.w:Blackburnshire; DATE:2016-01-04
        img_is_vid = true;
        tag_rdr.Tag__move_fwd_head(); // skip one head tag
        anch_head = tag_rdr.Tag__move_fwd_head(); // following head tag is treated as the anchor
      }
      this.src_bgn = anch_head.Src_bgn(); // <a
      if (!anch_href.Parse(err_wkr, hctx, src, anch_head)) return false; // href='/wiki/File:A.png'
      if (!anch_cls.Parse(err_wkr, src, anch_head)) return false; // class='image'
      Gfh_atr anch_title = anch_head.Atrs__get_by_or_empty(Gfh_atr_.Bry__title); // title='abc'
      anch_title_bgn = anch_title.Val_bgn();
      anch_title_end = anch_title.Val_end();
      Gfh_atr xowa_title =
          anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png'
      if (xowa_title.Val_dat_exists()) anch_xo_ttl.Val_(xowa_title.Val());
      img_tag = tag_rdr.Tag__move_fwd_head();
    }
    img_tag.Chk_name_or_fail(Gfh_tag_.Id__img); // <img
    if (img_tag.Atrs__cls_has(
        gplx.xowa.xtns.pagebanners.Pgbnr_xtn_mgr
            .Bry__cls__wpb_banner_image)) { // handle pagebanner; EX: <img class="wpb-banner-image">
      img_pgbnr.Init_by_parse(img_tag);
    }
    img_xoimg.Parse(err_wkr, src, img_tag); // data-xoimg='...'
    this.img_w =
        img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width, Xof_img_size.Size__neg1); // width='220'
    this.img_h =
        img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, Xof_img_size.Size__neg1); // height='110'
    Gfh_atr img_alt = img_tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__alt); // alt='File:A.png'
    img_alt_bgn = img_alt.Val_bgn();
    img_alt_end = img_alt.Val_end();
    img_cls.Init_by_parse(err_wkr, src, img_tag); // class='thumbborder'
    img_alt__diff_anch_title =
        !Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end);
    if (!img_src.Parse(err_wkr, hctx, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
    // no explicit xowa title: fall back to the file title from src, then to the href's page
    if (anch_xo_ttl.Val_is_empty()) {
      anch_xo_ttl.Val_(img_src.File_ttl_bry());
      if (anch_xo_ttl.Val_is_empty()) anch_xo_ttl.Val_(anch_href.Ttl_page_db());
    }
    this.img_imap_idx = Get_imap_idx(tag_rdr.Err_wkr(), img_tag);
    if (img_wo_anch) {
      // bare <img>: item ends with the img tag; no </a> to consume
      src_end = img_tag.Src_end();
      return true;
    }
    Gfh_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a); // </a>
    this.src_end = anch_tail.Src_end();
    if (anch_href.Site_exists()) {
      Xow_domain_itm itm =
          Xow_domain_itm_.parse(Bry_.Mid(src, anch_href.Site_bgn(), anch_href.Site_end()));
      anch_rel_is_nofollow = itm.Domain_type_id() == Xow_domain_tid_.Tid__other;
    }
    if (img_is_vid) {
      // video / audio: consume two closing div tags; item ends at the second one
      tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div);
      anch_head = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div);
      src_end = anch_head.Src_end();
    }
    return true;
  }

  /**
   * Fills this item from already-decoded values; each argument is copied to the field of the
   * same name. Fields not listed as parameters are left as they are.
   */
  public void Init_by_decode(
      boolean anch_rel_is_nofollow,
      int anch_title_bgn,
      int anch_title_end,
      boolean img_wo_anch,
      boolean img_is_vid,
      int img_w,
      int img_h,
      int img_alt_bgn,
      int img_alt_end,
      int img_imap_idx) {
    this.anch_rel_is_nofollow = anch_rel_is_nofollow;
    this.anch_title_bgn = anch_title_bgn;
    this.anch_title_end = anch_title_end;
    this.img_wo_anch = img_wo_anch;
    this.img_is_vid = img_is_vid;
    this.img_w = img_w;
    this.img_h = img_h;
    this.img_alt_bgn = img_alt_bgn;
    this.img_alt_end = img_alt_end;
    this.img_imap_idx = img_imap_idx;
  }

  /** Releases this item back to its pool manager by calling Rls_fast with pool_idx. */
  public void Pool__rls() {
    pool_mgr.Rls_fast(pool_idx);
  }

  // pool bookkeeping; assigned in Pool__make
  private Gfo_poolable_mgr pool_mgr;
  private int pool_idx;

  /**
   * Creates a new instance bound to the given pool manager and index.
   *
   * @param mgr pool manager stored on the new instance
   * @param idx pool index stored on the new instance
   * @param args not used in this method
   * @return the new Xoh_img_data
   */
  public Gfo_poolable_itm Pool__make(Gfo_poolable_mgr mgr, int idx, Object[] args) {
    Xoh_img_data rv = new Xoh_img_data();
    rv.pool_mgr = mgr;
    rv.pool_idx = idx;
    return rv;
  }

  /**
   * Reads the imagemap index from the usemap attribute of {@code img_tag}.
   *
   * @return the int parsed after the usemap prefix; -1 when the attribute is empty, or when
   *     parsing fails (a warning is sent to err_wkr in that case)
   */
  private static int Get_imap_idx(Bry_err_wkr err_wkr, Gfh_tag img_tag) {
    byte[] val = img_tag.Atrs__get_as_bry(Imap_xtn_mgr.Bry__usemap__name);
    if (Bry_.Len_eq_0(val)) return -1;
    int rv = Bry_.To_int_or(val, Imap_xtn_mgr.Bry__usemap__prefix.length, val.length, -1);
    if (rv == -1) {
      err_wkr.Warn("invalid imap_idx", "val", val);
      return rv;
    }
    return rv;
  }

  // class / attribute names as byte arrays
  public static final byte[] Bry__cls__anch__image = Bry_.new_a7("image"),
      Bry__cls__img__thumbimage = Bry_.new_a7("thumbimage"),
      Bry__atr__xowa_title = Bry_.new_a7("xowa_title");
}
Example #8
0
 /**
  * Fills this item by reading tags from {@code tag_rdr}, starting at {@code anch_head}.
  *
  * <p>Handles a bare img tag, an anchor wrapping an img, and a media div (video / audio)
  * wrapping the anchor. The reader only moves forward, so the order of the reads below is
  * significant.
  *
  * @param hdoc_wkr not used in this method
  * @param hctx parse context; also supplies the wiki domain for src parsing
  * @param tag_rdr forward-only tag reader; also supplies the error worker
  * @param src html source that the recorded positions refer to
  * @param anch_head first tag of the item (img, a, or div)
  * @param unused not used in this method
  * @return false when the div is not a xowa media div or when href / cls / src parsing fails;
  *     true otherwise
  */
 public boolean Init_by_parse(
     Xoh_hdoc_wkr hdoc_wkr,
     Xoh_hdoc_ctx hctx,
     Gfh_tag_rdr tag_rdr,
     byte[] src,
     Gfh_tag anch_head,
     Gfh_tag unused) {
   Bry_err_wkr err_wkr = tag_rdr.Err_wkr();
   Gfh_tag img_tag;
   this.img_wo_anch = anch_head.Name_id() == Gfh_tag_.Id__img;

   if (img_wo_anch) {
     // bare <img>: the head tag is the img; title comes from data-xowa-title='A.png'
     img_tag = anch_head;
     Gfh_atr data_ttl_atr =
         anch_head.Atrs__get_by_or_empty(Xoh_img_xoimg_data.Bry__data_xowa_title);
     anch_xo_ttl.Val_(data_ttl_atr.Val());
   } else {
     Gfh_tag anch = anch_head;
     if (anch.Name_id() == Gfh_tag_.Id__div) { // video / audio
       boolean is_media_div =
           anch.Atrs__cls_eq(gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data.Atr__id__xowa_media_div);
       if (!is_media_div) {
         // handle fake-thumbs created through en.w:Template:Image_label_begin;
         // PAGE:en.w:Blackburnshire; DATE:2016-01-04
         return false;
       }
       img_is_vid = true;
       tag_rdr.Tag__move_fwd_head(); // skip one head tag
       anch = tag_rdr.Tag__move_fwd_head(); // following head tag is treated as the anchor
     }

     // <a href='/wiki/File:A.png' class='image' title='abc' xowa_title='A.png'>
     this.src_bgn = anch.Src_bgn();
     if (!anch_href.Parse(err_wkr, hctx, src, anch)) {
       return false;
     }
     if (!anch_cls.Parse(err_wkr, src, anch)) {
       return false;
     }
     Gfh_atr title_atr = anch.Atrs__get_by_or_empty(Gfh_atr_.Bry__title);
     anch_title_bgn = title_atr.Val_bgn();
     anch_title_end = title_atr.Val_end();
     Gfh_atr xo_ttl_atr = anch.Atrs__get_by_or_empty(Bry__atr__xowa_title);
     if (xo_ttl_atr.Val_dat_exists()) {
       anch_xo_ttl.Val_(xo_ttl_atr.Val());
     }
     img_tag = tag_rdr.Tag__move_fwd_head();
   }

   // <img ...>
   img_tag.Chk_name_or_fail(Gfh_tag_.Id__img);
   boolean is_pgbnr_img =
       img_tag.Atrs__cls_has(gplx.xowa.xtns.pagebanners.Pgbnr_xtn_mgr.Bry__cls__wpb_banner_image);
   if (is_pgbnr_img) { // EX: <img class="wpb-banner-image">
     img_pgbnr.Init_by_parse(img_tag);
   }
   img_xoimg.Parse(err_wkr, src, img_tag); // data-xoimg='...'
   this.img_w = img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width, Xof_img_size.Size__neg1);
   this.img_h = img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, Xof_img_size.Size__neg1);
   Gfh_atr alt_atr = img_tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__alt); // alt='File:A.png'
   img_alt_bgn = alt_atr.Val_bgn();
   img_alt_end = alt_atr.Val_end();
   img_cls.Init_by_parse(err_wkr, src, img_tag); // class='thumbborder'
   boolean alt_same_as_title =
       Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end);
   img_alt__diff_anch_title = !alt_same_as_title;
   if (!img_src.Parse(err_wkr, hctx, hctx.Wiki__domain_bry(), img_tag)) { // src='...'
     return false;
   }

   // no explicit xowa title: fall back to the file title from src, then to the href's page
   if (anch_xo_ttl.Val_is_empty()) {
     anch_xo_ttl.Val_(img_src.File_ttl_bry());
     if (anch_xo_ttl.Val_is_empty()) {
       anch_xo_ttl.Val_(anch_href.Ttl_page_db());
     }
   }
   this.img_imap_idx = Get_imap_idx(tag_rdr.Err_wkr(), img_tag);

   // bare <img>: item ends with the img tag; there is no </a> to consume
   if (img_wo_anch) {
     src_end = img_tag.Src_end();
     return true;
   }

   // </a>
   Gfh_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__a);
   this.src_end = anch_tail.Src_end();
   if (anch_href.Site_exists()) {
     byte[] site_bry = Bry_.Mid(src, anch_href.Site_bgn(), anch_href.Site_end());
     Xow_domain_itm site_domain = Xow_domain_itm_.parse(site_bry);
     anch_rel_is_nofollow = site_domain.Domain_type_id() == Xow_domain_tid_.Tid__other;
   }

   // video / audio: consume two closing div tags; item ends at the second one
   if (img_is_vid) {
     tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div);
     Gfh_tag div_tail = tag_rdr.Tag__move_fwd_tail(Gfh_tag_.Id__div);
     src_end = div_tail.Src_end();
   }
   return true;
 }
Example #9
0
 /**
  * Convenience overload: converts {@code code} with Bry_.new_u8 and delegates to the
  * byte-array overload of New_js_code.
  *
  * @param code javascript source as a String
  * @return the tag item built by the byte-array overload
  */
 public static Xopg_tag_itm New_js_code(String code) {
   byte[] code_bry = Bry_.new_u8(code);
   return New_js_code(code_bry);
 }
  /**
   * Writes the body of a wikitext page to {@code bfr}.
   *
   * <p>Order of work: (1) pre-generated hdump html is written as-is and the method returns;
   * (2) MediaWiki-namespace pages are converted with Gfs_php_converter and the method returns;
   * (3) otherwise the page html is written into a separate pooled buffer, run through the tidy
   * manager, appended to {@code bfr}, followed by the category pagebox and, when variants are
   * enabled, a language-variant conversion of everything currently in {@code bfr}.
   *
   * @param bfr destination buffer
   * @param app application; used for the file-page builder and progress messages
   * @param wiki wiki supplying html, tidy, category and variant managers
   * @param data_raw raw page text; only used for MediaWiki-namespace pages
   * @param ctx parser context
   * @param hctx html writer context; hdump mode disables the category pagebox
   * @param page page being written
   * @param page_tid not used in this method
   * @param ns_id namespace id of the page
   */
  private void Write_body_wikitext(
      Bry_bfr bfr,
      Xoae_app app,
      Xowe_wiki wiki,
      byte[] data_raw,
      Xop_ctx ctx,
      Xoh_wtr_ctx hctx,
      Xoae_page page,
      byte page_tid,
      int ns_id) {
    // pre-generated html from html dumps: write verbatim and stop
    byte[] prebuilt_html = page.Db().Html().Html_bry();
    if (Bry_.Len_gt_0(prebuilt_html)) {
      bfr.Add(prebuilt_html);
      return;
    }

    // MediaWiki ns + wikitext must be a message; convert args back to php; DATE:2014-06-13
    if (ns_id == Xow_ns_.Tid__mediawiki) {
      bfr.Add(Gfs_php_converter.Xto_php(tmp_bfr, Bool_.N, data_raw));
      return;
    }

    // [[File]] pages get a boilerplate header; html is XOWA-generated so it is not tidied
    if (ns_id == Xow_ns_.Tid__file) {
      app.Ns_file_page_mgr()
          .Bld_html(wiki, ctx, page, bfr, page.Ttl(), wiki.Cfg_file_page(), page.File_queue());
    }

    // bfr already has <html> and <head> written to it, so tidy gets its own buffer;
    // DATE:2014-06-11
    Bry_bfr body_bfr = wiki.Utl__bfr_mkr().Get_m001();

    if (page.Html_data().Skip_parse()) {
      body_bfr.Add(page.Html_data().Custom_body());
    } else if (page.Root() != null) {
      // NOTE: root is null if blank; occurs for one test: Logo_has_correct_main_page;
      // DATE:2015-09-29
      // NOTE: always clear tocs before writing html; toc_itms are added when writing html_hdr;
      // DATE:2016-07-17
      page.Html_data().Toc_mgr().Clear();
      wiki.Html_mgr()
          .Html_wtr()
          .Write_doc(body_bfr, ctx, hctx, page.Root().Data_mid(), page.Root());
      if (wiki.Html_mgr().Html_wtr().Cfg().Toc__show()) {
        gplx.xowa.htmls.core.wkrs.tocs.Xoh_toc_wtr.Write_toc(body_bfr, page, hctx);
      }
    }

    // [[Category]] pages append catpage data
    if (ns_id == Xow_ns_.Tid__category) {
      body_bfr.Add_safe(page.Html_data().Catpage_data());
    }

    // tidy, then move the result into the main buffer and hand the pooled buffer back
    wiki.Html_mgr().Tidy_mgr().Exec_tidy(body_bfr, !hctx.Mode_is_hdump(), page.Url_bry_safe());
    bfr.Add_bfr_and_clear(body_bfr);
    body_bfr.Mkr_rls();

    // categories at bottom of page; html is XOWA-generated so it is not tidied
    int ctgs_len = page.Wtxt().Ctgs__len();
    boolean write_ctgs =
        ctgs_enabled
            && ctgs_len > 0 // no categories found while parsing wikitext
            && !wiki.Html_mgr().Importing_ctgs() // page waits for category import; DATE:2014-10-15
            && !hctx.Mode_is_hdump(); // do not dump categories during hdump; DATE:2016-10-12
    if (write_ctgs) {
      if (app.Mode().Tid_is_gui()) {
        app.Usr_dlg().Prog_many("", "", "loading categories: count=~{0}", ctgs_len);
      }
      Xoctg_pagebox_itm[] pagebox_itms = wiki.Ctg__pagebox_wtr().Get_catlinks_by_page(wiki, page);
      boolean hidden_enabled = wiki.App().Api_root().Addon().Wikis__ctgs__hidden_enabled();
      wiki.Ctg__pagebox_wtr().Write_pagebox(hidden_enabled, bfr, wiki, page, pagebox_itms);
    }

    // language variants: convert everything currently in bfr and write it back
    Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
    if (!vnt_mgr.Enabled()) {
      return;
    }
    bfr.Add(
        vnt_mgr.Convert_lang()
            .Parse_page(vnt_mgr.Cur_itm(), page.Db().Page().Id(), bfr.To_bry_and_clear()));
  }
public class Xoh_page_wtr_wkr {
  // thread_lock_1 is the monitor for Write_page; thread_lock_2 is the monitor for Write_body.
  // NOTE(review): both methods assign the page / wiki / app fields below under different
  // locks — confirm that concurrent Write_page and direct Write_body calls cannot interleave.
  private final Object thread_lock_1 = new Object(), thread_lock_2 = new Object();
  // scratch buffer shared by the helper calls made from this class
  private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
  // owning manager; supplies formatters and css
  private final Xoh_page_wtr_mgr mgr;
  // page mode this worker was created for; compared against Xopg_page_ tids
  private final byte page_mode;
  private final Wdata_xwiki_link_wtr wdata_lang_wtr =
      new Wdata_xwiki_link_wtr(); // In other languages
  // per-call state; assigned at the start of Write_page / Write_body
  private Xoae_app app;
  private Xowe_wiki wiki;
  private Xoae_page page;
  // cached on first use in Write_page_by_tid
  private byte[] root_dir_bry;

  /**
   * Creates a page writer bound to a manager and a fixed page mode.
   *
   * @param mgr owning manager; supplies formatters and css
   * @param page_mode page mode this worker writes; compared against Xopg_page_ tids
   */
  public Xoh_page_wtr_wkr(Xoh_page_wtr_mgr mgr, byte page_mode) {
    this.page_mode = page_mode;
    this.mgr = mgr;
  }

  // whether the category pagebox is written at the bottom of wikitext pages; on by default
  private boolean ctgs_enabled = true;

  /**
   * Enables or disables writing of the category pagebox.
   *
   * @param v new value for the flag
   * @return this worker, so calls can be chained
   */
  public Xoh_page_wtr_wkr Ctgs_enabled_(boolean v) {
    this.ctgs_enabled = v;
    return this;
  }

  /**
   * Writes a complete page (skin plus body) for {@code page} into {@code rv}.
   *
   * <p>When the manager is not html-capable, only the body is written. Otherwise the body is
   * first written into a pooled buffer and then wrapped by the formatter chosen from
   * page_mode. In Tid_html mode the result is html-escaped and wrapped by Page_html_fmtr;
   * in hdump-save mode that is the only wrapping, in the other case it is applied on top of
   * the read-formatted page.
   *
   * <p>Runs under thread_lock_1 and assigns the page / wiki / app fields for the duration of
   * the call; the page field is set back to null on normal completion.
   *
   * @param rv destination buffer for the finished page
   * @param page page to write; also supplies the wiki and app
   * @param ctx parser context; its page is updated to {@code page}
   */
  public void Write_page(Bry_bfr rv, Xoae_page page, Xop_ctx ctx) {
    synchronized (thread_lock_1) {
      this.page = page;
      this.wiki = page.Wikie();
      this.app = wiki.Appe();
      ctx.Page_(page); // HACK: must update page for toc_mgr; WHEN: Xoae_page rewrite
      Bry_fmtr fmtr = null;
      if (mgr.Html_capable()) {
        wdata_lang_wtr.Page_(page);
        byte view_mode = page_mode;
        switch (page_mode) {
          case Xopg_page_.Tid_edit:
            fmtr = mgr.Page_edit_fmtr();
            break;
          case Xopg_page_.Tid_html:
            fmtr = mgr.Page_read_fmtr();
            view_mode = Xopg_page_.Tid_read;
            break; // set view_mode to read, so that "read" is highlighted in HTML
          case Xopg_page_.Tid_read:
            fmtr = mgr.Page_read_fmtr();
            break;
        }
        // NOTE: get separate page buffer to output page; do not reuse tmp_bfr b/c it will be
        // used inside Fmt_do
        Bry_bfr page_bfr = wiki.Utl__bfr_mkr().Get_m001();
        Xoh_wtr_ctx hctx = null;
        if (page_mode == Xopg_page_.Tid_html
            && wiki.App().Api_root().Wiki().Hdump().Html_mode().Tid_is_hdump_save()) {
          hctx = Xoh_wtr_ctx.Hdump;
          Write_body(page_bfr, ctx, hctx, page);
          // FIX: release the pooled buffer here as well. This branch previously drained it
          // with To_bry_and_clear(), which left it checked out of the pool; the sibling
          // branch below already uses To_bry_and_rls().
          Write_page_by_tid(
              ctx,
              hctx,
              page_mode,
              rv,
              mgr.Page_html_fmtr(),
              Gfh_utl.Escape_html_as_bry(page_bfr.To_bry_and_rls()));
          // NOTE(review): wdata_lang_wtr.Page_(null) is only called on the other branch —
          // confirm whether it should also be reset here.
        } else {
          hctx = Xoh_wtr_ctx.Basic;
          Write_body(page_bfr, ctx, hctx, page);
          Write_page_by_tid(ctx, hctx, view_mode, rv, fmtr, page_bfr.To_bry_and_rls());
          new gplx.xowa.addons.apps.scripts.Xoscript_mgr().Write(rv, wiki, page);
          // if html, write page again, but wrap it in html skin this time
          if (page_mode == Xopg_page_.Tid_html) {
            Write_page_by_tid(
                ctx,
                hctx,
                page_mode,
                rv,
                mgr.Page_html_fmtr(),
                Gfh_utl.Escape_html_as_bry(rv.To_bry_and_clear()));
          }
          wdata_lang_wtr.Page_(null);
        }
      } else {
        Write_body(rv, ctx, Xoh_wtr_ctx.Basic, page);
      }
      this.page = null;
    }
  }

  /**
   * Wraps already-rendered page content in the page skin by filling {@code fmtr}.
   *
   * <p>Reads the page / wiki / app fields, so it must be called after they have been assigned
   * (Write_page does this). If the page has custom html, that html is written instead and
   * nothing else happens.
   *
   * @param ctx parser context; passed to the pagebanner writer
   * @param hctx html writer context; passed to the pagebanner writer
   * @param html_gen_tid page mode used for the edit toolbar and the view div
   * @param bfr destination buffer
   * @param fmtr formatter that receives the positional arguments built below
   * @param page_data rendered page body, passed to the formatter as-is
   */
  private void Write_page_by_tid(
      Xop_ctx ctx,
      Xoh_wtr_ctx hctx,
      byte html_gen_tid,
      Bry_bfr bfr,
      Bry_fmtr fmtr,
      byte[] page_data) {
    // if custom_html, use it and exit; needed for Default_tab
    byte[] custom_html = page.Html_data().Custom_html();
    if (custom_html != null) {
      bfr.Add(custom_html);
      return;
    }
    // temp variables
    // root_dir_bry is computed once and then reused on later calls
    if (root_dir_bry == null) this.root_dir_bry = app.Fsys_mgr().Root_dir().To_http_file_bry();
    Xoa_ttl page_ttl = page.Ttl();
    int page_ns_id = page_ttl.Ns().Id();
    byte page_tid = Xow_page_tid.Identify(wiki.Domain_tid(), page_ns_id, page_ttl.Page_db());
    DateAdp modified_on = page.Db().Page().Modified_on();
    byte[] modified_on_msg =
        wiki.Msg_mgr()
            .Val_by_id_args(
                Xol_msg_itm_.Id_portal_lastmodified,
                modified_on.XtoStr_fmt_yyyy_MM_dd(),
                modified_on.XtoStr_fmt_HHmm());
    byte[] page_body_class = Xoh_page_body_cls.Calc(tmp_bfr, page_ttl, page_tid);
    byte[] html_content_editable =
        wiki.Gui_mgr().Cfg_browser().Content_editable() ? Content_editable_bry : Bry_.Empty;
    byte[] page_content_sub = Xoh_page_wtr_wkr_.Bld_page_content_sub(app, wiki, page, tmp_bfr);
    // edit toolbar js is only supplied in edit mode
    byte[] js_edit_toolbar_bry =
        html_gen_tid == Xopg_page_.Tid_edit
            ? wiki.Fragment_mgr().Html_js_edit_toolbar()
            : Bry_.Empty;
    // with language variants enabled, page_ttl is replaced by a title built from the
    // converted text; the formatter's Full_db argument below still uses page.Ttl()
    Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
    if (vnt_mgr.Enabled()) {
      byte[] converted_title = vnt_mgr.Convert_lang().Converted_title(); // prefer converted title
      if (converted_title
          == null) // converted title does not exist; use regular page title and convert it
      converted_title = vnt_mgr.Convert_lang().Auto_convert(vnt_mgr.Cur_itm(), page_ttl.Page_txt());
      page_ttl = Xoa_ttl.Parse(wiki, page_ttl.Ns().Id(), converted_title);
    }
    byte[] page_name =
        Xoh_page_wtr_wkr_.Bld_page_name(
            tmp_bfr, page_ttl,
            null); // NOTE: page_name does not show display_title (<i>). always pass in null
    byte[] page_display_title =
        Xoh_page_wtr_wkr_.Bld_page_name(tmp_bfr, page_ttl, page.Html_data().Display_ttl());
    page.Html_data()
        .Custom_tab_name_(
            page_name); // set tab_name to page_name; note that if null, gui code will ignore and
                        // use Ttl.Page_txt; PAGE: zh.w:釣魚臺列嶼主權問題 DATE:2015-10-05
    Xow_portal_mgr portal_mgr = wiki.Html_mgr().Portal_mgr().Init_assert();
    // arguments are positional; presumably their order must match the placeholders of the
    // formatter template — verify against the template before reordering anything
    fmtr.Bld_bfr_many(
        bfr,
        root_dir_bry,
        Xoa_app_.Version,
        Xoa_app_.Build_date,
        app.Tcp_server().Running_str(),
        page.Db().Page().Id(),
        page.Ttl().Full_db(),
        page_name,
        page.Html_data().Page_heading().Init(page.Html_data(), page_display_title),
        modified_on_msg,
        mgr.Css_common_bry(),
        mgr.Css_wiki_bry(),
        page.Html_data().Head_mgr().Init(app, wiki, page).Init_dflts(),
        page.Lang().Dir_ltr_bry(),
        page.Html_data().Indicators(),
        page_content_sub,
        wiki.Html_mgr().Portal_mgr().Div_jump_to(),
        wiki.Xtn_mgr().Xtn_pgbnr().Write_html(page, ctx, hctx),
        page_body_class,
        html_content_editable,
        page_data,
        wdata_lang_wtr

        // sidebar divs
        ,
        portal_mgr.Div_personal_bry(),
        portal_mgr.Div_ns_bry(wiki.Utl__bfr_mkr(), page_ttl, wiki.Ns_mgr()),
        portal_mgr.Div_view_bry(
            wiki.Utl__bfr_mkr(), html_gen_tid, page.Html_data().Xtn_search_text()),
        portal_mgr.Div_logo_bry(),
        portal_mgr.Div_home_bry(),
        new Xopg_xtn_skin_fmtr_arg(page, Xopg_xtn_skin_itm_tid.Tid_sidebar),
        portal_mgr.Div_sync_bry(
            tmp_bfr, app.Api_root().Addon().Bldr().Sync().Manual_enabled(), wiki, page),
        portal_mgr.Div_wikis_bry(wiki.Utl__bfr_mkr()),
        portal_mgr.Sidebar_mgr().Html_bry(),
        mgr.Edit_rename_div_bry(page_ttl),
        page.Html_data().Edit_preview_w_dbg(),
        js_edit_toolbar_bry);
    Xoh_page_wtr_wkr_.Bld_head_end(bfr, tmp_bfr, page); // add after </head>
    Xoh_page_wtr_wkr_.Bld_html_end(bfr, tmp_bfr, page); // add after </html>
  }

  /**
   * Writes the page for an html dump: the pagebanner html first (when the page has one),
   * then the page body via Write_body.
   *
   * @param bfr destination buffer
   * @param ctx parser context; also supplies the wiki whose pagebanner extension is used
   * @param hctx html writer context
   * @param wpg page to write
   */
  public void Write_hdump(Bry_bfr bfr, Xop_ctx ctx, Xoh_wtr_ctx hctx, Xoae_page wpg) {
    boolean has_pgbnr = wpg.Html_data().Xtn_pgbnr() != null;
    if (has_pgbnr) {
      // pgbnr exists: write it to the top of the html
      ctx.Wiki().Xtn_mgr().Xtn_pgbnr().Write_html(wpg, ctx, hctx).Bfr_arg__add(bfr);
    }
    Write_body(bfr, ctx, hctx, wpg);
  }

  /**
   * Writes only the body of {@code page} into {@code bfr}, choosing the writer from the page
   * mode and the page type.
   *
   * <p>Edit mode always uses Write_body_edit. Otherwise msg / js / css / lua pages are written
   * through Write_body_pre, json pages through the wikidata manager, and wikitext pages
   * through Write_body_wikitext. Afterwards the newly written range is passed to the js
   * cleaner, except on the home wiki and for pages written through Write_body_pre.
   *
   * <p>Runs under thread_lock_2 and assigns the page / wiki / app fields.
   *
   * @param bfr destination buffer; content already in it is left in place
   * @param ctx parser context
   * @param hctx html writer context
   * @param page page to write; also supplies the wiki and app
   */
  public void Write_body(Bry_bfr bfr, Xop_ctx ctx, Xoh_wtr_ctx hctx, Xoae_page page) {
    synchronized (thread_lock_2) {
      this.page = page;
      this.wiki = page.Wikie();
      this.app = wiki.Appe();

      Xoa_ttl ttl = page.Ttl();
      int ns_id = ttl.Ns().Id();
      // NOTE: can't cache page_tid b/c Write_body is called directly; DATE:2014-10-02
      byte page_tid = Xow_page_tid.Identify(wiki.Domain_tid(), ns_id, ttl.Page_db());
      byte[] text = page.Db().Text().Text_bry();
      int body_bgn = bfr.Len(); // js cleaning starts from here
      boolean wrote_as_pre = false;

      if (page_mode == Xopg_page_.Tid_edit) {
        Write_body_edit(bfr, text, ns_id, page_tid);
      } else if (page_tid == Xow_page_tid.Tid_msg
          || page_tid == Xow_page_tid.Tid_js
          || page_tid == Xow_page_tid.Tid_css
          || page_tid == Xow_page_tid.Tid_lua) {
        Write_body_pre(bfr, app, wiki, hctx, text, tmp_bfr);
        wrote_as_pre = true;
      } else if (page_tid == Xow_page_tid.Tid_json) {
        app.Wiki_mgr().Wdata_mgr().Write_json_as_html(bfr, ttl.Full_db(), text);
      } else if (page_tid == Xow_page_tid.Tid_wikitext) {
        Write_body_wikitext(bfr, app, wiki, text, ctx, hctx, page, page_tid, ns_id);
      }

      // home wiki is allowed to use javascript; .js / .css / .lua pages may have js fragments,
      // but the entire text is escaped and put in pre, so skip the check to avoid a spurious
      // warning; DATE:2013-11-21
      boolean skip_js_clean =
          wiki.Domain_tid() == Xow_domain_tid_.Tid__home || wrote_as_pre;
      if (!skip_js_clean) {
        wiki.Html_mgr().Js_cleaner().Clean_bfr(wiki, ttl, bfr, body_bgn);
      }
    }
  }

  // Writes the html body for a wikitext page into bfr.
  //
  // Steps, in order:
  //   1. if the page already carries pre-generated html (html dump), copy it to bfr and stop
  //   2. if the page is in the MediaWiki ns, convert data_raw back to php-style args and stop
  //   3. if the page is in the File ns, write the file-page boilerplate header directly to bfr
  //   4. render the parsed page (or the custom body) into a separate pooled bfr, append catpage
  //      data for Category pages, run tidy on it, and append the result to bfr
  //   5. write the category pagebox at the bottom of the page (when enabled)
  //   6. run the language-variant converter over the whole of bfr (when variants are enabled)
  //
  // bfr      : main output bfr; already has <html> and <head> written to it
  // data_raw : raw page text; only read for the MediaWiki-message case
  // hctx     : html writer context; hdump mode changes the tidy flag and suppresses categories
  // page_tid : not read by this method
  // ns_id    : namespace id of the page; selects the MediaWiki / File / Category handling
  private void Write_body_wikitext(
      Bry_bfr bfr,
      Xoae_app app,
      Xowe_wiki wiki,
      byte[] data_raw,
      Xop_ctx ctx,
      Xoh_wtr_ctx hctx,
      Xoae_page page,
      byte page_tid,
      int ns_id) {
    // dump and exit if pre-generated html from html dumps
    byte[] hdump_data = page.Db().Html().Html_bry();
    if (Bry_.Len_gt_0(hdump_data)) {
      bfr.Add(hdump_data);
      return;
    }

    // dump and exit if MediaWiki message; if MediaWiki and wikitext, must be a message; convert
    // args back to php; DATE:2014-06-13
    if (ns_id == Xow_ns_.Tid__mediawiki) {
      bfr.Add(Gfs_php_converter.Xto_php(tmp_bfr, Bool_.N, data_raw));
      return;
    }

    // if [[File]], add boilerplate header; note that html is XOWA-generated so does not need to be
    // tidied
    if (ns_id == Xow_ns_.Tid__file) {
      app.Ns_file_page_mgr()
          .Bld_html(wiki, ctx, page, bfr, page.Ttl(), wiki.Cfg_file_page(), page.File_queue());
    }

    // get separate bfr; note that bfr already has <html> and <head> written to it, so this can't be
    // passed to tidy; DATE:2014-06-11
    Bry_bfr tidy_bfr = wiki.Utl__bfr_mkr().Get_m001();
    try {
      // write wikitext
      if (page.Html_data().Skip_parse()) {
        tidy_bfr.Add(page.Html_data().Custom_body());
      } else if (page.Root() != null) {
        // NOTE: Root will be null if blank; occurs for one test: Logo_has_correct_main_page;
        // DATE:2015-09-29

        // NOTE: always clear tocs before writing html; toc_itms added when writing html_hdr;
        // DATE:2016-07-17
        page.Html_data().Toc_mgr().Clear();
        wiki.Html_mgr()
            .Html_wtr()
            .Write_doc(tidy_bfr, ctx, hctx, page.Root().Data_mid(), page.Root());
        if (wiki.Html_mgr().Html_wtr().Cfg().Toc__show()) {
          gplx.xowa.htmls.core.wkrs.tocs.Xoh_toc_wtr.Write_toc(tidy_bfr, page, hctx);
        }
      }

      // if [[Category]], add catpage data
      if (ns_id == Xow_ns_.Tid__category) tidy_bfr.Add_safe(page.Html_data().Catpage_data());
      // if (ns_id == Xow_ns_.Tid__category) wiki.Ctg__catpage_mgr().Write_catpage(tidy_bfr, page,
      // hctx);

      // tidy html
      wiki.Html_mgr().Tidy_mgr().Exec_tidy(tidy_bfr, !hctx.Mode_is_hdump(), page.Url_bry_safe());

      // add back to main bfr
      bfr.Add_bfr_and_clear(tidy_bfr);
    } finally {
      // always hand the pooled bfr back, even if writing / tidying threw; otherwise the bfr taken
      // from Utl__bfr_mkr is never released.
      // NOTE(review): on the exception path the bfr may still hold partial output when released;
      // confirm Mkr_rls resets it
      tidy_bfr.Mkr_rls();
    }

    // handle Categories at bottom of page; note that html is XOWA-generated so does not need to be
    // tidied
    int ctgs_len = page.Wtxt().Ctgs__len();
    if (ctgs_enabled
        && ctgs_len > 0 // skip if no categories found while parsing wikitext
        && !wiki.Html_mgr()
            .Importing_ctgs() // do not show categories if importing categories, page will wait for
                              // category import to be done; DATE:2014-10-15
        && !hctx.Mode_is_hdump() // do not dump categories during hdump; DATE:2016-10-12
    ) {
      if (app.Mode().Tid_is_gui()) {
        app.Usr_dlg().Prog_many("", "", "loading categories: count=~{0}", ctgs_len);
      }
      Xoctg_pagebox_itm[] pagebox_itms = wiki.Ctg__pagebox_wtr().Get_catlinks_by_page(wiki, page);
      boolean hidden_enabled = wiki.App().Api_root().Addon().Wikis__ctgs__hidden_enabled();
      wiki.Ctg__pagebox_wtr().Write_pagebox(hidden_enabled, bfr, wiki, page, pagebox_itms);
    }

    // translate if variants are enabled; the whole of bfr is drained, converted and written back
    Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
    if (vnt_mgr.Enabled()) {
      bfr.Add(
          vnt_mgr.Convert_lang()
              .Parse_page(vnt_mgr.Cur_itm(), page.Db().Page().Id(), bfr.To_bry_and_clear()));
    }
  }

  // Writes the body of a code-like page (the caller routes css / lua page types here).
  //
  // data_raw is always html-escaped into tmp_bfr first. Outside hdump mode the escaped text is
  // passed through the app's content-code formatter into bfr; in hdump mode data_raw itself is
  // appended to bfr. tmp_bfr is cleared before returning.
  //
  // wiki : not read by this method
  // NOTE(review): the hdump branch writes the unescaped data_raw, not the escaped tmp_bfr;
  // presumably escaping happens when the dump is loaded -- confirm
  private void Write_body_pre(
      Bry_bfr bfr,
      Xoae_app app,
      Xowe_wiki wiki,
      Xoh_wtr_ctx hctx,
      byte[] data_raw,
      Bry_bfr tmp_bfr) {
    // escape first, regardless of mode
    Xoh_html_wtr_escaper.Escape(
        app.Parser_amp_mgr(), tmp_bfr, data_raw, 0, data_raw.length, false, false);

    boolean fmt_as_code = !hctx.Mode_is_hdump();
    if (fmt_as_code) {
      app.Html_mgr().Page_mgr().Content_code_fmtr().Bld_bfr_many(bfr, tmp_bfr);
    } else {
      bfr.Add(data_raw);
    }

    // leave the scratch bfr empty for the next user
    tmp_bfr.Clear();
  }

  // Writes the raw page text for edit mode into bfr.
  //
  // bfr      : output bfr
  // data_raw : raw page text; replaced by its php-converted form for MediaWiki messages
  // ns_id    : namespace id of the page
  // page_tid : page type id; only wikitext pages in the MediaWiki ns are treated as messages
  private void Write_body_edit(Bry_bfr bfr, byte[] data_raw, int ns_id, byte page_tid) {
    // if MediaWiki and wikitext, must be a message; convert args back to php; DATE:2014-06-13
    boolean is_mw_msg = ns_id == Xow_ns_.Tid__mediawiki && page_tid == Xow_page_tid.Tid_wikitext;
    if (is_mw_msg) {
      data_raw = Gfs_php_converter.Xto_php(tmp_bfr, Bool_.N, data_raw);
    }

    int raw_len = data_raw.length;
    if (!mgr.Html_capable()) {
      bfr.Add(data_raw);
    } else {
      // NOTE: must escape; assume that browser will automatically escape (&lt;) (which Mozilla
      // does)
      Xoh_html_wtr_escaper.Escape(
          page.Wikie().Appe().Parser_amp_mgr(), bfr, data_raw, 0, raw_len, false, false);
    }

    // do not add nl if empty String
    if (raw_len == 0) return;

    // per MW:EditPage.php: "Ensure there's a newline at the end, otherwise adding lines is
    // awkward."
    bfr.Add_byte_nl();
  }

  // Byte form of the html attribute fragment ` contenteditable="true"` (leading space included).
  // NOTE(review): presumably new_a7 encodes the String as 7-bit ascii -- confirm against Bry_
  private static final byte[] Content_editable_bry = Bry_.new_a7(" contenteditable=\"true\"");
}
Example #12
0
// Extension node that collects a list of pages at parse time and writes them as an html list of
// links at write time. The note on Xatr__set indicates this backs the <dynamicPageList> tag.
public class Dpl_xnde implements Xox_xnde {
  // parsed settings for this node (offset, count, sort, show_ns, nofollow, suppress_errors, ...)
  private Dpl_itm itm = new Dpl_itm();
  // pages found by Xtn_parse; read by Xtn_write
  private List_adp pages = List_adp_.New();

  // No-op. NOTE: <dynamicPageList> has no attributes
  public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {}

  // Parses the node's settings into itm, fills pages via Dpl_page_finder, and sorts pages when a
  // sort direction was specified (Sort_ascending is not the "unset" byte).
  public void Xtn_parse(
      Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) {
    itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
    Dpl_page_finder.Find_pages(pages, wiki, itm);
    if (itm.Sort_ascending() != Bool_.__byte) pages.Sort_by(new Dpl_page_sorter(itm));
  }

  // Writes the collected pages to bfr as a list of <a> links.
  //
  // - if no pages were found, writes "No pages meet these criteria." (unless errors are
  //   suppressed) and returns
  // - otherwise writes the items in the window [offset, offset + count), where an unset offset
  //   (Int_.Min_value) means 0 and an unset or too-large count means "through the last page"
  // - pages whose title cannot be built, or whose display text is null, are skipped
  public void Xtn_write(
      Bry_bfr bfr,
      Xoae_app app,
      Xop_ctx ctx,
      Xoh_html_wtr html_wtr,
      Xoh_wtr_ctx hctx,
      Xoae_page wpg,
      Xop_xnde_tkn xnde,
      byte[] src) {
    Xowe_wiki wiki = ctx.Wiki();
    Dpl_html_data html_mode = Dpl_html_data.new_(Dpl_itm_keys.Key_unordered);
    int itms_len = pages.Count();
    if (itms_len == 0) {
      if (!itm.Suppress_errors()) bfr.Add_str_a7("No pages meet these criteria.");
      return;
    }

    // calc window of pages to write
    // NOTE(review): a negative Offset is used as-is; presumably Dpl_itm.Parse rejects it -- confirm
    int itms_bgn = 0;
    if (itm.Offset() != Int_.Min_value) {
      itms_bgn = itm.Offset();
    }
    if (itm.Count() != Int_.Min_value && itms_bgn + itm.Count() < itms_len) {
      itms_len = itms_bgn + itm.Count();
    }

    boolean show_ns = itm.Show_ns();
    Bry_bfr tmp_bfr = Bry_bfr_.Get();
    Xop_amp_mgr amp_mgr = wiki.Appe().Parser_amp_mgr();
    try {
      bfr.Add(html_mode.Grp_bgn()).Add_byte_nl();
      for (int i = itms_bgn; i < itms_len; i++) {
        Xowd_page_itm page = (Xowd_page_itm) pages.Get_at(i);
        Xoa_ttl ttl = Xoa_ttl.Parse(wiki, page.Ns_id(), page.Ttl_page_db());
        // skip if title could not be built; ttl is dereferenced below, so the null-text check
        // alone does not protect against this.
        // NOTE(review): assumes Xoa_ttl.Parse returns null for unparseable titles; check is
        // harmless otherwise
        if (ttl == null) continue;
        byte[] ttl_page_txt = show_ns ? ttl.Full_txt_w_ttl_case() : ttl.Page_txt();
        if (ttl_page_txt == null)
          continue; // NOTE: apparently DynamicPageList allows null pages; DATE:2013-07-22
        switch (html_mode.Tid()) {
          case Dpl_html_data.Tid_list_ul:
          case Dpl_html_data.Tid_list_ol:
            // item bgn + "<a href='/wiki/Ttl'"
            bfr.Add(Xoh_consts.Space_2).Add(html_mode.Itm_bgn()).Add(Gfh_bldr_.Bry__a_lhs_w_href);
            bfr.Add_str_a7("/wiki/")
                .Add(Gfo_url_encoder_.Href.Encode(ttl.Full_db()))
                .Add_byte_quote(); // NOTE: Full_db to encode spaces as underscores;
                                   // PAGE:en.q:Wikiquote:Speedy_deletions DATE:2016-01-19
            // title atr; NOTE: Full_txt b/c title always includes ns, even if show_ns is off;
            // PAGE:en.b:Wikibooks:WikiProject DATE:2016-01-20
            Gfh_atr_.Add(
                bfr,
                Gfh_atr_.Bry__title,
                Xoh_html_wtr_escaper.Escape(amp_mgr, tmp_bfr, ttl.Full_txt_w_ttl_case()));
            if (itm.No_follow()) bfr.Add(Bry_nofollow);
            bfr.Add_byte(Byte_ascii.Gt);
            // escaped caption + "</a>" + item end
            Xoh_html_wtr_escaper.Escape(
                amp_mgr, bfr, ttl_page_txt, 0, ttl_page_txt.length, false, false);
            bfr.Add(Gfh_bldr_.Bry__a_rhs).Add(html_mode.Itm_end()).Add_byte_nl();
            // TODO_OLD:
            // lnki_wtr.Clear().Href_wiki_(ttl).Title_(ttl).Nofollow_().Write_head(bfr).Write_text(bfr).Write_tail(bfr)
            break;
          default:
            break;
        }
      }
      bfr.Add(html_mode.Grp_end()).Add_byte_nl();
    } finally {
      // always release the scratch bfr, even if writing threw
      tmp_bfr.Mkr_rls();
    }
  }

  // attribute fragment added to each link when itm.No_follow() is set; final b/c it is a constant
  private static final byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\"");
}