public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) { this.Clear(); para_enabled = enabled && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki; // only enable for wikitext (not for template) if (para_enabled) Prv_para_new(ctx, root, -1, 0); // create <para> at bos }
/**
 * Creates a bracket-begin token spanning bgn_pos..cur_pos, adds it to root and pushes it on the
 * ctx stack.
 *
 * @param ctx     parser context that receives the token
 * @param tkn_mkr factory used to build the token
 * @param root    root token the new token is added under
 * @param src     source bytes (not read here)
 * @param src_len length of src (not read here)
 * @param bgn_pos start position of the token
 * @param cur_pos end position of the token
 * @return cur_pos, unchanged
 */
public int Make_tkn(
        Xop_ctx ctx,
        Xop_tkn_mkr tkn_mkr,
        Xop_root_tkn root,
        byte[] src,
        int src_len,
        int bgn_pos,
        int cur_pos) {
    Xop_tkn_itm brack_tkn = tkn_mkr.Brack_bgn(bgn_pos, cur_pos);
    ctx.Subs_add_and_stack(root, brack_tkn);
    return cur_pos;
}
public void Process_lnki_category( Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) { // REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links // produce; if (!para_enabled) return; int subs_len = root.Subs_len(); for (int i = subs_len - 2; i > -1; i--) { // -2: -1 b/c subs_len is invalid; -1 to skip current lnki Xop_tkn_itm sub_tkn = root.Subs_get(i); switch (sub_tkn.Tkn_tid()) { case Xop_tkn_itm_ .Tid_para: // nl found; note this means that BOL -> [[Category:]] is all ws; if (prv_ws_bgn > 0) { // line begins with ws a if (sub_tkn.Src_bgn() != 0) // do not ignore BOS para; needed b/c it is often <p>; needed for test; sub_tkn.Ignore_y_(); // ignore nl (pretty-printing only) prv_ws_bgn = 0; // remove ws if (ctx.Stack_has( Xop_tkn_itm_ .Tid_list)) { // HACK: if in list, set prv_nl_pos to EOL; only here for one test // to pass int nl_at_eol = -1; for (int j = pos; j < src_len; j++) { // check if rest of line is ws byte b = src[j]; switch (b) { case Byte_ascii.Space: case Byte_ascii.Tab: break; // ignore space / tab case Byte_ascii.Nl: nl_at_eol = j; j = src_len; break; default: // something else besides ws; stop j = src_len; break; } if (nl_at_eol != -1) prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2 } } } return; default: // exit if anything except para / nl in front of [[Category:]] i = -1; break; } } // if (para_found) // BOS exit; just remove prv_ws_bgn prv_ws_bgn = 0; }
/**
 * Turns off the dd-check flag on the ctx's list worker by calling Dd_chk_(false).
 *
 * @param ctx parser context whose list worker is updated
 */
private void Dd_clear(Xop_ctx ctx) {
    ctx.List()
        .Dd_chk_(false);
}
/**
 * Adds a zero-width xnde token tagged as br to root.
 *
 * @param ctx     parser context; supplies the token maker and receives the token
 * @param root    root token the br token is added under
 * @param bgn_pos position used as both the start and the end of the new token
 */
private void Add_br(Xop_ctx ctx, Xop_root_tkn root, int bgn_pos) {
    Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
    ctx.Subs_add(
        root,
        tkn_mkr.Xnde(bgn_pos, bgn_pos).Tag_(Xop_xnde_tag_.Tag_br));
}
/**
 * Records the position of the previous nl, creates a fresh para token and adds it to root.
 * The new token becomes the worker's prv_para.
 *
 * @param ctx        parser context; supplies the token maker and receives the token
 * @param root       root token the para token is added under
 * @param prv_nl_pos value stored in the prv_nl_pos field
 * @param para_pos   position passed to the token maker for the new para token
 */
private void Prv_para_new(Xop_ctx ctx, Xop_root_tkn root, int prv_nl_pos, int para_pos) {
    this.prv_nl_pos = prv_nl_pos;
    this.prv_para = ctx.Tkn_mkr().Para(para_pos);
    ctx.Subs_add(root, this.prv_para);
}
public int Process_pre( Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) { Dd_clear(ctx); Btrie_slim_mgr tblw_ws_trie = ctx.App().Utl_trie_tblw_ws(); Object o = tblw_ws_trie.Match_bgn(src, txt_pos, src_len); if (o != null) { // tblw_ws found Xop_tblw_ws_itm ws_itm = (Xop_tblw_ws_itm) o; byte tblw_type = ws_itm.Tblw_type(); switch (tblw_type) { case Xop_tblw_ws_itm.Type_nl: // \n\s if (cur_mode == Mode_pre) { // already in pre; just process "\n\s" ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos, Xop_nl_tkn.Tid_char, 1)); prv_nl_pos = bgn_pos; // NOTE: must update prv_nl_pos; PAGE:en.w:Preferred_number DATE:2014-06-24 return txt_pos; } break; case Xop_tblw_ws_itm.Type_xnde: int nxt_pos = tblw_ws_trie.Match_pos(); if (nxt_pos < src_len) { // bounds check switch (src[ nxt_pos]) { // check that next char is "end" of xnde name; guard against false // matches like "<trk" PAGE:de.v:Via_Jutlandica/Gpx DATE:2014-11-29 case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // whitespace case Byte_ascii.Slash: case Byte_ascii.Gt: // end node case Byte_ascii.Quote: case Byte_ascii.Apos: // quotes if (bgn_pos != Xop_parser_.Doc_bgn_bos) ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos); return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, txt_pos, txt_pos + 1); } } break; default: { int tblw_rv = ctx.Tblw() .Make_tkn_bgn( ctx, tkn_mkr, root, src, src_len, bgn_pos, txt_pos + ws_itm.Hook_len(), false, tblw_type, Xop_tblw_wkr.Called_from_pre, -1, -1); if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise process pre-code below; // EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14 return tblw_rv; break; } } } // NOTE: pre lxr emulates MW for "\n\s" by (1) calling Process nl for "\n"; (2) anticipating // next line by setting prv_ws_bgn // EX: "\na\n b\n"; note that "\n " is cur if (bgn_pos != Xop_parser_.Doc_bgn_bos) // if bos, then don't close 1st para 
Process_nl( ctx, root, src, bgn_pos, bgn_pos + 1); // note that tkn is \n\s; so, bgn_pos -> bgn_pos + 1 is \n ... if (cur_mode == Mode_pre) // in pre_mode ctx.Subs_add( root, tkn_mkr.Space( root, cur_pos, txt_pos)); // cur_pos to start after \s; do not capture "\s" in "\n\s"; (not sure why // not before \s) prv_ws_bgn = txt_pos - cur_pos + 1; return txt_pos; }
/**
 * Processes a newline and updates the paragraph state machine.
 * REF.MW:Parser.php|doBlockLevels -- the "MW:" comments below quote the MediaWiki statement
 * each line corresponds to.
 *
 * Steps, in order:
 * 1. Clears the dd-check flag via Dd_clear.
 * 2. If a block xnde began or ended on this line: empties the para stack, ends the previous
 *    para, and recomputes in_blockquote and in_block.
 * 3. Else, if not inside a block or blockquote: chooses between the pre branch (line began with
 *    ws), the blank-line branch ("b1".."b3") and the text-line branch ("t1".."t3"), updating
 *    cur_mode and para_stack.
 * 4. Resets prv_ws_bgn and the block_is_bgn_xnde / block_is_end_xnde flags, adds a new para
 *    placeholder via Prv_para_new, and flags it with Nl_bgn_y_ when the para stack is empty.
 *
 * @param ctx     parser context
 * @param root    root token that new tokens are added under
 * @param src     source bytes
 * @param bgn_pos position of the nl being processed
 * @param cur_pos position after the nl; used as the position of the new para placeholder
 */
public void Process_nl(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) { // REF.MW:Parser.php|doBlockLevels
    Dd_clear(ctx);
    if (block_is_bgn_xnde || block_is_end_xnde) {
        para_stack = Para_stack_none; // MW: $paragraphStack = false;
        Prv_para_end(); // MW: $output .= $this->closeParagraph()
        if (block_is_bgn_blockquote && !block_is_end_blockquote) // MW: if ( $preOpenMatch and !$preCloseMatch )
            in_blockquote = true; // MW: $this->mInPre = true;
        else
            in_blockquote = false; // XO: turn off blockquote else following para / nl won't work; w:Snappy_(software); DATE:2014-04-25
        in_block = !block_is_end_xnde; // MW: $inBlockElem = !$closematch;
    } else if (!in_block && !in_blockquote) { // MW: elseif ( !$inBlockElem && !$this->mInPre ) {
        boolean line_is_ws = Line_is_ws(src, bgn_pos);
        if (prv_ws_bgn > 0 && (cur_mode == Mode_pre || !line_is_ws)) { // MW: if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
            if (cur_mode != Mode_pre) { // MW: if ( $this->mLastSection !== 'pre' ) {
                para_stack = Para_stack_none; // MW: $paragraphStack = false;
                prv_para.Space_bgn_(prv_ws_bgn - 1); // -1 to ignore 1st "\s" in "\n\s"; note that prv_ws_bgn only includes spaces, so BOS doesn't matter; DATE:2014-04-14
                Prv_para_end();
                Prv_para_bgn(Xop_para_tkn.Tid_pre); // MW: $output .= $this->closeParagraph() . '<pre>';
                cur_mode = Mode_pre; // MW: $this->mLastSection = 'pre';
            } else { // already in pre
                if (line_is_ws) { // line is entirely ws
                    int next_char_pos = prv_nl_pos + 2; // "\n\s".length
                    if (next_char_pos < src.length // bounds check
                        && src[next_char_pos] == Byte_ascii.Nl // is "\n\s\n"; i.e.: "\n" only
                    ) {
                        // add a "\n" tkn; note that adding a NewLine tkn doesn't work, b/c
                        // Xoh_html_wtr has code to remove consecutive \n;
                        // PAGE:en.w:Preferred_numbers DATE:2014-06-24
                        ctx.Subs_add(root, ctx.Tkn_mkr().Bry_raw(bgn_pos, bgn_pos, Byte_ascii.Nl_bry));
                        prv_nl_pos = bgn_pos;
                    }
                }
            }
            prv_ws_bgn = 0; // MW: $t = substr( $t, 1 );
        } else {
            if (bgn_pos - prv_nl_pos == 1 || line_is_ws) { // line is blank ("b" for blank) MW: if ( trim( $t ) === '' ) {
                if (para_stack != Para_stack_none) { // "b1"; stack has "<p>" or "</p><p>"; output "<br/>"; MW: if ( $paragraphStack ) {
                    Para_stack_end(cur_pos);
                    Add_br(ctx, root, bgn_pos); // MW: $output .= $paragraphStack . '<br />';
                    para_stack = Para_stack_none; // MW: $paragraphStack = false;
                    cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
                } else { // stack is empty
                    if (cur_mode != Mode_para) { // "b2"; cur is '' or <pre> MW: if ( $this->mLastSection !== 'p' ) {
                        Prv_para_end(); // MW: $output .= $this->closeParagraph();
                        cur_mode = Mode_none; // MW: $this->mLastSection = '';
                        para_stack = Para_stack_bgn; // put <p> on stack MW: $paragraphStack = '<p>';
                    } else // "b3"; cur is p
                        para_stack = Para_stack_mid; // put </p><p> on stack MW: $paragraphStack = '</p><p>';
                }
            } else { // line has text ("t" for text); NOTE: tkn already added before \n, so must change prv_para; EX: "a\n" -> this code is called for "\n" but "a" already processed
                if (para_stack != Para_stack_none) { // "t1" MW: if ( $paragraphStack ) {
                    Para_stack_end(cur_pos); // MW: $output .= $paragraphStack;
                    para_stack = Para_stack_none; // MW: $paragraphStack = false;
                    cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
                } else if (cur_mode != Mode_para) { // "t2"; cur is '' or <pre> MW: elseif ( $this->mLastSection !== 'p' ) {
                    Prv_para_end();
                    Prv_para_bgn(Xop_para_tkn.Tid_para); // MW: $output .= $this->closeParagraph() . '<p>';
                    cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
                } else {
                } // "t3"; already in a para and nothing is stacked, so there is nothing to do
            }
        }
    }
    // handle blockquote separately; EX: <blockquote>\n\sa\n</blockquote>; note that "\s" needs
    // to be added literally; MW doesn't have this logic specifically, since it assumes all
    // characters go into $output, whereas XO sets aside the "\s" in "\n\s" separately
    if (in_blockquote && prv_ws_bgn > 0)
        prv_para.Space_bgn_(prv_ws_bgn);
    prv_ws_bgn = 0; // nl encountered and processed; always set prv_ws_bgn to 0, else ws from one line will carry over to next
    // in_blockquote = false;
    block_is_bgn_xnde = block_is_end_xnde = false;
    // if ( $preCloseMatch && $this->mInPre )
    //     $this->mInPre = false;
    // prv_ws_bgn = false;
    Prv_para_new(ctx, root, bgn_pos, cur_pos); // add a prv_para placeholder
    if (para_stack == Para_stack_none) // "x1" MW: if ( $paragraphStack === false ) {
        if (prv_para != null) prv_para.Nl_bgn_y_(); // add nl; note that "$t" has already been processed; MW: $output .= $t . "\n";
}