public void Xtn_parse( Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) { Xox_xnde_.Xatr__set(wiki, this, xatrs_hash, src, xnde); this.html = Xop_parser_.Parse_text_to_html( wiki, ctx, ctx.Page(), ctx.Page().Ttl(), Bry_.Mid(src, xnde.Tag_open_end(), xnde.Tag_close_bgn()), false); Indicator_html_bldr html_bldr = ctx.Page().Html_data().Indicators(); if (this.name != null) html_bldr.Add(this); // NOTE: must do null-check b/c Add will use Name as key for hashtable }
public int MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int lxr_bgn_pos, int lxr_cur_pos) { if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_brack_bgn) // WORKAROUND: ignore }} if inside lnki; EX.CM:Template:Protected; {{#switch:a|b=[[a|ja=}}]]}} return ctx.Lxr_make_txt_(lxr_cur_pos); int lxr_end_pos = Bry_finder.Find_fwd_while(src, lxr_cur_pos, src_len, Byte_ascii.Curly_end); // NOTE: can be many consecutive }; EX: {{a|{{{1}}}}} int end_tkn_len = lxr_end_pos - lxr_bgn_pos; boolean vnt_enabled = ctx.Wiki().Lang().Vnt_mgr().Enabled(); while (end_tkn_len > 0) { int acs_pos = -1, acs_len = ctx.Stack_len(); for (int i = acs_len - 1; i > -1; i--) { // find auto-close pos Xop_tkn_itm stack_tkn = ctx.Stack_get(i); switch (stack_tkn.Tkn_tid()) { case Xop_tkn_itm_.Tid_tmpl_curly_bgn: // found curly_bgn; mark and exit acs_pos = i; i = -1; break; case Xop_tkn_itm_.Tid_brack_bgn: // found no curly_bgn, but found brack_bgn; note that extra }} should not close any frames beyond lnki; EX:w:Template:Cite wikisource; w:John Fletcher (playwright) i = -1; break; case Xop_tkn_itm_.Tid_xnde: // found xnde; ignore; handle {{template|<poem>}}</poem>}} DATE:2014-03-03 Xop_xnde_tkn stack_xnde = (Xop_xnde_tkn)stack_tkn; if (stack_xnde.Tag().Xtn()) i = -1; break; } } if (acs_pos == -1) { // "}}+" found but no "{{+" found; warn and output literal tkn ctx.Msg_log().Add_itm_none(Xop_curly_log.Bgn_not_found, src, lxr_bgn_pos, lxr_end_pos); ctx.Subs_add(root, tkn_mkr.Txt(lxr_bgn_pos, lxr_end_pos)); return lxr_end_pos; } Xop_curly_bgn_tkn bgn_tkn = (Xop_curly_bgn_tkn)ctx.Stack_pop_til(root, src, acs_pos, true, lxr_bgn_pos, lxr_end_pos, Xop_tkn_itm_.Tid_tmpl_curly_bgn); // NOTE: in theory, an unclosed [[ can be on stack; for now, ignore int bgn_tkn_len = bgn_tkn.Src_end() - bgn_tkn.Src_bgn(); int bgn_tkn_pos_bgn = bgn_tkn.Src_bgn();// save original pos_bgn boolean vnt_dash_adjust = false; if (vnt_enabled ) { int curly_bgn_dash = bgn_tkn.Src_bgn() - 1; if (curly_bgn_dash > -1 && src[curly_bgn_dash] == Byte_ascii.Dash) { // "-" exists before curlies; EX: "-{{" int curly_end_dash = lxr_end_pos; if (curly_end_dash < src_len && src[curly_end_dash] == Byte_ascii.Dash) { // "-" exists after curlies; EX: "}}-" if (bgn_tkn_len > 2 && end_tkn_len > 2) { // more than 3 curlies at bgn / end with flanking dashes; EX: "-{{{ }}}-"; NOTE: 3 is needed b/c 2 will never be reduced; EX: "-{{" will always be "-" and "{{", not "-{" and "{" int numeric_val = Bry_.Xto_int_or(src, bgn_tkn.Src_end(), lxr_bgn_pos, -1); if ( numeric_val != -1 // do not apply if numeric val; EX:"-{{{0}}}-" vs "-{{{#expr:0}}}-" sr.w:Template:Link_FA && bgn_tkn_len == 3 && end_tkn_len == 3 // exactly 3 tokens; assume param token; "-{{{" -> "-" + "{{{" x> -> "-{" + "{{"; if unbalanced (3,4 or 4,3) fall into code below ) { } // noop; PAGE:sr.w:ДНК; EX:<span id="interwiki-{{{1}}}-fa"></span> DATE:2014-07-03 else { --bgn_tkn_len; // reduce bgn curlies by 1; EX: "{{{" -> "{{" ++bgn_tkn_pos_bgn; // add one to bgn tkn_pos; --end_tkn_len; // reduce end curlies by 1; EX: "}}}" -> "}}" --lxr_end_pos; // reduce end by 1; this will "reprocess" the final "}" as a text tkn; EX: "}}}-" -> "}}" and position before "}-" vnt_dash_adjust = true; } } } } } int new_tkn_len = 0; if (bgn_tkn_len == end_tkn_len) // exact match; should be majority of cases new_tkn_len = bgn_tkn_len; else if (bgn_tkn_len > end_tkn_len) // more bgn than end; use end, and deduct bgn; EX: {{{{{1}}}|a}} new_tkn_len = end_tkn_len; else /*bgn_tkn_len < end_tkn_len*/ // more end than bgn; use bgn, and deduct end; EX: {{a|{{{1}}}}} new_tkn_len = bgn_tkn_len; int keep_curly_bgn = 0; /* NOTE: this is a semi-hack; if bgn_tkn > new_tkn, then pretend bgn_tkn fits new_tkn, give to bldr, and then adjust back later EX: {{{{{1}}}|a}} -> bgn_tkn_len=5,new_tkn_len=3 -> change bgn(0, 5) to bgn(2, 5) The "correct" way is to insert a new_bgn_tkn after cur_bgn_tkn on root, but this would have performance implications: array would have to be resized, and all subs will have to be reindexed NOTE: bgn curlies should also be preserved if new_tkn_len > 3; EX: {{{{{{1}}}}}}; note that bgn = end, but len > 3 */ if (bgn_tkn_len > new_tkn_len || new_tkn_len > 3) { bgn_tkn.Tkn_ini_pos(false, bgn_tkn.Src_end() - new_tkn_len, bgn_tkn.Src_end()); keep_curly_bgn = 1; // preserves {{ } switch (new_tkn_len) { case 0: // EXC_CASE: should not happen; warn; ctx.Msg_log().Add_itm_none(Xop_curly_log.Bgn_len_0, src, bgn_tkn.Src_bgn(), lxr_end_pos); break; // case 1: // EXC_CASE: SEE:NOTE_1; // break; case 2: // USE_CASE: make invk_tkn ctx.Invk().Make_tkn(ctx, root, src, lxr_bgn_pos, lxr_bgn_pos + new_tkn_len, bgn_tkn, keep_curly_bgn); break; default: // USE_CASE: make prm_tkn; NOTE: 3 or more new_tkn_len = 3; // gobble 3 at a time; EX: 6 -> 3 -> 0; EX: 7 -> 4 -> 1; prm_wkr.Make_tkn(ctx, tkn_mkr, root, src, src_len, lxr_bgn_pos, lxr_bgn_pos + new_tkn_len, bgn_tkn, keep_curly_bgn); break; } switch (bgn_tkn_len - new_tkn_len) { // continuation of semi-hack above; some bgn still left over; adjust and throw back on stack case 1: // 1 tkn; convert curly to generic text tkn bgn_tkn.Src_end_(bgn_tkn.Src_end() - 1); // NOTE: shorten end of bgn_tkn by 1; TEST ctx.Stack_add(tkn_mkr.Txt(bgn_tkn_pos_bgn, bgn_tkn.Src_end() - new_tkn_len)); break; case 0: // noop break; default: bgn_tkn.Tkn_ini_pos(false, bgn_tkn_pos_bgn, bgn_tkn.Src_end() - new_tkn_len); // bgn(2, 5) -> bgn (0, 2) ctx.Stack_add(bgn_tkn); break; } if (vnt_dash_adjust) { Xop_tkn_itm text_tkn = root.Subs_get_or_null(root.Subs_len() - 2); // -2 to get tkn before newly-created tmpl / prm if (text_tkn == null || text_tkn.Tkn_tid() != Xop_tkn_itm_.Tid_txt) ctx.Wiki().Appe().Usr_dlg().Warn_many("", "", "token before curly_bgn was not text tkn; src=~{0}", String_.new_u8(src, lxr_bgn_pos, lxr_end_pos)); else text_tkn.Src_end_(text_tkn.Src_end() + 1); // +1 to extend txt_tkn with dash be 1 to include curly; EX: "-" "{{{" -> "-{" "{{" } end_tkn_len -= new_tkn_len; lxr_bgn_pos += new_tkn_len; // move lxr_bgn_pos along if (end_tkn_len == 1) { // SEE:NOTE_1: ctx.Subs_add(root, tkn_mkr.Txt(lxr_bgn_pos, lxr_bgn_pos + 1)); end_tkn_len = 0; ++lxr_bgn_pos; } } return lxr_end_pos; }
public boolean Bld( Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_arg_wkr wkr, int wkr_typ, Xop_root_tkn root, Xop_tkn_itm tkn, int bgn_pos, int cur_pos, int loop_bgn, int loop_end, byte[] src) { boolean ws_bgn_chk = true, colon_chk = false, itm_is_static = true, key_exists = false; int ws_bgn_idx = -1, ws_end_idx = -1, cur_itm_subs_len = 0, cur_nde_idx = -1; Arg_nde_tkn cur_nde = null; Arg_itm_tkn cur_itm = null; int brack_count = 0; Xop_tkn_itm eq_pending = null; for (int i = loop_bgn; i < loop_end; i++) { // loop over subs between bookends; if lnki, all tkns between [[ and ]]; if tmpl, {{ // and }} Xop_tkn_itm sub = root.Subs_get(i); int sub_pos_bgn = sub.Src_bgn_grp(root, i); if (cur_nde == null) { cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, sub_pos_bgn); brack_count = 0; key_exists = false; } if (cur_itm == null) { cur_itm = tkn_mkr.ArgItm(sub_pos_bgn, -1); itm_is_static = ws_bgn_chk = true; cur_itm_subs_len = 0; ws_bgn_idx = ws_end_idx = -1; if (eq_pending != null) { // something like "A==B" encountered; zh.w:Wikipedia:条目评选; DATE:2014-08-27 eq_pending.Src_end_(eq_pending.Src_end() - 1); // remove an "=" EX:"A==B" -> "A","=","=B" cur_itm.Subs_add_grp(eq_pending, root, i); cur_itm_subs_len++; // add the tkn to cur_itm eq_pending = null; } } switch (sub.Tkn_tid()) { case Xop_tkn_itm_ .Tid_ignore: // comment or *include* tkn; mark itm as non_static for tmpl (forces // re-eval) switch (wkr_typ) { case Xop_arg_wkr_.Typ_tmpl: case Xop_arg_wkr_.Typ_prm: itm_is_static = false; break; } break; case Xop_tkn_itm_ .Tid_para: // NOTE: para can appear in following: [[File:A.png| \n 40px]]; EX: // w:Supreme_Court_of_the_United_States; DATE:2014-04-05 case Xop_tkn_itm_.Tid_newLine: case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: // whitespace if (ws_bgn_chk) ws_bgn_idx = cur_itm_subs_len; // definite ws at bgn; set ws_bgn_idx, and keep setting until text // tkn reached; handles mixed sequence of \s\n\t where last tkn // should be ws_bgn_idx else { if (ws_end_idx == -1) ws_end_idx = cur_itm_subs_len; } ; // possible ws at end; may be overriden later; see AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_colon: if (wkr_typ == Xop_arg_wkr_ .Typ_tmpl) { // treat colons as text; tmpl will do its own : parsing for 1st arg; // NOTE: must do ws check else 2nd colon will break; EX: "{{#ifeq: // :|a|b|c}}"; DATE:2013-12-10 if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn } else { if (cur_nde_idx == 0 && !colon_chk) { // if 1st arg, mark colon pos; needed for lnki; EX: [[Category:A]]; // {{#ifeq:1}} colon_chk = true; cur_nde.Arg_colon_pos_(sub_pos_bgn); } } break; case Xop_tkn_itm_.Tid_brack_bgn: ++brack_count; if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_brack_end: --brack_count; if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_eq: if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) { } else if (wkr_typ == Xop_arg_wkr_.Typ_prm) { } // always ignore for prm else { if (cur_nde_idx != 0 // if 1st arg, treat equal_tkn as txt_tkn; i.e.: eq should not be used to // separate key/val && cur_nde.Eq_tkn() == Xop_tkn_null .Null_tkn // only mark key if key is not set; handle multiple-keys; EX: // {{name|key1=b=c}}; DATE:2014-02-09 ) { Xop_eq_tkn sub_as_eq = (Xop_eq_tkn) sub; int sub_as_eq_len = sub_as_eq.Eq_len(); boolean eq_is_spr = sub_as_eq_len == 1 // eq with len of 1 are considered separators; // MW.REF:Preprocessor_DOM.php|preprocessToXml; "if ( $count == 1 && // $findEquals )" PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014; // DATE:2014-07-21 || (cur_itm.Subs_len() > 0 // or eq.len > 1 that occur later in itm; EX: a==b; // zh.w:Wikipedia:条目评选; DATE:2014-08-27 && cur_itm.Subs_get(0).Tkn_tid() != Xop_tkn_itm_ .Tid_eq // and 1st tkn is not ==; EX:==a==; 2nd == should not be // eq b/c 1st == "deactivates" nde; DATE:2014-08-27 ); if (eq_is_spr) { if (sub_as_eq_len == 1) // =.len == 1 cur_nde.Eq_tkn_(sub); // set as eq tkn else // =.len > 1 eq_pending = sub; // do not set as eq tkn; note that Eq_tkn exists for bookkeeping and is // not printed out, key_exists = true; Arg_itm_end( ctx, cur_nde, cur_itm, ws_bgn_idx, ws_end_idx, cur_itm_subs_len, sub_pos_bgn, wkr_typ, key_exists, true, itm_is_static, src, cur_nde_idx); cur_nde.Key_tkn_(cur_itm); cur_itm = null; continue; // do not add tkn to cur_itm } } if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; } break; case Xop_tkn_itm_.Tid_pipe: if (cur_nde_idx == 0 && ws_bgn_chk && !colon_chk && wkr_typ == Xop_arg_wkr_.Typ_tmpl) return false; // 1st arg, but no name; EX: "{{|a}}", "{{ }}"; disregard if lnki, since // "[[|a]]" is valid if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) { break; } else { Arg_itm_end( ctx, cur_nde, cur_itm, ws_bgn_idx, ws_end_idx, cur_itm_subs_len, sub_pos_bgn, wkr_typ, key_exists, false, itm_is_static, src, cur_nde_idx); cur_nde.Val_tkn_(cur_itm); if (!wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx)) return false; // NOTE: if invalid, exit now; lnki_wkr expects false if any argument is // invalid; DATE:2014-06-06 cur_nde = null; cur_itm = null; key_exists = false; // reset continue; // do not add tkn to cur_itm } case Xop_tkn_itm_ .Tid_tmpl_prm: // nested prm (3 {) or invk (2 {); mark itm_is_static = false and treat // tkn as txt case Xop_tkn_itm_.Tid_tmpl_invk: itm_is_static = false; if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_xnde: Xop_xnde_tkn sub_as_xnde = (Xop_xnde_tkn) sub; switch (sub_as_xnde.Tag().Id()) { case Xop_xnde_tag_.Tid__noinclude: case Xop_xnde_tag_.Tid__includeonly: case Xop_xnde_tag_.Tid__onlyinclude: itm_is_static = false; break; } if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; default: if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; } cur_itm.Subs_add_grp(sub, root, i); cur_itm_subs_len++; } if (brack_count > 0) return false; if (cur_nde == null) // occurs when | is last tkn; EX: {{name|a|}}; cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, bgn_pos); if (cur_itm == null) { // occurs when = is last tkn; EX: {{name|a=}}; cur_itm = tkn_mkr.ArgItm(bgn_pos, -1); itm_is_static = ws_bgn_chk = true; cur_itm_subs_len = 0; ws_bgn_idx = ws_end_idx = -1; key_exists = false; } Arg_itm_end( ctx, cur_nde, cur_itm, ws_bgn_idx, ws_end_idx, cur_itm_subs_len, bgn_pos, wkr_typ, key_exists, false, itm_is_static, src, cur_nde_idx); cur_nde.Val_tkn_(cur_itm); return wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx); }