public void Process_lnki_category( Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) { // REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links // produce; if (!para_enabled) return; int subs_len = root.Subs_len(); for (int i = subs_len - 2; i > -1; i--) { // -2: -1 b/c subs_len is invalid; -1 to skip current lnki Xop_tkn_itm sub_tkn = root.Subs_get(i); switch (sub_tkn.Tkn_tid()) { case Xop_tkn_itm_ .Tid_para: // nl found; note this means that BOL -> [[Category:]] is all ws; if (prv_ws_bgn > 0) { // line begins with ws a if (sub_tkn.Src_bgn() != 0) // do not ignore BOS para; needed b/c it is often <p>; needed for test; sub_tkn.Ignore_y_(); // ignore nl (pretty-printing only) prv_ws_bgn = 0; // remove ws if (ctx.Stack_has( Xop_tkn_itm_ .Tid_list)) { // HACK: if in list, set prv_nl_pos to EOL; only here for one test // to pass int nl_at_eol = -1; for (int j = pos; j < src_len; j++) { // check if rest of line is ws byte b = src[j]; switch (b) { case Byte_ascii.Space: case Byte_ascii.Tab: break; // ignore space / tab case Byte_ascii.Nl: nl_at_eol = j; j = src_len; break; default: // something else besides ws; stop j = src_len; break; } if (nl_at_eol != -1) prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2 } } } return; default: // exit if anything except para / nl in front of [[Category:]] i = -1; break; } } // if (para_found) // BOS exit; just remove prv_ws_bgn prv_ws_bgn = 0; }
public boolean Bld( Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_arg_wkr wkr, int wkr_typ, Xop_root_tkn root, Xop_tkn_itm tkn, int bgn_pos, int cur_pos, int loop_bgn, int loop_end, byte[] src) { boolean ws_bgn_chk = true, colon_chk = false, itm_is_static = true, key_exists = false; int ws_bgn_idx = -1, ws_end_idx = -1, cur_itm_subs_len = 0, cur_nde_idx = -1; Arg_nde_tkn cur_nde = null; Arg_itm_tkn cur_itm = null; int brack_count = 0; Xop_tkn_itm eq_pending = null; for (int i = loop_bgn; i < loop_end; i++) { // loop over subs between bookends; if lnki, all tkns between [[ and ]]; if tmpl, {{ // and }} Xop_tkn_itm sub = root.Subs_get(i); int sub_pos_bgn = sub.Src_bgn_grp(root, i); if (cur_nde == null) { cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, sub_pos_bgn); brack_count = 0; key_exists = false; } if (cur_itm == null) { cur_itm = tkn_mkr.ArgItm(sub_pos_bgn, -1); itm_is_static = ws_bgn_chk = true; cur_itm_subs_len = 0; ws_bgn_idx = ws_end_idx = -1; if (eq_pending != null) { // something like "A==B" encountered; zh.w:Wikipedia:条目评选; DATE:2014-08-27 eq_pending.Src_end_(eq_pending.Src_end() - 1); // remove an "=" EX:"A==B" -> "A","=","=B" cur_itm.Subs_add_grp(eq_pending, root, i); cur_itm_subs_len++; // add the tkn to cur_itm eq_pending = null; } } switch (sub.Tkn_tid()) { case Xop_tkn_itm_ .Tid_ignore: // comment or *include* tkn; mark itm as non_static for tmpl (forces // re-eval) switch (wkr_typ) { case Xop_arg_wkr_.Typ_tmpl: case Xop_arg_wkr_.Typ_prm: itm_is_static = false; break; } break; case Xop_tkn_itm_ .Tid_para: // NOTE: para can appear in following: [[File:A.png| \n 40px]]; EX: // w:Supreme_Court_of_the_United_States; DATE:2014-04-05 case Xop_tkn_itm_.Tid_newLine: case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: // whitespace if (ws_bgn_chk) ws_bgn_idx = cur_itm_subs_len; // definite ws at bgn; set ws_bgn_idx, and keep setting until text // tkn reached; handles mixed sequence of \s\n\t where last tkn // should be ws_bgn_idx else { if (ws_end_idx == -1) ws_end_idx = cur_itm_subs_len; } ; // possible ws at end; may be overriden later; see AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_colon: if (wkr_typ == Xop_arg_wkr_ .Typ_tmpl) { // treat colons as text; tmpl will do its own : parsing for 1st arg; // NOTE: must do ws check else 2nd colon will break; EX: "{{#ifeq: // :|a|b|c}}"; DATE:2013-12-10 if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn } else { if (cur_nde_idx == 0 && !colon_chk) { // if 1st arg, mark colon pos; needed for lnki; EX: [[Category:A]]; // {{#ifeq:1}} colon_chk = true; cur_nde.Arg_colon_pos_(sub_pos_bgn); } } break; case Xop_tkn_itm_.Tid_brack_bgn: ++brack_count; if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_brack_end: --brack_count; if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_eq: if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) { } else if (wkr_typ == Xop_arg_wkr_.Typ_prm) { } // always ignore for prm else { if (cur_nde_idx != 0 // if 1st arg, treat equal_tkn as txt_tkn; i.e.: eq should not be used to // separate key/val && cur_nde.Eq_tkn() == Xop_tkn_null .Null_tkn // only mark key if key is not set; handle multiple-keys; EX: // {{name|key1=b=c}}; DATE:2014-02-09 ) { Xop_eq_tkn sub_as_eq = (Xop_eq_tkn) sub; int sub_as_eq_len = sub_as_eq.Eq_len(); boolean eq_is_spr = sub_as_eq_len == 1 // eq with len of 1 are considered separators; // MW.REF:Preprocessor_DOM.php|preprocessToXml; "if ( $count == 1 && // $findEquals )" PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014; // DATE:2014-07-21 || (cur_itm.Subs_len() > 0 // or eq.len > 1 that occur later in itm; EX: a==b; // zh.w:Wikipedia:条目评选; DATE:2014-08-27 && cur_itm.Subs_get(0).Tkn_tid() != Xop_tkn_itm_ .Tid_eq // and 1st tkn is not ==; EX:==a==; 2nd == should not be // eq b/c 1st == "deactivates" nde; DATE:2014-08-27 ); if (eq_is_spr) { if (sub_as_eq_len == 1) // =.len == 1 cur_nde.Eq_tkn_(sub); // set as eq tkn else // =.len > 1 eq_pending = sub; // do not set as eq tkn; note that Eq_tkn exists for bookkeeping and is // not printed out, key_exists = true; Arg_itm_end( ctx, cur_nde, cur_itm, ws_bgn_idx, ws_end_idx, cur_itm_subs_len, sub_pos_bgn, wkr_typ, key_exists, true, itm_is_static, src, cur_nde_idx); cur_nde.Key_tkn_(cur_itm); cur_itm = null; continue; // do not add tkn to cur_itm } } if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; } break; case Xop_tkn_itm_.Tid_pipe: if (cur_nde_idx == 0 && ws_bgn_chk && !colon_chk && wkr_typ == Xop_arg_wkr_.Typ_tmpl) return false; // 1st arg, but no name; EX: "{{|a}}", "{{ }}"; disregard if lnki, since // "[[|a]]" is valid if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) { break; } else { Arg_itm_end( ctx, cur_nde, cur_itm, ws_bgn_idx, ws_end_idx, cur_itm_subs_len, sub_pos_bgn, wkr_typ, key_exists, false, itm_is_static, src, cur_nde_idx); cur_nde.Val_tkn_(cur_itm); if (!wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx)) return false; // NOTE: if invalid, exit now; lnki_wkr expects false if any argument is // invalid; DATE:2014-06-06 cur_nde = null; cur_itm = null; key_exists = false; // reset continue; // do not add tkn to cur_itm } case Xop_tkn_itm_ .Tid_tmpl_prm: // nested prm (3 {) or invk (2 {); mark itm_is_static = false and treat // tkn as txt case Xop_tkn_itm_.Tid_tmpl_invk: itm_is_static = false; if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; case Xop_tkn_itm_.Tid_xnde: Xop_xnde_tkn sub_as_xnde = (Xop_xnde_tkn) sub; switch (sub_as_xnde.Tag().Id()) { case Xop_xnde_tag_.Tid__noinclude: case Xop_xnde_tag_.Tid__includeonly: case Xop_xnde_tag_.Tid__onlyinclude: itm_is_static = false; break; } if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; default: if (ws_bgn_chk) ws_bgn_chk = false; else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn break; } cur_itm.Subs_add_grp(sub, root, i); cur_itm_subs_len++; } if (brack_count > 0) return false; if (cur_nde == null) // occurs when | is last tkn; EX: {{name|a|}}; cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, bgn_pos); if (cur_itm == null) { // occurs when = is last tkn; EX: {{name|a=}}; cur_itm = tkn_mkr.ArgItm(bgn_pos, -1); itm_is_static = ws_bgn_chk = true; cur_itm_subs_len = 0; ws_bgn_idx = ws_end_idx = -1; key_exists = false; } Arg_itm_end( ctx, cur_nde, cur_itm, ws_bgn_idx, ws_end_idx, cur_itm_subs_len, bgn_pos, wkr_typ, key_exists, false, itm_is_static, src, cur_nde_idx); cur_nde.Val_tkn_(cur_itm); return wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx); }
private void Arg_itm_end( Xop_ctx ctx, Arg_nde_tkn nde, Arg_itm_tkn itm, int ws_bgn_idx, int ws_end_idx, int subs_len, int lxr_bgn, int wkr_typ, boolean key_exists, boolean cur_itm_is_key, boolean itm_is_static, byte[] src, int arg_idx) { // PURPOSE: mark tkns Ignore; find dat_bgn, dat_end int dat_bgn = itm.Src_bgn(), dat_end = lxr_bgn; boolean trim = false; // trim ws at bgn boolean wkr_is_not_prm = wkr_typ != Xop_arg_wkr_.Typ_prm; if (ws_bgn_idx != -1) { // ignore ws if (ws_found_at_bgn && (tmpl_arg || (lnki_arg && key))); lnki_arg && // val does not ignore at bgn; EX: [[alt= a b c]] -> " a b c" switch (wkr_typ) { case Xop_arg_wkr_.Typ_prm: trim = arg_idx == 0; break; case Xop_arg_wkr_.Typ_tmpl: trim = key_exists || arg_idx == 0; break; case Xop_arg_wkr_.Typ_lnki: trim = cur_itm_is_key || !key_exists; break; // NOTE: trim if "a= b"; skip if " a=b" or " a" } if (trim) { for (int i = 0; i <= ws_bgn_idx; i++) { Xop_tkn_itm sub_tkn = itm.Subs_get(i); // NOTE: tknTypeId should be space, newline, or tab if (wkr_is_not_prm) sub_tkn.Ignore_y_grp_(ctx, itm, i); // mark tkn ignore unless wkr is prm; SEE:NOTE_1 if (i == ws_bgn_idx) dat_bgn = sub_tkn.Src_end_grp(itm, i); // if last_tkn, set dat_bgn to bgn } } } // trim ws at end if (ws_end_idx != -1) { // ignore ws if (ws_found_at_end && (tmpl_arg || (lnki_arg && val))); lnki_arg && // key does not ignore at end; EX: [[alt =a b c]] -> unrecognized nde ("alt ") trim = false; switch (wkr_typ) { case Xop_arg_wkr_.Typ_prm: trim = arg_idx == 0; break; case Xop_arg_wkr_.Typ_tmpl: trim = key_exists || arg_idx == 0; break; // NOTE: never set "trim = true"; // PAGE:fr.w:Histoire_de_la_marine_française_sous_Louis_XV_et_Louis_XVI // DATE:2015-11-17 case Xop_arg_wkr_.Typ_lnki: trim = !cur_itm_is_key; break; } if (trim) { for (int i = ws_end_idx; i < subs_len; i++) { Xop_tkn_itm sub_tkn = itm.Subs_get(i); // NOTE: tknTypeId should be space, newline, or tab if (wkr_is_not_prm) sub_tkn.Ignore_y_grp_(ctx, itm, i); // mark tkn ignore unless wkr is prm; SEE:NOTE_1 if (i == ws_end_idx) dat_end = sub_tkn.Src_bgn_grp(itm, i); // if 1st_tkn; set dat_end to bgn } } } itm.Src_end_(lxr_bgn); nde.Src_end_(lxr_bgn); // NOTE: src_end is lxr_bgn; EX: {{a}} has src_end at 3; lxr_bgn for }} itm.Dat_rng_( dat_bgn, dat_end); // always set dat, even if itm has dynamic parts; EX: {{{ a{{{1}}}b }}} has 4,13, // not 3,14 (ignore ws) // if (itm_is_static) // itm.Dat_ary_(dat_end == dat_bgn ? Bry_.Empty : Bry_.Mid(src, dat_bgn, dat_end)); itm.Itm_static_(itm_is_static); }