Esempio n. 1
0
 /**
  * Strips the whitespace that a [[Category:]] link would otherwise leave behind.
  * REF.MW:Parser.php|replaceInternalLinks2
  *
  * <p>Does nothing unless para_enabled is set. Looks at the sub of root that sits just before the
  * last one (the last one is skipped as the current lnki). If that sub is a para tkn and
  * prv_ws_bgn is positive, the para tkn is marked ignored (unless it starts at src position 0),
  * prv_ws_bgn is cleared and, when a list is on the ctx stack, prv_nl_pos is moved past the
  * newline that ends the current line, provided only spaces / tabs precede that newline. If the
  * sub is anything else, or there is no such sub, prv_ws_bgn is simply cleared.
  *
  * @param ctx parser context; only queried for whether a list tkn is on its stack
  * @param root root tkn whose subs are inspected
  * @param src source bytes
  * @param pos position in src at which the end-of-line scan starts
  * @param src_len exclusive upper bound for the end-of-line scan
  */
 public void Process_lnki_category(
     Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) {
   if (!para_enabled) return;
   // start at len - 2: len itself is out of range, and len - 1 is skipped as the current lnki
   for (int idx = root.Subs_len() - 2; idx >= 0; idx--) {
     Xop_tkn_itm prv_tkn = root.Subs_get(idx);
     if (prv_tkn.Tkn_tid() != Xop_tkn_itm_.Tid_para) {
       break; // anything except a para / nl in front of [[Category:]] ends the search
     }
     // para found; this means everything from BOL up to [[Category:]] is ws
     if (prv_ws_bgn > 0) { // line begins with ws
       if (prv_tkn.Src_bgn() != 0) {
         // do not ignore BOS para; needed b/c it is often <p>; needed for test
         prv_tkn.Ignore_y_(); // ignore nl (pretty-printing only)
       }
       prv_ws_bgn = 0; // remove ws
       if (ctx.Stack_has(Xop_tkn_itm_.Tid_list)) {
         // HACK: if in list, set prv_nl_pos to EOL; only here for one test to pass
         int scan = pos;
         while (scan < src_len) { // check if rest of line is ws
           byte cur = src[scan];
           if (cur == Byte_ascii.Nl) {
             prv_nl_pos = scan + 1; // SEE:NOTE_2
             break;
           }
           if (cur != Byte_ascii.Space && cur != Byte_ascii.Tab) {
             break; // something else besides ws; stop without touching prv_nl_pos
           }
           scan++;
         }
       }
     }
     return;
   }
   // no para directly in front of the lnki (or too few subs); just remove prv_ws_bgn
   prv_ws_bgn = 0;
 }
Esempio n. 2
0
	/**
	 * Lexer handler for a run of consecutive "}" characters.
	 *
	 * Repeatedly pairs the closing run with the nearest Tid_tmpl_curly_bgn tkn on the ctx stack.
	 * A pair of length 2 is turned into an invk tkn (ctx.Invk().Make_tkn); a pair of length 3 or
	 * more is turned into a prm tkn (prm_wkr.Make_tkn), consuming 3 closing curlies per pass.
	 * Opening curlies that are left over are put back on the stack (as a txt tkn when only one is
	 * left); a single left-over "}" is added to root as a txt tkn.
	 * If no opening curly tkn can be found, a message is logged and the run is added as literal text.
	 *
	 * @param ctx         parser context; supplies the tkn stack, message log and wiki / language settings
	 * @param tkn_mkr     factory used to create txt tkns
	 * @param root        root tkn that receives the new subs
	 * @param src         source bytes
	 * @param src_len     exclusive upper bound when scanning src
	 * @param lxr_bgn_pos start of the closing run (the run length is computed as lxr_end_pos - lxr_bgn_pos)
	 * @param lxr_cur_pos position from which the scan for further "}" characters starts
	 * @return the result of ctx.Lxr_make_txt_(lxr_cur_pos) when the current tkn is a brack_bgn;
	 *         otherwise lxr_end_pos, i.e. the position after the run, reduced by 1 for each variant dash adjustment applied
	 */
	public int MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int lxr_bgn_pos, int lxr_cur_pos) {
		if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_brack_bgn)	// WORKAROUND: ignore }} if inside lnki; EX.CM:Template:Protected; {{#switch:a|b=[[a|ja=}}]]}}
			return ctx.Lxr_make_txt_(lxr_cur_pos);
		int lxr_end_pos = Bry_finder.Find_fwd_while(src, lxr_cur_pos, src_len, Byte_ascii.Curly_end);	// NOTE: can be many consecutive }; EX: {{a|{{{1}}}}}
		int end_tkn_len = lxr_end_pos - lxr_bgn_pos;	// number of closing curlies still to be matched
		boolean vnt_enabled = ctx.Wiki().Lang().Vnt_mgr().Enabled();
		while (end_tkn_len > 0) {
			int acs_pos = -1, acs_len = ctx.Stack_len();
			for (int i = acs_len - 1; i > -1; i--) {		// find auto-close pos; walk stack from top to bottom
				Xop_tkn_itm stack_tkn = ctx.Stack_get(i);
				switch (stack_tkn.Tkn_tid()) {
					case Xop_tkn_itm_.Tid_tmpl_curly_bgn:	// found curly_bgn; mark and exit
						acs_pos = i;
						i = -1;
						break;
					case Xop_tkn_itm_.Tid_brack_bgn:		// found no curly_bgn, but found brack_bgn; note that extra }} should not close any frames beyond lnki; EX:w:Template:Cite wikisource; w:John Fletcher (playwright)
						i = -1;
						break;
					case Xop_tkn_itm_.Tid_xnde:				// found xnde; stop the search only if its tag reports Xtn(); other xndes are skipped; handle {{template|<poem>}}</poem>}} DATE:2014-03-03
						Xop_xnde_tkn stack_xnde = (Xop_xnde_tkn)stack_tkn;
						if (stack_xnde.Tag().Xtn())
							i = -1;
						break;
				}
			}
			if (acs_pos == -1) {	// "}}+" found but no "{{+" found; warn and output literal tkn
				ctx.Msg_log().Add_itm_none(Xop_curly_log.Bgn_not_found, src, lxr_bgn_pos, lxr_end_pos);
				ctx.Subs_add(root, tkn_mkr.Txt(lxr_bgn_pos, lxr_end_pos));
				return lxr_end_pos;
			}

			Xop_curly_bgn_tkn bgn_tkn = (Xop_curly_bgn_tkn)ctx.Stack_pop_til(root, src, acs_pos, true, lxr_bgn_pos, lxr_end_pos, Xop_tkn_itm_.Tid_tmpl_curly_bgn);	// NOTE: in theory, an unclosed [[ can be on stack; for now, ignore
			int bgn_tkn_len = bgn_tkn.Src_end() - bgn_tkn.Src_bgn();
			int bgn_tkn_pos_bgn = bgn_tkn.Src_bgn();// save original pos_bgn
			boolean vnt_dash_adjust = false;
			// language-variant handling: with flanking dashes ("-{{{ ... }}}-"), hand one curly on each side back to the text
			if (vnt_enabled ) {
				int curly_bgn_dash = bgn_tkn.Src_bgn() - 1;
				if (curly_bgn_dash > -1 && src[curly_bgn_dash] == Byte_ascii.Dash) {			// "-" exists before curlies; EX: "-{{"
					int curly_end_dash = lxr_end_pos;
					if (curly_end_dash < src_len && src[curly_end_dash] == Byte_ascii.Dash) {	// "-" exists after curlies;  EX: "}}-"
						if (bgn_tkn_len > 2 && end_tkn_len > 2) {	// 3 or more curlies at bgn / end with flanking dashes; EX: "-{{{ }}}-"; NOTE: 3 is needed b/c 2 will never be reduced; EX: "-{{" will always be "-" and "{{", not "-{" and "{"
							int numeric_val = Bry_.Xto_int_or(src, bgn_tkn.Src_end(), lxr_bgn_pos, -1);	// -1 is the fallback passed to Xto_int_or; presumably returned when the text between the curlies is not an int
							if (	numeric_val != -1						// do not apply if numeric val; EX:"-{{{0}}}-" vs "-{{{#expr:0}}}-" sr.w:Template:Link_FA
								&&	bgn_tkn_len == 3 && end_tkn_len == 3	// exactly 3 tokens; assume param token; "-{{{" -> "-" + "{{{" x> -> "-{" + "{{"; if unbalanced (3,4 or 4,3) fall into code below
								) {
							}												// noop; PAGE:sr.w:ДНК; EX:<span id="interwiki-{{{1}}}-fa"></span> DATE:2014-07-03
							else {
								--bgn_tkn_len;		// reduce bgn curlies by 1; EX: "{{{" -> "{{"
								++bgn_tkn_pos_bgn;	// add one to bgn tkn_pos;
								--end_tkn_len;		// reduce end curlies by 1; EX: "}}}" -> "}}"
								--lxr_end_pos;		// reduce end by 1; this will "reprocess" the final "}" as a text tkn; EX: "}}}-" -> "}}" and position before "}-"
								vnt_dash_adjust = true;
							}
						}
					}
				}
			}

			// new_tkn_len is the smaller of bgn_tkn_len and end_tkn_len
			int new_tkn_len = 0;
			if		(bgn_tkn_len == end_tkn_len)	// exact match; should be majority of cases
				new_tkn_len = bgn_tkn_len;
			else if (bgn_tkn_len >  end_tkn_len)	// more bgn than end; use end, and deduct bgn; EX: {{{{{1}}}|a}}
				new_tkn_len = end_tkn_len;
			else   /*bgn_tkn_len <  end_tkn_len*/	// more end than bgn; use bgn, and deduct end; EX: {{a|{{{1}}}}}
				new_tkn_len = bgn_tkn_len;

			int keep_curly_bgn = 0;
			/* NOTE: this is a semi-hack; if bgn_tkn > new_tkn, then pretend bgn_tkn fits new_tkn, give to bldr, and then adjust back later
			EX: {{{{{1}}}|a}} -> bgn_tkn_len=5,new_tkn_len=3 -> change bgn(0, 5) to bgn(2, 5)
			The "correct" way is to insert a new_bgn_tkn after cur_bgn_tkn on root, but this would have performance implications: array would have to be resized, and all subs will have to be reindexed
			NOTE: bgn curlies should also be preserved if new_tkn_len > 3; EX: {{{{{{1}}}}}}; note that bgn = end, but len > 3
			*/
			if (bgn_tkn_len > new_tkn_len || new_tkn_len > 3) {
				bgn_tkn.Tkn_ini_pos(false, bgn_tkn.Src_end() - new_tkn_len, bgn_tkn.Src_end());
				keep_curly_bgn = 1;	// preserves {{
			}
			switch (new_tkn_len) {
				case 0:			// EXC_CASE: should not happen; warn;
					ctx.Msg_log().Add_itm_none(Xop_curly_log.Bgn_len_0, src, bgn_tkn.Src_bgn(), lxr_end_pos);
					break;
//					case 1:			// EXC_CASE: SEE:NOTE_1;
//						break;
				case 2:			// USE_CASE: make invk_tkn
					ctx.Invk().Make_tkn(ctx, root, src, lxr_bgn_pos, lxr_bgn_pos + new_tkn_len, bgn_tkn, keep_curly_bgn);
					break;
				default:		// USE_CASE: make prm_tkn; NOTE: 3 or more (a value of 1 would also land here since "case 1" is commented out)
					new_tkn_len = 3;	// gobble 3 at a time; EX: 6 -> 3 -> 0; EX: 7 -> 4 -> 1;
					prm_wkr.Make_tkn(ctx, tkn_mkr, root, src, src_len, lxr_bgn_pos, lxr_bgn_pos + new_tkn_len, bgn_tkn, keep_curly_bgn);
					break;
			}
			switch (bgn_tkn_len - new_tkn_len) {	// continuation of semi-hack above; some bgn still left over; adjust and throw back on stack
				case 1: // 1 tkn; convert curly to generic text tkn
					bgn_tkn.Src_end_(bgn_tkn.Src_end() - 1);	// NOTE: shorten end of bgn_tkn by 1; TEST
					ctx.Stack_add(tkn_mkr.Txt(bgn_tkn_pos_bgn, bgn_tkn.Src_end() - new_tkn_len));
					break;
				case 0:	// noop
					break;
				default:
					bgn_tkn.Tkn_ini_pos(false, bgn_tkn_pos_bgn, bgn_tkn.Src_end() - new_tkn_len);	// bgn(2, 5) -> bgn (0, 2)
					ctx.Stack_add(bgn_tkn);
					break;
			}
			if (vnt_dash_adjust) {
				Xop_tkn_itm text_tkn = root.Subs_get_or_null(root.Subs_len() - 2);	// -2 to get tkn before newly-created tmpl / prm
				if (text_tkn == null || text_tkn.Tkn_tid() != Xop_tkn_itm_.Tid_txt)
					ctx.Wiki().Appe().Usr_dlg().Warn_many("", "", "token before curly_bgn was not text tkn; src=~{0}", String_.new_u8(src, lxr_bgn_pos, lxr_end_pos));
				else
					text_tkn.Src_end_(text_tkn.Src_end() + 1);	// +1 to extend the txt_tkn holding the dash by 1 so that it includes a curly; EX: "-" "{{{" -> "-{" "{{"
			}

			end_tkn_len -= new_tkn_len;
			lxr_bgn_pos += new_tkn_len;	// move lxr_bgn_pos along
			if (end_tkn_len == 1) {	// SEE:NOTE_1: a lone remaining "}" cannot close anything; add it as text and finish
				ctx.Subs_add(root, tkn_mkr.Txt(lxr_bgn_pos, lxr_bgn_pos + 1));
				end_tkn_len = 0;
				++lxr_bgn_pos;
			}
		}
		return lxr_end_pos;
	}
Esempio n. 3
0
 /**
  * Builds argument nodes from the subs of root in the index range [loop_bgn, loop_end) and hands
  * each finished node to wkr.Args_add.
  *
  * <p>Subs are collected into the current Arg_itm_tkn. A pipe tkn closes the current node (the
  * item becomes the node's value). An eq tkn can close the current item as the node's key, but
  * never in the first node, never for Typ_prm, not inside brackets for Typ_tmpl, and only while
  * the node has no eq tkn yet. While collecting, the method tracks where leading whitespace ends
  * and where trailing whitespace may start, and passes those indexes to Arg_itm_end.
  *
  * @param ctx parser context; passed through to Arg_itm_end and wkr.Args_add
  * @param tkn_mkr factory for Arg_nde_tkn / Arg_itm_tkn
  * @param wkr receives every finished node through Args_add
  * @param wkr_typ one of the Xop_arg_wkr_.Typ_* values; changes how colon, eq, pipe and ignore
  *     tkns are treated
  * @param root tkn whose subs are scanned
  * @param tkn passed through unchanged to wkr.Args_add
  * @param bgn_pos src position used for a node / item created after the loop and passed to the
  *     final Arg_itm_end call
  * @param cur_pos not read by this method
  * @param loop_bgn index of the first sub to process
  * @param loop_end index after the last sub to process
  * @param src source bytes
  * @return false if a Typ_tmpl first argument is closed by a pipe before any non-whitespace tkn,
  *     if wkr.Args_add rejects a node, or if brackets are still open after the loop; otherwise
  *     the result of the final wkr.Args_add call
  */
 public boolean Bld(
     Xop_ctx ctx,
     Xop_tkn_mkr tkn_mkr,
     Xop_arg_wkr wkr,
     int wkr_typ,
     Xop_root_tkn root,
     Xop_tkn_itm tkn,
     int bgn_pos,
     int cur_pos,
     int loop_bgn,
     int loop_end,
     byte[] src) {
   boolean ws_bgn_chk = true, colon_chk = false, itm_is_static = true, key_exists = false;
   int ws_bgn_idx = -1, ws_end_idx = -1, cur_itm_subs_len = 0, cur_nde_idx = -1;
   Arg_nde_tkn cur_nde = null;
   Arg_itm_tkn cur_itm = null;
   int brack_count = 0;
   Xop_tkn_itm eq_pending = null;
   for (int i = loop_bgn;
       i < loop_end;
       i++) { // loop over subs between bookends; if lnki, all tkns between [[ and ]]; if tmpl, {{
              // and }}
     Xop_tkn_itm sub = root.Subs_get(i);
     int sub_pos_bgn = sub.Src_bgn_grp(root, i);
     if (cur_nde == null) { // start of a new arg node (first sub, or first sub after a pipe)
       cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, sub_pos_bgn);
       brack_count = 0;
       key_exists = false;
     }
     if (cur_itm == null) { // start of a new item (new node, or first sub after a key-separating eq)
       cur_itm = tkn_mkr.ArgItm(sub_pos_bgn, -1);
       itm_is_static = ws_bgn_chk = true;
       cur_itm_subs_len = 0;
       ws_bgn_idx = ws_end_idx = -1;
       if (eq_pending
           != null) { // something like  "A==B" encountered; zh.w:Wikipedia:条目评选; DATE:2014-08-27
         eq_pending.Src_end_(eq_pending.Src_end() - 1); // remove an "=" EX:"A==B" -> "A","=","=B"
         cur_itm.Subs_add_grp(eq_pending, root, i);
         cur_itm_subs_len++; // add the tkn to cur_itm
         eq_pending = null;
       }
     }
     switch (sub.Tkn_tid()) {
       case Xop_tkn_itm_
           .Tid_ignore: // comment or *include* tkn; mark itm as non_static for tmpl (forces
                        // re-eval)
         switch (wkr_typ) {
           case Xop_arg_wkr_.Typ_tmpl:
           case Xop_arg_wkr_.Typ_prm:
             itm_is_static = false;
             break;
         }
         break;
       case Xop_tkn_itm_
           .Tid_para: // NOTE: para can appear in following: [[File:A.png| \n 40px]]; EX:
                      // w:Supreme_Court_of_the_United_States; DATE:2014-04-05
       case Xop_tkn_itm_.Tid_newLine:
       case Xop_tkn_itm_.Tid_space:
       case Xop_tkn_itm_.Tid_tab: // whitespace
         if (ws_bgn_chk)
           ws_bgn_idx =
               cur_itm_subs_len; // definite ws at bgn; set ws_bgn_idx, and keep setting until text
                                 // tkn reached; handles mixed sequence of \s\n\t where last tkn
                                 // should be ws_bgn_idx
         else {
           if (ws_end_idx == -1) ws_end_idx = cur_itm_subs_len;
         }
         ; // possible ws at end; may be overridden later; see AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_colon:
         if (wkr_typ
             == Xop_arg_wkr_
                 .Typ_tmpl) { // treat colons as text; tmpl will do its own : parsing for 1st arg;
                              // NOTE: must do ws check else 2nd colon will break; EX: "{{#ifeq:
                              // :|a|b|c}}"; DATE:2013-12-10
           if (ws_bgn_chk) ws_bgn_chk = false;
           else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         } else {
           if (cur_nde_idx == 0
               && !colon_chk) { // if 1st arg, mark colon pos; needed for lnki; EX: [[Category:A]];
                                // {{#ifeq:1}}
             colon_chk = true;
             cur_nde.Arg_colon_pos_(sub_pos_bgn);
           }
         }
         break;
       case Xop_tkn_itm_.Tid_brack_bgn:
         ++brack_count;
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_brack_end:
         --brack_count;
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_eq:
         // NOTE: the two empty branches below leave the ws flags untouched; the eq tkn is still
         // added to cur_itm after the switch
         if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) {
         } // eq inside brackets of a tmpl arg is not a key / val separator
         else if (wkr_typ == Xop_arg_wkr_.Typ_prm) {
         } // always ignore for prm
         else {
           if (cur_nde_idx
                   != 0 // if 1st arg, treat equal_tkn as txt_tkn; i.e.: eq should not be used to
                        // separate key/val
               && cur_nde.Eq_tkn()
                   == Xop_tkn_null
                       .Null_tkn // only mark key if key is not set; handle multiple-keys; EX:
                                 // {{name|key1=b=c}}; DATE:2014-02-09
           ) {
             Xop_eq_tkn sub_as_eq = (Xop_eq_tkn) sub;
             int sub_as_eq_len = sub_as_eq.Eq_len();
             boolean eq_is_spr =
                 sub_as_eq_len
                         == 1 // eq with len of 1 are considered separators;
                              // MW.REF:Preprocessor_DOM.php|preprocessToXml; "if ( $count == 1 &&
                              // $findEquals )" PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014;
                              // DATE:2014-07-21
                     || (cur_itm.Subs_len()
                             > 0 // or eq.len > 1 that occur later in itm; EX: a==b;
                                 // zh.w:Wikipedia:条目评选; DATE:2014-08-27
                         && cur_itm.Subs_get(0).Tkn_tid()
                             != Xop_tkn_itm_
                                 .Tid_eq // and 1st tkn is not ==; EX:==a==; 2nd == should not be
                                         // eq b/c 1st == "deactivates" nde; DATE:2014-08-27
                     );
             if (eq_is_spr) {
               // NOTE: the if / else below have no braces; each governs exactly one statement,
               // so "key_exists = true" and everything after it run for both branches
               if (sub_as_eq_len == 1) // =.len == 1
               cur_nde.Eq_tkn_(sub); // set as eq tkn
               else // =.len  > 1
               eq_pending =
                     sub; // do not set as eq tkn; note that Eq_tkn exists for bookkeeping and is
                          // not printed out,
               key_exists = true;
               Arg_itm_end(
                   ctx,
                   cur_nde,
                   cur_itm,
                   ws_bgn_idx,
                   ws_end_idx,
                   cur_itm_subs_len,
                   sub_pos_bgn,
                   wkr_typ,
                   key_exists,
                   true,
                   itm_is_static,
                   src,
                   cur_nde_idx);
               cur_nde.Key_tkn_(cur_itm); // the item collected so far becomes the key
               cur_itm = null; // forces a fresh item (the value) on the next sub
               continue; // do not add tkn to cur_itm
             }
           }
           // eq is not a separator here; treat it like a text tkn
           if (ws_bgn_chk) ws_bgn_chk = false;
           else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
           break;
         }
         break;
       case Xop_tkn_itm_.Tid_pipe:
         if (cur_nde_idx == 0 && ws_bgn_chk && !colon_chk && wkr_typ == Xop_arg_wkr_.Typ_tmpl)
           return false; // 1st arg, but no name; EX: "{{|a}}", "{{ }}"; disregard if lnki, since
                         // "[[|a]]" is valid
         if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) {
           break; // pipe inside brackets of a tmpl arg; keep it in cur_itm instead of ending the arg
         } else {
           Arg_itm_end(
               ctx,
               cur_nde,
               cur_itm,
               ws_bgn_idx,
               ws_end_idx,
               cur_itm_subs_len,
               sub_pos_bgn,
               wkr_typ,
               key_exists,
               false,
               itm_is_static,
               src,
               cur_nde_idx);
           cur_nde.Val_tkn_(cur_itm);
           if (!wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx))
             return false; // NOTE: if invalid, exit now; lnki_wkr expects false if any argument is
                           // invalid; DATE:2014-06-06
           cur_nde = null;
           cur_itm = null;
           key_exists = false; // reset
           continue; // do not add tkn to cur_itm
         }
       // NOTE: no fall-through from Tid_pipe; every path above ends in return, break or continue
       case Xop_tkn_itm_
           .Tid_tmpl_prm: // nested prm (3 {) or invk (2 {); mark itm_is_static = false and treat
                          // tkn as txt
       case Xop_tkn_itm_.Tid_tmpl_invk:
         itm_is_static = false;
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_xnde:
         Xop_xnde_tkn sub_as_xnde = (Xop_xnde_tkn) sub;
         switch (sub_as_xnde.Tag().Id()) { // include-control tags make the item non-static
           case Xop_xnde_tag_.Tid__noinclude:
           case Xop_xnde_tag_.Tid__includeonly:
           case Xop_xnde_tag_.Tid__onlyinclude:
             itm_is_static = false;
             break;
         }
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       default: // any other tkn counts as text: ends leading ws and cancels a pending trailing ws
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
     }
     cur_itm.Subs_add_grp(sub, root, i);
     cur_itm_subs_len++;
   }
   if (brack_count > 0) return false; // unclosed bracket in the last arg
   if (cur_nde == null) // occurs when | is last tkn; EX: {{name|a|}};
   cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, bgn_pos);
   if (cur_itm == null) { // occurs when = is last tkn; EX: {{name|a=}};
     cur_itm = tkn_mkr.ArgItm(bgn_pos, -1);
     itm_is_static = ws_bgn_chk = true;
     cur_itm_subs_len = 0;
     ws_bgn_idx = ws_end_idx = -1;
     key_exists = false;
   }
   // close the last item / node and hand it to the wkr
   Arg_itm_end(
       ctx,
       cur_nde,
       cur_itm,
       ws_bgn_idx,
       ws_end_idx,
       cur_itm_subs_len,
       bgn_pos,
       wkr_typ,
       key_exists,
       false,
       itm_is_static,
       src,
       cur_nde_idx);
   cur_nde.Val_tkn_(cur_itm);
   return wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx);
 }