Beispiel #1
0
 /**
  * Cleans up the para tkn / leading whitespace that sits directly in front of a [[Category:]]
  * lnki. REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce
  *
  * <p>Does nothing when para handling is disabled. Otherwise only the tkn immediately before the
  * current lnki is inspected: a para tkn triggers the cleanup below and returns; any other tkn
  * (or no tkn at all) just resets prv_ws_bgn.
  *
  * @param ctx parse context; asked whether a list tkn is on the stack
  * @param root root tkn whose subs are inspected
  * @param src source bytes
  * @param pos index in src where the end-of-line scan starts
  * @param src_len exclusive upper bound of the end-of-line scan
  */
 public void Process_lnki_category(
     Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) {
   if (!para_enabled) return;

   // -2: -1 b/c subs_len itself is not a valid idx; -1 more to skip the current lnki
   int prv_idx = root.Subs_len() - 2;
   if (prv_idx > -1) {
     Xop_tkn_itm prv_tkn = root.Subs_get(prv_idx);
     if (prv_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_para) {
       // nl found; this means that BOL -> [[Category:]] is all ws
       if (prv_ws_bgn > 0) { // line begins with ws
         // do not ignore BOS para; needed b/c it is often <p>; needed for test
         if (prv_tkn.Src_bgn() != 0) {
           prv_tkn.Ignore_y_(); // ignore nl (pretty-printing only)
         }
         prv_ws_bgn = 0; // remove ws

         // HACK: if in list, set prv_nl_pos to EOL; only here for one test to pass
         if (ctx.Stack_has(Xop_tkn_itm_.Tid_list)) {
           int nl_at_eol = -1;
           int scan = pos;
           while (scan < src_len) { // check if rest of line is ws
             byte cur = src[scan];
             if (cur == Byte_ascii.Nl) {
               nl_at_eol = scan;
               break;
             }
             if (cur != Byte_ascii.Space && cur != Byte_ascii.Tab) {
               break; // something else besides ws; stop
             }
             scan++; // space / tab: keep scanning
           }
           if (nl_at_eol != -1) {
             prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2
           }
         }
       }
       return;
     }
     // anything except para in front of [[Category:]]: fall through to the reset below
   }
   prv_ws_bgn = 0;
 }
Beispiel #2
0
 /**
  * Groups the subs of {@code root} in the range [loop_bgn, loop_end) into argument nodes and
  * hands each finished node to {@code wkr}.
  *
  * <p>A pipe tkn closes the current node (its current itm becomes the node's val) and passes it
  * to {@code wkr.Args_add}. An eq tkn may close the current itm as the node's key (see the
  * Tid_eq case for the exact conditions). All other tkns are appended to the current itm while
  * the leading / trailing whitespace indexes are tracked for {@code Arg_itm_end}.
  *
  * @param ctx parse context; forwarded to Arg_itm_end and wkr.Args_add
  * @param tkn_mkr factory used to create the Arg_nde_tkn / Arg_itm_tkn instances
  * @param wkr receiver of each completed node via Args_add
  * @param wkr_typ one of the Xop_arg_wkr_.Typ_* values; selects tmpl / prm / lnki specific handling
  * @param root tkn whose subs are iterated
  * @param tkn forwarded unchanged to wkr.Args_add
  * @param bgn_pos position used for a node / itm created after the loop and as the end position
  *     of the final Arg_itm_end call
  * @param cur_pos not read by this method
  * @param loop_bgn index of the first sub to process (inclusive)
  * @param loop_end index of the last sub to process (exclusive)
  * @param src source bytes; forwarded to Arg_itm_end and wkr.Args_add
  * @return false if wkr.Args_add rejects a node, if a tmpl's 1st arg is still at leading ws with
  *     no colon when a pipe is reached, or if brack_count is still positive after the loop;
  *     otherwise the result of the final wkr.Args_add call
  */
 public boolean Bld(
     Xop_ctx ctx,
     Xop_tkn_mkr tkn_mkr,
     Xop_arg_wkr wkr,
     int wkr_typ,
     Xop_root_tkn root,
     Xop_tkn_itm tkn,
     int bgn_pos,
     int cur_pos,
     int loop_bgn,
     int loop_end,
     byte[] src) {
   // ws_bgn_chk: true while only whitespace tkns have been seen in the current itm
   // colon_chk: true once the colon pos of the 1st arg has been recorded (non-tmpl only)
   // itm_is_static: cleared when the itm contains tkns that force re-evaluation
   // key_exists: true once an eq tkn has split the current node into key / val
   boolean ws_bgn_chk = true, colon_chk = false, itm_is_static = true, key_exists = false;
   // ws_bgn_idx / ws_end_idx: sub indexes (within cur_itm) of the last leading ws tkn and the
   // first trailing ws tkn; -1 means "none"
   int ws_bgn_idx = -1, ws_end_idx = -1, cur_itm_subs_len = 0, cur_nde_idx = -1;
   Arg_nde_tkn cur_nde = null;
   Arg_itm_tkn cur_itm = null;
   int brack_count = 0; // incremented on brack_bgn, decremented on brack_end; reset per node
   Xop_tkn_itm eq_pending = null; // multi-char eq tkn to be re-added to the next itm
   for (int i = loop_bgn;
       i < loop_end;
       i++) { // loop over subs between bookends; if lnki, all tkns between [[ and ]]; if tmpl, {{
              // and }}
     Xop_tkn_itm sub = root.Subs_get(i);
     int sub_pos_bgn = sub.Src_bgn_grp(root, i);
     // lazily start a new node (after a pipe, or on the 1st iteration)
     if (cur_nde == null) {
       cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, sub_pos_bgn);
       brack_count = 0;
       key_exists = false;
     }
     // lazily start a new itm (after a pipe or a key-separating eq, or on the 1st iteration)
     if (cur_itm == null) {
       cur_itm = tkn_mkr.ArgItm(sub_pos_bgn, -1);
       itm_is_static = ws_bgn_chk = true;
       cur_itm_subs_len = 0;
       ws_bgn_idx = ws_end_idx = -1;
       if (eq_pending
           != null) { // something like  "A==B" encountered; zh.w:Wikipedia:条目评选; DATE:2014-08-27
         eq_pending.Src_end_(eq_pending.Src_end() - 1); // remove an "=" EX:"A==B" -> "A","=","=B"
         cur_itm.Subs_add_grp(eq_pending, root, i);
         cur_itm_subs_len++; // add the tkn to cur_itm
         eq_pending = null;
       }
     }
     // NOTE: a case that ends with "break" falls out of the switch and the sub is appended to
     // cur_itm at the bottom of the loop; a case that ends with "continue" skips that append
     switch (sub.Tkn_tid()) {
       case Xop_tkn_itm_
           .Tid_ignore: // comment or *include* tkn; mark itm as non_static for tmpl (forces
                        // re-eval)
         switch (wkr_typ) {
           case Xop_arg_wkr_.Typ_tmpl:
           case Xop_arg_wkr_.Typ_prm:
             itm_is_static = false;
             break;
         }
         break;
       case Xop_tkn_itm_
           .Tid_para: // NOTE: para can appear in following: [[File:A.png| \n 40px]]; EX:
                      // w:Supreme_Court_of_the_United_States; DATE:2014-04-05
       case Xop_tkn_itm_.Tid_newLine:
       case Xop_tkn_itm_.Tid_space:
       case Xop_tkn_itm_.Tid_tab: // whitespace
         if (ws_bgn_chk)
           ws_bgn_idx =
               cur_itm_subs_len; // definite ws at bgn; set ws_bgn_idx, and keep setting until text
                                 // tkn reached; handles mixed sequence of \s\n\t where last tkn
                                 // should be ws_bgn_idx
         else {
           if (ws_end_idx == -1) ws_end_idx = cur_itm_subs_len;
         }
         ; // possible ws at end; may be overridden later; see AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_colon:
         if (wkr_typ
             == Xop_arg_wkr_
                 .Typ_tmpl) { // treat colons as text; tmpl will do its own : parsing for 1st arg;
                              // NOTE: must do ws check else 2nd colon will break; EX: "{{#ifeq:
                              // :|a|b|c}}"; DATE:2013-12-10
           if (ws_bgn_chk) ws_bgn_chk = false;
           else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         } else {
           if (cur_nde_idx == 0
               && !colon_chk) { // if 1st arg, mark colon pos; needed for lnki; EX: [[Category:A]];
                                // {{#ifeq:1}}
             colon_chk = true;
             cur_nde.Arg_colon_pos_(sub_pos_bgn);
           }
         }
         break;
       case Xop_tkn_itm_.Tid_brack_bgn:
         ++brack_count;
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_brack_end:
         --brack_count;
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_eq:
         // the two empty branches below leave the eq tkn to be appended to cur_itm as-is,
         // without touching the ws tracking state
         if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) {
         } else if (wkr_typ == Xop_arg_wkr_.Typ_prm) {
         } // always ignore for prm
         else {
           if (cur_nde_idx
                   != 0 // if 1st arg, treat equal_tkn as txt_tkn; i.e.: eq should not be used to
                        // separate key/val
               && cur_nde.Eq_tkn()
                   == Xop_tkn_null
                       .Null_tkn // only mark key if key is not set; handle multiple-keys; EX:
                                 // {{name|key1=b=c}}; DATE:2014-02-09
           ) {
             Xop_eq_tkn sub_as_eq = (Xop_eq_tkn) sub;
             int sub_as_eq_len = sub_as_eq.Eq_len();
             boolean eq_is_spr =
                 sub_as_eq_len
                         == 1 // eq with len of 1 are considered separators;
                              // MW.REF:Preprocessor_DOM.php|preprocessToXml; "if ( $count == 1 &&
                              // $findEquals )" PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014;
                              // DATE:2014-07-21
                     || (cur_itm.Subs_len()
                             > 0 // or eq.len > 1 that occur later in itm; EX: a==b;
                                 // zh.w:Wikipedia:条目评选; DATE:2014-08-27
                         && cur_itm.Subs_get(0).Tkn_tid()
                             != Xop_tkn_itm_
                                 .Tid_eq // and 1st tkn is not ==; EX:==a==; 2nd == should not be
                                         // eq b/c 1st == "deactivates" nde; DATE:2014-08-27
                     );
             if (eq_is_spr) {
               if (sub_as_eq_len == 1) // =.len == 1
               cur_nde.Eq_tkn_(sub); // set as eq tkn
               else // =.len  > 1
               eq_pending =
                     sub; // do not set as eq tkn; note that Eq_tkn exists for bookkeeping and is
                          // not printed out,
               // close the current itm as the key of this node; a new itm is started lazily on
               // the next iteration (or after the loop)
               key_exists = true;
               Arg_itm_end(
                   ctx,
                   cur_nde,
                   cur_itm,
                   ws_bgn_idx,
                   ws_end_idx,
                   cur_itm_subs_len,
                   sub_pos_bgn,
                   wkr_typ,
                   key_exists,
                   true,
                   itm_is_static,
                   src,
                   cur_nde_idx);
               cur_nde.Key_tkn_(cur_itm);
               cur_itm = null;
               continue; // do not add tkn to cur_itm
             }
           }
           // eq is not a separator here: treat it like a text tkn
           if (ws_bgn_chk) ws_bgn_chk = false;
           else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
           break;
         }
         break;
       case Xop_tkn_itm_.Tid_pipe:
         if (cur_nde_idx == 0 && ws_bgn_chk && !colon_chk && wkr_typ == Xop_arg_wkr_.Typ_tmpl)
           return false; // 1st arg, but no name; EX: "{{|a}}", "{{ }}"; disregard if lnki, since
                         // "[[|a]]" is valid
         if (wkr_typ == Xop_arg_wkr_.Typ_tmpl && brack_count > 0) {
           // pipe inside an open bracket of a tmpl: not a separator; append to cur_itm
           break;
         } else {
           // pipe is a separator: close the current itm as val and hand the node to the wkr
           Arg_itm_end(
               ctx,
               cur_nde,
               cur_itm,
               ws_bgn_idx,
               ws_end_idx,
               cur_itm_subs_len,
               sub_pos_bgn,
               wkr_typ,
               key_exists,
               false,
               itm_is_static,
               src,
               cur_nde_idx);
           cur_nde.Val_tkn_(cur_itm);
           if (!wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx))
             return false; // NOTE: if invalid, exit now; lnki_wkr expects false if any argument is
                           // invalid; DATE:2014-06-06
           cur_nde = null;
           cur_itm = null;
           key_exists = false; // reset
           continue; // do not add tkn to cur_itm
         }
       case Xop_tkn_itm_
           .Tid_tmpl_prm: // nested prm (3 {) or invk (2 {); mark itm_is_static = false and treat
                          // tkn as txt
       case Xop_tkn_itm_.Tid_tmpl_invk:
         itm_is_static = false;
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       case Xop_tkn_itm_.Tid_xnde:
         Xop_xnde_tkn sub_as_xnde = (Xop_xnde_tkn) sub;
         // noinclude / includeonly / onlyinclude tags make the itm non-static
         switch (sub_as_xnde.Tag().Id()) {
           case Xop_xnde_tag_.Tid__noinclude:
           case Xop_xnde_tag_.Tid__includeonly:
           case Xop_xnde_tag_.Tid__onlyinclude:
             itm_is_static = false;
             break;
         }
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
       default:
         // any other tkn is text: ends the leading-ws run and cancels a pending trailing-ws idx
         if (ws_bgn_chk) ws_bgn_chk = false;
         else ws_end_idx = -1; // INLINE: AdjustWsForTxtTkn
         break;
     }
     cur_itm.Subs_add_grp(sub, root, i);
     cur_itm_subs_len++;
   }
   // a bracket opened in the last node was never closed: reject
   if (brack_count > 0) return false;
   if (cur_nde == null) // occurs when | is last tkn; EX: {{name|a|}};
   cur_nde = tkn_mkr.ArgNde(++cur_nde_idx, bgn_pos);
   if (cur_itm == null) { // occurs when = is last tkn; EX: {{name|a=}};
     cur_itm = tkn_mkr.ArgItm(bgn_pos, -1);
     itm_is_static = ws_bgn_chk = true;
     cur_itm_subs_len = 0;
     ws_bgn_idx = ws_end_idx = -1;
     key_exists = false;
   }
   // close the final itm as the val of the final node and hand it to the wkr
   Arg_itm_end(
       ctx,
       cur_nde,
       cur_itm,
       ws_bgn_idx,
       ws_end_idx,
       cur_itm_subs_len,
       bgn_pos,
       wkr_typ,
       key_exists,
       false,
       itm_is_static,
       src,
       cur_nde_idx);
   cur_nde.Val_tkn_(cur_itm);
   return wkr.Args_add(ctx, src, tkn, cur_nde, cur_nde_idx);
 }
Beispiel #3
0
  /**
   * Finishes an arg itm: marks leading / trailing whitespace tkns as ignored where the wkr type
   * calls for trimming, then stores the src end on itm and nde, the data range on itm, and the
   * static flag on itm.
   *
   * @param ctx parse context; forwarded to Ignore_y_grp_
   * @param nde node that owns the itm; only its src end is set here
   * @param itm itm being closed
   * @param ws_bgn_idx sub idx of the last leading ws tkn, or -1 if none
   * @param ws_end_idx sub idx of the first trailing ws tkn, or -1 if none
   * @param subs_len number of subs in itm
   * @param lxr_bgn src position used as src end for itm and nde and as the default data end
   * @param wkr_typ one of the Xop_arg_wkr_.Typ_* values
   * @param key_exists whether the owning node has a key
   * @param cur_itm_is_key whether itm is the key (true) or the val (false) of the node
   * @param itm_is_static value stored via Itm_static_
   * @param src not read by this method
   * @param arg_idx idx of the owning node; 0 is the 1st arg
   */
  private void Arg_itm_end(
      Xop_ctx ctx,
      Arg_nde_tkn nde,
      Arg_itm_tkn itm,
      int ws_bgn_idx,
      int ws_end_idx,
      int subs_len,
      int lxr_bgn,
      int wkr_typ,
      boolean key_exists,
      boolean cur_itm_is_key,
      boolean itm_is_static,
      byte[] src,
      int arg_idx) {
    int data_lo = itm.Src_bgn();
    int data_hi = lxr_bgn;
    final boolean mark_ignored = wkr_typ != Xop_arg_wkr_.Typ_prm; // prm never marks tkns; SEE:NOTE_1
    final boolean is_first_arg = arg_idx == 0;

    // ---- leading ws ----
    // lnki val keeps its leading ws; EX: [[alt= a b c]] -> " a b c"
    if (ws_bgn_idx != -1) {
      boolean trim_head;
      if (wkr_typ == Xop_arg_wkr_.Typ_prm) {
        trim_head = is_first_arg;
      } else if (wkr_typ == Xop_arg_wkr_.Typ_tmpl) {
        trim_head = key_exists || is_first_arg;
      } else if (wkr_typ == Xop_arg_wkr_.Typ_lnki) {
        trim_head = cur_itm_is_key || !key_exists; // trim if "a= b"; skip if " a=b" or " a"
      } else {
        trim_head = false;
      }
      if (trim_head) {
        for (int k = 0; k <= ws_bgn_idx; k++) {
          Xop_tkn_itm ws_tkn = itm.Subs_get(k); // expected to be space, newline, or tab
          if (mark_ignored) {
            ws_tkn.Ignore_y_grp_(ctx, itm, k);
          }
          if (k == ws_bgn_idx) {
            data_lo = ws_tkn.Src_end_grp(itm, k); // data starts where the last leading ws tkn ends
          }
        }
      }
    }

    // ---- trailing ws ----
    // lnki key keeps its trailing ws; EX: [[alt =a b c]] -> unrecognized nde ("alt ")
    if (ws_end_idx != -1) {
      boolean trim_tail;
      if (wkr_typ == Xop_arg_wkr_.Typ_prm) {
        trim_tail = is_first_arg;
      } else if (wkr_typ == Xop_arg_wkr_.Typ_tmpl) {
        // NOTE: never set "trim = true";
        // PAGE:fr.w:Histoire_de_la_marine_française_sous_Louis_XV_et_Louis_XVI DATE:2015-11-17
        trim_tail = key_exists || is_first_arg;
      } else if (wkr_typ == Xop_arg_wkr_.Typ_lnki) {
        trim_tail = !cur_itm_is_key;
      } else {
        trim_tail = false;
      }
      if (trim_tail) {
        for (int k = ws_end_idx; k < subs_len; k++) {
          Xop_tkn_itm ws_tkn = itm.Subs_get(k); // expected to be space, newline, or tab
          if (mark_ignored) {
            ws_tkn.Ignore_y_grp_(ctx, itm, k);
          }
          if (k == ws_end_idx) {
            data_hi = ws_tkn.Src_bgn_grp(itm, k); // data ends where the first trailing ws tkn begins
          }
        }
      }
    }

    // src_end is lxr_bgn; EX: {{a}} has src_end at 3; lxr_bgn for }}
    itm.Src_end_(lxr_bgn);
    nde.Src_end_(lxr_bgn);
    // always set dat, even if itm has dynamic parts; EX: {{{ a{{{1}}}b }}} has 4,13, not 3,14
    // (ignore ws)
    itm.Dat_rng_(data_lo, data_hi);
    itm.Itm_static_(itm_is_static);
  }