示例#1
0
  /**
   * Parse a comment. Parse a remark markup.
   *
   * <p>From the <a href="http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.4">HTML 4.01
   * Specification, W3C Recommendation 24 December 1999</a>
   * http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.4
   *
   * <p><cite> 3.2.4 Comments
   *
   * <p>HTML comments have the following syntax:
   *
   * <p><code>
   * &lt;!-- this is a comment --&gt;<p>
   * &lt;!-- and so is this one,<p>
   *     which occupies more than one line --&gt;<p>
   * </code> White space is not permitted between the markup declaration open delimiter("&lt;!") and
   * the comment open delimiter ("--"), but is permitted between the comment close delimiter ("--")
   * and the markup declaration close delimiter ("&gt;"). A common error is to include a string of
   * hyphens ("---") within a comment. Authors should avoid putting two or more adjacent hyphens
   * inside comments. Information that appears between comments has no special meaning (e.g.,
   * character references are not interpreted as such). Note that comments are markup.
   *
   * <p></cite>
   *
   * <p>This method uses a state machine with the following states:
   *
   * <ol>
   *   <li>state 0 - prior to the first open delimiter (first dash)
   *   <li>state 1 - prior to the second open delimiter (second dash)
   *   <li>state 2 - prior to the first closing delimiter (first dash)
   *   <li>state 3 - prior to the second closing delimiter (second dash)
   *   <li>state 4 - prior to the terminating &gt;
   * </ol>
   *
   * <p>All comment text (everything excluding the &lt; and &gt;), is included in the remark text.
   * We allow terminators like --!&gt; even though this isn't part of the spec.
   *
   * @param start The position at which to start scanning.
   * @param quotesmart If <code>true</code>, strings ignore quoted contents.
   * @return The parsed node.
   * @exception ParserException If a problem occurs reading from the source.
   */
  protected Node parseRemark(int start, boolean quotesmart) throws ParserException {
    // Scanner states:
    //   0 - expecting the first dash of the opening "--"
    //   1 - expecting the second dash of the opening "--"
    //   2 - inside the comment text, looking for the first closing dash
    //   3 - one closing dash seen, expecting the second
    //   4 - closing "--" seen, expecting the terminating '>'
    boolean done = false;
    int state = 0;

    while (!done) {
      char ch = mPage.getCharacter(mCursor);
      if (Page.EOF == ch) {
        // Input exhausted: stop in whatever state we are in. Everything scanned so far is
        // handed to makeRemark below, even when no terminator was seen. Because EOF is
        // filtered here, none of the states needs its own EOF test.
        done = true;
      } else {
        switch (state) {
          case 0: // prior to the first open delimiter
            if ('-' == ch) {
              state = 1;
            } else if ('>' == ch) {
              // "<!>" closes the declaration immediately. Previously a missing 'else' let the
              // following test override this branch and fall into parseString.
              done = true;
            } else {
              return (parseString(start, quotesmart));
            }
            break;
          case 1: // prior to the second open delimiter
            if ('-' == ch) {
              // handle <!--> because netscape does: peek at the character after "--"
              ch = mPage.getCharacter(mCursor);
              if ((Page.EOF == ch) || ('>' == ch)) {
                done = true;
              } else {
                // ordinary comment text; let state 2 see this character
                mPage.ungetCharacter(mCursor);
                state = 2;
              }
            } else {
              return (parseString(start, quotesmart));
            }
            break;
          case 2: // prior to the first closing delimiter
            if ('-' == ch) {
              state = 3;
            }
            break;
          case 3: // prior to the second closing delimiter
            state = ('-' == ch) ? 4 : 2;
            break;
          case 4: // prior to the terminating >
            if ('>' == ch) {
              done = true;
            } else if (Character.isWhitespace(ch)) {
              // whitespace is allowed between "--" and '>': stay in state 4
            } else if (!STRICT_REMARKS && (('-' == ch) || ('!' == ch))) {
              // lenient mode tolerates extra dashes and "--!>": stay in state 4
            } else {
              // anything else means the "--" was part of the comment text;
              // should maybe issue a warning mentioning STRICT_REMARKS
              state = 2;
            }
            break;
          default:
            throw new IllegalStateException("how the f**k did we get in state " + state);
        }
      }
    }

    return (makeRemark(start, mCursor.getPosition()));
  }
示例#2
0
  /**
   * Get the next node from the source.
   *
   * @param quotesmart If <code>true</code>, strings ignore quoted contents.
   * @return A Remark, Text or Tag, or <code>null</code> if no more lexemes are present.
   * @exception ParserException If there is a problem with the underlying page.
   */
  public Node nextNode(boolean quotesmart) throws ParserException {
    // debugging support
    if (-1 != mDebugLineTrigger) {
      Page page = getPage();
      int lineno = page.row(mCursor);
      if (mDebugLineTrigger < lineno) {
        mDebugLineTrigger = lineno + 1; // trigger on next line too
      }
    }

    int start = mCursor.getPosition();
    char ch = mPage.getCharacter(mCursor);
    Node ret;
    switch (ch) {
      case Page.EOF:
        ret = null;
        break;
      case '<':
        // Look one character ahead to decide what kind of markup this is.
        ch = mPage.getCharacter(mCursor);
        if (Page.EOF == ch) {
          // a lone '<' at the end of input is plain text
          ret = makeString(start, mCursor.getPosition());
        } else if ('%' == ch) {
          mPage.ungetCharacter(mCursor); // parseJsp expects to read the '%' itself
          ret = parseJsp(start);
        } else if ('?' == ch) {
          mPage.ungetCharacter(mCursor); // parsePI expects to read the '?' itself
          ret = parsePI(start);
        } else if (('/' == ch) || Character.isLetter(ch)) {
          // '%' needs no test here: it was already dispatched to parseJsp above
          mPage.ungetCharacter(mCursor);
          ret = parseTag(start);
        } else if ('!' == ch) {
          ch = mPage.getCharacter(mCursor);
          if (Page.EOF == ch) {
            ret = makeString(start, mCursor.getPosition());
          } else if ('>' == ch) {
            // handle <!>
            ret = makeRemark(start, mCursor.getPosition());
          } else {
            mPage.ungetCharacter(mCursor); // remark/tag need this char
            if ('-' == ch) {
              ret = parseRemark(start, quotesmart);
            } else {
              mPage.ungetCharacter(mCursor); // tag needs prior one too
              ret = parseTag(start);
            }
          }
        } else {
          // Not markup, so the '<' is text. Push the look-ahead character back so that
          // parseString examines it: without this, input such as "<<tag>" hid the second
          // '<' from parseString and the real tag was swallowed into the text node.
          mPage.ungetCharacter(mCursor);
          ret = parseString(start, quotesmart);
        }
        break;
      default:
        // ordinary text: parseString must see this first character itself
        mPage.ungetCharacter(mCursor);
        ret = parseString(start, quotesmart);
        break;
    }

    return (ret);
  }
示例#3
0
  /**
   * Advance the cursor through a JIS escape sequence.
   *
   * @param cursor A cursor positioned within the escape sequence.
   * @exception ParserException If a problem occurs reading from the source.
   */
  protected void scanJIS(Cursor cursor) throws ParserException {
    // Number of characters of the terminating sequence ESC ( J matched so far.
    int matched = 0;

    for (char c = mPage.getCharacter(cursor); Page.EOF != c; c = mPage.getCharacter(cursor)) {
      if (0 == matched) {
        if (0x1b == c) {
          matched = 1; // saw the escape
        }
      } else if (1 == matched) {
        matched = ('(' == c) ? 2 : 0;
      } else if (2 == matched) {
        if ('J' == c) {
          return; // full ESC ( J consumed
        }
        matched = 0; // false alarm, start looking for an escape again
      } else {
        throw new IllegalStateException("state " + matched);
      }
    }
    // end of input reached before the sequence was completed
  }
示例#4
0
  /**
   * Parse a tag. Parse the name and attributes from a start tag.
   *
   * <p>From the <a href="http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2">HTML 4.01
   * Specification, W3C Recommendation 24 December 1999</a>
   * http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2
   *
   * <p><cite> 3.2.2 Attributes
   *
   * <p>Elements may have associated properties, called attributes, which may have values (by
   * default, or set by authors or scripts). Attribute/value pairs appear before the final ">" of an
   * element's start tag. Any number of (legal) attribute value pairs, separated by spaces, may
   * appear in an element's start tag. They may appear in any order.
   *
   * <p>In this example, the id attribute is set for an H1 element: <code>
   * &lt;H1 id="section1"&gt;
   * </code> This is an identified heading thanks to the id attribute <code>
   * &lt;/H1&gt;
   * </code> By default, SGML requires that all attribute values be delimited using either double
   * quotation marks (ASCII decimal 34) or single quotation marks (ASCII decimal 39). Single quote
   * marks can be included within the attribute value when the value is delimited by double quote
   * marks, and vice versa. Authors may also use numeric character references to represent double
   * quotes (&amp;#34;) and single quotes (&amp;#39;). For doublequotes authors can also use the
   * character entity reference &amp;quot;.
   *
   * <p>In certain cases, authors may specify the value of an attribute without any quotation marks.
   * The attribute value may only contain letters (a-z and A-Z), digits (0-9), hyphens (ASCII
   * decimal 45), periods (ASCII decimal 46), underscores (ASCII decimal 95), and colons (ASCII
   * decimal 58). We recommend using quotation marks even when it is possible to eliminate them.
   *
   * <p>Attribute names are always case-insensitive.
   *
   * <p>Attribute values are generally case-insensitive. The definition of each attribute in the
   * reference manual indicates whether its value is case-insensitive.
   *
   * <p>All the attributes defined by this specification are listed in the attribute index.
   *
   * <p></cite>
   *
   * <p>This method uses a state machine with the following states:
   *
   * <ol>
   *   <li>state 0 - outside of any attribute
   *   <li>state 1 - within attribute name
   *   <li>state 2 - equals hit
   *   <li>state 3 - within naked attribute value.
   *   <li>state 4 - within single quoted attribute value
   *   <li>state 5 - within double quoted attribute value
   *   <li>state 6 - whitespace after an attribute name; leads to state 2 (=) or state 0
   * </ol>
   *
   * <p>The starting point for the various components is stored in an array of integers that match
   * the initiation point for the states one-for-one, i.e. bookmarks[0] is where state 0 began,
   * bookmarks[1] is where state 1 began, etc. Attributes are stored in a <code>Vector</code> having
   * one slot for each whitespace or attribute/value pair. The first slot is for attribute name
   * (kind of like a standalone attribute).
   *
   * @param start The position at which to start scanning.
   * @return The parsed tag.
   * @exception ParserException If a problem occurs reading from the source.
   */
  protected Node parseTag(int start) throws ParserException {
    boolean done;
    char ch;
    int state;
    int[] bookmarks;
    Vector attributes;

    done = false;
    attributes = new Vector();
    state = 0;
    // Slots 0..7: bookmarks[state + 1] is refreshed before every read, and state goes up to 6.
    bookmarks = new int[8];
    bookmarks[0] = mCursor.getPosition();
    while (!done) {
      // Record where the character about to be read begins, filed under the current state.
      // The slot keeps being overwritten while the state stays the same, so once the state
      // changes it holds the position of the character that caused the change.
      bookmarks[state + 1] = mCursor.getPosition();
      ch = mPage.getCharacter(mCursor);
      switch (state) {
        case 0: // outside of any attribute
          if ((Page.EOF == ch) || ('>' == ch) || ('<' == ch)) {
            if ('<' == ch) {
              // don't consume the opening angle of whatever follows;
              // re-record the bookmark so it matches the rewound cursor
              mPage.ungetCharacter(mCursor);
              bookmarks[state + 1] = mCursor.getPosition();
            }
            // presumably records the whitespace run between bookmarks[0] and bookmarks[1]
            // (helper not visible here)
            whitespace(attributes, bookmarks);
            done = true;
          } else if (!Character.isWhitespace(ch)) {
            // first non-whitespace character starts an attribute name
            whitespace(attributes, bookmarks);
            state = 1;
          }
          break;
        case 1: // within attribute name
          if ((Page.EOF == ch) || ('>' == ch) || ('<' == ch)) {
            if ('<' == ch) {
              // don't consume the opening angle
              mPage.ungetCharacter(mCursor);
              bookmarks[state + 1] = mCursor.getPosition();
            }
            // the name had no value: presumably stored as a standalone attribute
            standalone(attributes, bookmarks);
            done = true;
          } else if (Character.isWhitespace(ch)) {
            // whitespace might be followed by the next attribute or by an equals sign
            // see Bug #891058 Bug in lexer.
            bookmarks[6] = bookmarks[2]; // bookmarks[0] is set in state 6 if applicable
            state = 6;
          } else if ('=' == ch) state = 2;
          break;
        case 2: // equals hit
          if ((Page.EOF == ch) || ('>' == ch)) {
            // "name=" with nothing after it
            empty(attributes, bookmarks);
            done = true;
          } else if ('\'' == ch) {
            state = 4;
            // carry the position of the opening quote into the slot for state 4
            bookmarks[4] = bookmarks[3];
          } else if ('"' == ch) {
            state = 5;
            // carry the position of the opening quote into the slot for state 5
            bookmarks[5] = bookmarks[3];
          } else if (Character.isWhitespace(ch)) {
            // collect whitespace after "=" into the assignment string;
            // do nothing
            // see Bug #891058 Bug in lexer.
          } else state = 3;
          break;
        case 3: // within naked attribute value
          if ((Page.EOF == ch) || ('>' == ch)) {
            naked(attributes, bookmarks);
            done = true;
          } else if (Character.isWhitespace(ch)) {
            naked(attributes, bookmarks);
            // the terminating whitespace character begins the next whitespace run
            bookmarks[0] = bookmarks[4];
            state = 0;
          }
          break;
        case 4: // within single quoted attribute value
          if (Page.EOF == ch) {
            // unterminated quote: keep what was read
            single_quote(attributes, bookmarks);
            done = true; // complain?
          } else if ('\'' == ch) {
            single_quote(attributes, bookmarks);
            // the next run starts just past the closing quote
            bookmarks[0] = bookmarks[5] + 1;
            state = 0;
          }
          break;
        case 5: // within double quoted attribute value
          if (Page.EOF == ch) {
            // unterminated quote: keep what was read
            double_quote(attributes, bookmarks);
            done = true; // complain?
          } else if ('"' == ch) {
            double_quote(attributes, bookmarks);
            // the next run starts just past the closing quote
            bookmarks[0] = bookmarks[6] + 1;
            state = 0;
          }
          break;
          // patch for lexer state correction by
          // Gernot Fricke
          // See Bug # 891058 Bug in lexer.
        case 6: // undecided for state 0 or 2
          // we have read whitespace after an attribute name
          if (Page.EOF == ch) {
            // same as last else clause
            // NOTE(review): this pushes back after reading EOF; the effect depends on how
            // Page handles push-back at end of input - confirm against Page
            standalone(attributes, bookmarks);
            bookmarks[0] = bookmarks[6];
            mPage.ungetCharacter(mCursor);
            state = 0;
          } else if (Character.isWhitespace(ch)) {
            // proceed
          } else if ('=' == ch) // the whitespace belonged to the equals sign
          {
            // make the bookmarks look as if state 1 had gone straight to state 2
            bookmarks[2] = bookmarks[6];
            bookmarks[3] = bookmarks[7];
            state = 2;
          } else {
            // The whitespace was not followed by an equals sign, so the attribute was a
            // standalone attribute. Create it, push back the character just read, and
            // restart scanning in state 0 with bookmarks[0] at the start of the whitespace.
            standalone(attributes, bookmarks);
            bookmarks[0] = bookmarks[6];
            mPage.ungetCharacter(mCursor);
            state = 0;
          }
          break;
        default:
          throw new IllegalStateException("how the f**k did we get in state " + state);
      }
    }

    return (makeTag(start, mCursor.getPosition(), attributes));
  }
示例#5
0
  /**
   * Parse a string node. Scan characters until "&lt;/", "&lt;%", "&lt;!", "&lt;?" or &lt; followed
   * by a letter is encountered, or the input stream is exhausted, in which case <code>null</code>
   * is returned.
   *
   * @param start The position at which to start scanning.
   * @param quotesmart If <code>true</code>, strings ignore quoted contents.
   * @return The parsed node.
   * @exception ParserException If a problem occurs reading from the source.
   */
  protected Node parseString(int start, boolean quotesmart) throws ParserException {
    // Quote character that is currently open, or 0 while outside quoted text.
    char openQuote = 0;
    boolean finished = false;

    while (!finished) {
      char c = mPage.getCharacter(mCursor);

      if (Page.EOF == c) {
        finished = true;
        continue;
      }

      if (0x1b == c) {
        // escape: ESC $ B introduces a JIS sequence that must be skipped as a unit
        c = mPage.getCharacter(mCursor);
        if (Page.EOF == c) {
          finished = true;
        } else if ('$' != c) {
          mPage.ungetCharacter(mCursor);
        } else {
          c = mPage.getCharacter(mCursor);
          if (Page.EOF == c) {
            finished = true;
          } else if ('B' == c) {
            scanJIS(mCursor);
          } else {
            // not ESC $ B after all: give back both characters read after the escape
            mPage.ungetCharacter(mCursor);
            mPage.ungetCharacter(mCursor);
          }
        }
        continue;
      }

      switch (c) {
        case '\'':
        case '"':
          if (quotesmart) {
            if (0 == openQuote) {
              openQuote = c; // enter quoted state
            } else if (c == openQuote) {
              openQuote = 0; // exit quoted state
            }
          }
          break;

        case '\\':
          // patch from Gernot Fricke to handle escaped closing quote
          if (quotesmart && (0 != openQuote)) {
            c = mPage.getCharacter(mCursor); // try to consume the escaped character
            boolean escapable = ('\\' == c) || (c == openQuote);
            if ((Page.EOF != c) && !escapable) {
              mPage.ungetCharacter(mCursor); // not an escape after all
            }
          }
          break;

        case '/':
          // Skip "//" and "/* */" script comments so that quotes inside them are ignored.
          if (quotesmart && (0 == openQuote)) {
            c = mPage.getCharacter(mCursor);
            if (Page.EOF == c) {
              finished = true;
            } else if ('/' == c) {
              // line comment: consume through the end of the line
              do {
                c = mPage.getCharacter(mCursor);
              } while (!((Page.EOF == c) || ('\n' == c)));
            } else if ('*' == c) {
              // block comment: consume through the closing "*/"
              do {
                do {
                  c = mPage.getCharacter(mCursor);
                } while ((Page.EOF != c) && ('*' != c));
                c = mPage.getCharacter(mCursor);
                if ('*' == c) {
                  mPage.ungetCharacter(mCursor); // could be the '*' of "*/"
                }
              } while ((Page.EOF != c) && ('/' != c));
            } else {
              mPage.ungetCharacter(mCursor);
            }
          }
          break;

        case '<':
          if (0 == openQuote) {
            c = mPage.getCharacter(mCursor);
            if (Page.EOF == c) {
              finished = true;
            } else if (('/' == c)
                || Character.isLetter(c)
                || ('!' == c)
                || ('%' == c)
                || ('?' == c)) {
              // start of markup: stop, leaving the cursor on the '<'
              finished = true;
              mPage.ungetCharacter(mCursor);
              mPage.ungetCharacter(mCursor);
            } else {
              // it's not a tag, so keep going, but check the character for quotes
              mPage.ungetCharacter(mCursor);
            }
          }
          break;

        default:
          break;
      }
    }

    return (makeString(start, mCursor.getPosition()));
  }
示例#6
0
  /**
   * Return CDATA as a text node. Slightly less rigid than {@link #parseCDATA()} this method
   * provides for parsing CDATA that may contain quoted strings that have embedded ETAGO ("&lt;/")
   * delimiters and skips single and multiline comments.
   *
   * @param quotesmart If <code>true</code> the strict definition of CDATA is extended to allow for
   *     single or double quoted ETAGO ("&lt;/") sequences.
   * @return The <code>TextNode</code> of the CDATA or <code>null</code> if none.
   * @see #parseCDATA()
   * @exception ParserException If a problem occurs reading from the source.
   */
  public Node parseCDATA(boolean quotesmart) throws ParserException {
    int start;
    int state;
    boolean done;
    char quote;
    char ch;
    int end;
    boolean comment;

    start = mCursor.getPosition();
    // state 0 - ordinary content, 1 - after "<", 2 - after "</", 3 - inside "<!--"
    state = 0;
    done = false;
    quote = 0; // open quote character, or 0 when not inside a quoted string
    comment = false; // true after "//" until the next newline; suppresses quote tracking

    while (!done) {
      ch = mPage.getCharacter(mCursor);
      switch (state) {
        case 0: // prior to ETAGO
          switch (ch) {
            case Page.EOF:
              done = true;
              break;
            case '\'':
              // the else below pairs with the inner if (0 == quote)
              if (quotesmart && !comment)
                if (0 == quote) quote = '\''; // enter quoted state
                else if ('\'' == quote) quote = 0; // exit quoted state
              break;
            case '"':
              // the else below pairs with the inner if (0 == quote)
              if (quotesmart && !comment)
                if (0 == quote) quote = '"'; // enter quoted state
                else if ('"' == quote) quote = 0; // exit quoted state
              break;
            case '\\':
              // a backslash only matters inside a quoted string
              if (quotesmart)
                if (0 != quote) {
                  ch = mPage.getCharacter(mCursor); // try to consume escaped character
                  if (Page.EOF == ch) done = true;
                  else if ((ch != '\\') && (ch != quote))
                    // unconsume char if character was not an escapable char.
                    mPage.ungetCharacter(mCursor);
                }
              break;
            case '/':
              if (quotesmart)
                if (0 == quote) {
                  // handle multiline and double slash comments (with a quote)
                  ch = mPage.getCharacter(mCursor);
                  if (Page.EOF == ch) done = true;
                  else if ('/' == ch) comment = true; // line comment: only sets the flag
                  else if ('*' == ch) {
                    // block comment: consume through the closing "*/" or end of input
                    do {
                      do ch = mPage.getCharacter(mCursor);
                      while ((Page.EOF != ch) && ('*' != ch));
                      ch = mPage.getCharacter(mCursor);
                      // a second '*' may itself begin "*/", so look at it again
                      if (ch == '*') mPage.ungetCharacter(mCursor);
                    } while ((Page.EOF != ch) && ('/' != ch));
                  } else mPage.ungetCharacter(mCursor);
                }
              break;
            case '\n':
              // end of line ends any "//" comment
              comment = false;
              break;
            case '<':
              // in quotesmart mode a '<' inside a quoted string is ignored
              if (quotesmart) {
                if (0 == quote) state = 1;
              } else state = 1;
              break;
            default:
              break;
          }
          break;
        case 1: // <
          switch (ch) {
            case Page.EOF:
              done = true;
              break;
            case '/':
              state = 2;
              break;
            case '!':
              // "<!--" opens an embedded comment (state 3); anything else is ordinary content
              ch = mPage.getCharacter(mCursor);
              if (Page.EOF == ch) done = true;
              else if ('-' == ch) {
                ch = mPage.getCharacter(mCursor);
                if (Page.EOF == ch) done = true;
                else if ('-' == ch) state = 3;
                else state = 0;
              } else state = 0;
              break;
            default:
              // the character is consumed here and is not re-examined by state 0
              state = 0;
              break;
          }
          break;
        case 2: // </
          comment = false;
          if (Page.EOF == ch) done = true;
          else if (Character.isLetter(ch)) {
            // "</" followed by a letter is the ETAGO that ends the CDATA
            done = true;
            // back up to the start of ETAGO: the letter, the '/' and the '<'
            mPage.ungetCharacter(mCursor);
            mPage.ungetCharacter(mCursor);
            mPage.ungetCharacter(mCursor);
          } else state = 0;
          break;
        case 3: // <!
          // inside "<!--": scan for the closing "-->"
          comment = false;
          if (Page.EOF == ch) done = true;
          else if ('-' == ch) {
            ch = mPage.getCharacter(mCursor);
            if (Page.EOF == ch) done = true;
            else if ('-' == ch) {
              ch = mPage.getCharacter(mCursor);
              if (Page.EOF == ch) done = true;
              else if ('>' == ch) state = 0;
              else {
                // "--" not followed by '>': rewind so the second '-' is re-read as a
                // possible first dash (this is what makes "--->" terminate the comment)
                mPage.ungetCharacter(mCursor);
                mPage.ungetCharacter(mCursor);
              }
            } else mPage.ungetCharacter(mCursor); // lone '-': re-examine the next character
          }
          break;
        default:
          throw new IllegalStateException("how the f**k did we get in state " + state);
      }
    }
    end = mCursor.getPosition();

    return (makeString(start, end));
  }
示例#7
0
  /**
   * Parse an XML processing instruction. Scan characters until "?&gt;" is encountered, or the input
   * stream is exhausted, in which case <code>null</code> is returned.
   *
   * @param start The position at which to start scanning.
   * @return The parsed node.
   * @exception ParserException If a problem occurs reading from the source.
   */
  protected Node parsePI(int start) throws ParserException {
    Vector attributes = new Vector();
    // Position just past the opening '?'; stays 0 until that character has been seen.
    int contentStart = 0;
    // 0 - before the opening '?', 1 - inside the instruction, 2 - after a '?' that may close
    // it, 3 - terminated by "?>"; while inside a quoted string the state holds the quote
    // character itself.
    int state = 0;
    boolean finished = false;

    // <?xyz?>
    // 011112d
    while (!finished) {
      char c = mPage.getCharacter(mCursor);
      if (0 == state) {
        if ('?' == c) { // <?
          contentStart = mCursor.getPosition();
          attributes.addElement(
              new PageAttribute(mPage, start + 1, contentStart, -1, -1, (char) 0));
          state = 1;
        } else {
          // <\0, <> or anything else: not a processing instruction
          finished = true;
        }
      } else if (1 == state) {
        if ((Page.EOF == c) || ('>' == c)) { // <?x\0 or <?x>
          finished = true;
        } else if (('\'' == c) || ('"' == c)) { // <?..." or <?...'
          state = c;
        } else if ('?' == c) { // <?...?
          state = 2;
        }
      } else if (2 == state) {
        if (Page.EOF == c) { // <?x..?\0
          finished = true;
        } else if ('>' == c) { // <?x..?>
          state = 3;
          finished = true;
        } else { // <?...?x
          state = 1;
        }
      } else if (('"' == state) || ('\'' == state)) {
        if (Page.EOF == c) { // unterminated quoted string
          finished = true;
        } else if (c == state) { // matching closing quote
          state = 1;
        }
      } else {
        throw new IllegalStateException("how the f**k did we get in state " + state);
      }
    }

    if (3 != state) {
      // no "?>" terminator was found: treat the text as a string instead
      return (parseString(start, true)); // hmmm, true?
    }
    if (0 == contentStart) {
      throw new IllegalStateException("processing instruction with no content");
    }

    // The cursor sits just past "?>", so the closing '?' is two characters back.
    int closing = mCursor.getPosition() - 2;
    attributes.addElement(new PageAttribute(mPage, contentStart, closing, -1, -1, (char) 0));
    attributes.addElement(new PageAttribute(mPage, closing, closing + 1, -1, -1, (char) 0));

    return (makeTag(start, mCursor.getPosition(), attributes));
  }
示例#8
0
  /**
   * Parse a java server page node. Scan characters until "%&gt;" is encountered, or the input
   * stream is exhausted, in which case <code>null</code> is returned.
   *
   * @param start The position at which to start scanning.
   * @return The parsed node.
   * @exception ParserException If a problem occurs reading from the source.
   */
  protected Node parseJsp(int start) throws ParserException {
    boolean done = false;
    // 0 - before '%', 1 - before the optional qualifier, 2 - inside the code,
    // 3 - after a '%' that may close the block, 4 - terminated by "%>";
    // while inside a quoted string the state holds the quote character itself.
    int state = 0;
    // Position where the code portion begins; stays 0 until the opener has been scanned.
    int code = 0;
    Vector attributes = new Vector();

    // <%xyz%>
    // 012223d
    // <%=xyz%>
    // 0122223d
    // <%@xyz%>
    // 0122223d
    while (!done) {
      char ch = mPage.getCharacter(mCursor);
      switch (state) {
        case 0: // prior to the percent
          switch (ch) {
            case '%': // <%
              state = 1;
              break;
            default: // <\0, <> or anything else: not a JSP block
              done = true;
              break;
          }
          break;
        case 1: // prior to the optional qualifier
          switch (ch) {
            case Page.EOF: // <%\0
            case '>': // <%>
              done = true;
              break;
            case '=': // <%=
            case '@': // <%@
              // the qualifier belongs to the opener, so the code starts after it
              code = mCursor.getPosition();
              attributes.addElement(new PageAttribute(mPage, start + 1, code, -1, -1, (char) 0));
              state = 2;
              break;
            default: // <%x
              // no qualifier: the character just read is already the first code character
              code = mCursor.getPosition() - 1;
              attributes.addElement(new PageAttribute(mPage, start + 1, code, -1, -1, (char) 0));
              state = 2;
              break;
          }
          break;
        case 2: // prior to the closing percent
          switch (ch) {
            case Page.EOF: // <%x\0
            case '>': // <%x>
              done = true;
              break;
            case '\'':
            case '"': // <%???"
              state = ch;
              break;
            case '%': // <%???%
              state = 3;
              break;
            case '/': // possibly the start of a // or /* comment
              ch = mPage.getCharacter(mCursor);
              if (Page.EOF == ch) {
                // Stop here instead of pushing back after end of input, matching the
                // equivalent checks in parseString and parseCDATA.
                // NOTE(review): if Page does not advance the cursor on EOF, the old
                // push-back would rewind over the '/' and rescan it forever - confirm.
                done = true;
              } else if ('/' == ch) {
                // line comment: skip to the end of the line
                while (true) {
                  ch = mPage.getCharacter(mCursor);
                  if (Page.EOF == ch) {
                    done = true;
                    break;
                  } else if (('\n' == ch) || ('\r' == ch)) {
                    break;
                  }
                }
              } else if ('*' == ch) {
                // block comment: skip through the closing "*/" or end of input
                do {
                  do {
                    ch = mPage.getCharacter(mCursor);
                  } while ((Page.EOF != ch) && ('*' != ch));
                  ch = mPage.getCharacter(mCursor);
                  if ('*' == ch) {
                    mPage.ungetCharacter(mCursor); // could be the '*' of "*/"
                  }
                } while ((Page.EOF != ch) && ('/' != ch));
              } else {
                // just a slash: let state 2 examine the following character
                mPage.ungetCharacter(mCursor);
              }
              break;
            default: // <%???x
              break;
          }
          break;
        case 3: // a '%' was seen inside the code
          switch (ch) {
            case Page.EOF: // <%x??%\0
              done = true;
              break;
            case '>': // <%x??%>
              state = 4;
              done = true;
              break;
            default: // <%???%x
              state = 2;
              break;
          }
          break;
        case '"': // within a double quoted string
          switch (ch) {
            case Page.EOF: // <%x??"\0
              done = true;
              break;
            case '"':
              state = 2;
              break;
            default: // <%???"??x
              break;
          }
          break;
        case '\'': // within a single quoted string
          switch (ch) {
            case Page.EOF: // <%x??'\0
              done = true;
              break;
            case '\'':
              state = 2;
              break;
            default: // <%???'??x
              break;
          }
          break;
        default:
          throw new IllegalStateException("how the f**k did we get in state " + state);
      }
    }

    if (4 != state) {
      // no "%>" terminator was found: treat the text as a string instead
      return (parseString(start, true)); // hmmm, true?
    }
    if (0 == code) {
      throw new IllegalStateException("jsp with no code!");
    }

    // The cursor sits just past "%>", so the closing '%' is two characters back.
    int closing = mCursor.getPosition() - 2;
    attributes.addElement(new PageAttribute(mPage, code, closing, -1, -1, (char) 0));
    attributes.addElement(new PageAttribute(mPage, closing, closing + 1, -1, -1, (char) 0));

    return (makeTag(start, mCursor.getPosition(), attributes));
  }