Exemple #1
0
 /**
  * Creates a token, decoding backslash-u escapes (a backslash, the letter 'u' and exactly four
  * hex digits) found in the lexeme of any token that is not a string literal.
  *
  * <p>Decoding rules, applied left to right:
  *
  * <ul>
  *   <li>A well-formed escape whose value is a defined Unicode character is replaced by that
  *       single character.
  *   <li>A malformed escape (fewer than four hex digits, or an undefined character) loses its
  *       backslash; the 'u' and whatever follows are kept verbatim.
  *   <li>A backslash-u pair that is itself preceded by a backslash is left untouched.
  * </ul>
  *
  * <p>String-literal lexemes are stored as-is. A {@code null} lexeme is also stored as-is for
  * every token class (previously this threw a NullPointerException for non-string tokens).
  *
  * @param tokenClass the token class id; compared against {@code STRINGLITERAL_TOKEN}
  * @param lexeme the raw token text, possibly containing backslash-u escapes; may be null
  */
 public Token(int tokenClass, String lexeme) {
   this.tokenClass = tokenClass;

   // InputBuffer's escapeString handles all escapes in a string, including u's.
   // NOTE(review): the original comment also says RegExp literals should not have their u's
   // escaped, yet only STRINGLITERAL_TOKEN is exempted here -- confirm the intended behavior.
   if (tokenClass == STRINGLITERAL_TOKEN || lexeme == null || lexeme.indexOf("\\u") == -1) {
     this.lexeme = lexeme;
     return;
   }

   final int len = lexeme.length();
   // Local, single-threaded buffer: StringBuilder avoids StringBuffer's needless locking.
   final StringBuilder decoded = new StringBuilder(len);

   for (int x = 0; x < len; x++) {
     final char current = lexeme.charAt(x);
     // "x + 2 < len" both guards charAt(x + 1) and requires at least one char after the 'u'.
     final boolean startsEscape =
         current == '\\' && x + 2 < len && lexeme.charAt(x + 1) == 'u';

     if (!startsEscape) {
       decoded.append(current);
       continue;
     }

     if (x != 0 && lexeme.charAt(x - 1) == '\\') {
       // Watch out for '\\u': the backslash is escaped, so copy it unchanged. The 'u' that
       // follows is copied by the next iteration.
       decoded.append(current);
       continue;
     }

     // Accumulate up to four hex digits; stop early on the first non-hex character.
     int codeUnit = 0;
     int y = x + 2;
     final int digitsEnd = Math.min(x + 6, len);
     for (; y < digitsEnd; y++) {
       final int digit = Character.digit(lexeme.charAt(y), 16);
       if (digit == -1) {
         break;
       }
       codeUnit = (codeUnit << 4) + digit;
     }

     // Four hex digits give at most 0xFFFF, so the narrowing cast to char is lossless.
     if (y == x + 6 && Character.isDefined((char) codeUnit)) {
       decoded.append((char) codeUnit);
       x += 5; // skip the rest of the six-character escape; the loop's x++ consumes the last
     } else {
       // Malformed escape or undefined character: drop the backslash and keep the 'u'.
       decoded.append(lexeme.charAt(++x));
     }
   }

   this.lexeme = decoded.toString();
 }
Exemple #2
0
  public int nexttoken(boolean resetState) {
    String doctagname = "description";
    StringBuilder doctextbuf = null;
    int startofxml = pos();
    StringBuilder blockcommentbuf = null;
    char regexp_flags =
        0; // used to track option flags encountered in a regexp expression.  Initialized in
           // regexp_state
    boolean maybe_reserved = false;
    char c = 0;

    if (resetState) {
      isFirstTokenOnLine = false;
    }

    while (true) {
      if (debug) {
        System.out.println("state = " + state + ", next = " + pos());
      }

      switch (state) {
        case start_state:
          {
            c = nextchar();
            mark();

            switch (c) {
              case 'a':
              case 'b':
              case 'c':
              case 'd':
              case 'e':
              case 'f':
              case 'g':
              case 'h':
              case 'i':
              case 'j':
              case 'k':
              case 'l':
              case 'm':
              case 'n':
              case 'o':
              case 'p':
              case 'q':
              case 'r':
              case 's':
              case 't':
              case 'u':
              case 'v':
              case 'w':
              case 'x':
              case 'y':
              case 'z':
                maybe_reserved = true;
              case 'A':
              case 'B':
              case 'C':
              case 'D':
              case 'E':
              case 'F':
              case 'G':
              case 'H':
              case 'I':
              case 'J':
              case 'K':
              case 'L':
              case 'M':
              case 'N':
              case 'O':
              case 'P':
              case 'Q':
              case 'R':
              case 'S':
              case 'T':
              case 'U':
              case 'V':
              case 'W':
              case 'X':
              case 'Y':
              case 'Z':
              case '_':
              case '$':
                state = A_state;
                continue;

              case 0xffef: // could not have worked...case 0xffffffef: // ??? not in Character type
                           // range ???
                if (nextchar() == 0xffffffbb && nextchar() == 0xffffffbf) {
                  // ISSUE: set encoding scheme to utf-8, and implement support for utf8
                  state = start_state;
                } else {
                  state = error_state;
                }
                continue;

              case '@':
                return makeToken(ATSIGN_TOKEN);

              case '\'':
              case '\"':
                {
                  char startquote = (char) c;
                  boolean needs_escape = false;

                  while ((c = nextchar()) != startquote) {
                    if (c == '\\') {
                      needs_escape = true;
                      c = nextchar();

                      // special case: escaped eol strips crlf or lf

                      if (c == '\r') c = nextchar();
                      if (c == '\n') continue;
                    } else if (c == '\r' || c == '\n') {
                      if (startquote == '\'')
                        error(kError_Lexical_LineTerminatorInSingleQuotedStringLiteral);
                      else error(kError_Lexical_LineTerminatorInDoubleQuotedStringLiteral);
                      break;
                    } else if (c == 0) {
                      error(kError_Lexical_EndOfStreamInStringLiteral);
                      return makeToken(EOS_TOKEN);
                    }
                  }
                  return makeToken(
                      STRINGLITERAL_TOKEN, input.copyReplaceStringEscapes(needs_escape));
                }

              case '-': // tokens: -- -= -
                switch (nextchar()) {
                  case '-':
                    return makeToken(MINUSMINUS_TOKEN);
                  case '=':
                    return makeToken(MINUSASSIGN_TOKEN);
                  default:
                    retract();
                    return makeToken(MINUS_TOKEN);
                }

              case '!': // tokens: ! != !===
                if (nextchar() == '=') {
                  if (nextchar() == '=') return makeToken(STRICTNOTEQUALS_TOKEN);
                  retract();
                  return makeToken(NOTEQUALS_TOKEN);
                }
                retract();
                return makeToken(NOT_TOKEN);

              case '%': // tokens: % %=
                switch (nextchar()) {
                  case '=':
                    return makeToken(MODULUSASSIGN_TOKEN);
                  default:
                    retract();
                    return makeToken(MODULUS_TOKEN);
                }

              case '&': // tokens: & &= && &&=
                c = nextchar();
                if (c == '=') return makeToken(BITWISEANDASSIGN_TOKEN);
                if (c == '&') {
                  if (nextchar() == '=') return makeToken(LOGICALANDASSIGN_TOKEN);
                  retract();
                  return makeToken(LOGICALAND_TOKEN);
                }
                retract();
                return makeToken(BITWISEAND_TOKEN);

              case '#': // # is short for use
                if (HAS_HASHPRAGMAS) {
                  return makeToken(USE_TOKEN);
                }
                state = error_state;
                continue;

              case '(':
                return makeToken(LEFTPAREN_TOKEN);

              case ')':
                return makeToken(RIGHTPAREN_TOKEN);

              case '*': // tokens: *=  *
                if (nextchar() == '=') return makeToken(MULTASSIGN_TOKEN);
                retract();
                return makeToken(MULT_TOKEN);

              case ',':
                return makeToken(COMMA_TOKEN);

              case '.':
                state = dot_state;
                continue;

              case '/':
                state = slash_state;
                continue;

              case ':': // tokens: : ::
                if (nextchar() == ':') {
                  return makeToken(DOUBLECOLON_TOKEN);
                }
                retract();
                return makeToken(COLON_TOKEN);

              case ';':
                return makeToken(SEMICOLON_TOKEN);

              case '?':
                return makeToken(QUESTIONMARK_TOKEN);

              case '[':
                return makeToken(LEFTBRACKET_TOKEN);

              case ']':
                return makeToken(RIGHTBRACKET_TOKEN);

              case '^': // tokens: ^=  ^
                if (nextchar() == '=') return makeToken(BITWISEXORASSIGN_TOKEN);
                retract();
                return makeToken(BITWISEXOR_TOKEN);

              case '{':
                return makeToken(LEFTBRACE_TOKEN);

              case '|': // tokens: | |= || ||=
                c = nextchar();
                if (c == '=') return makeToken(BITWISEORASSIGN_TOKEN);
                if (c == '|') {
                  if (nextchar() == '=') return makeToken(LOGICALORASSIGN_TOKEN);
                  retract();
                  return makeToken(LOGICALOR_TOKEN);
                }
                retract();
                return makeToken(BITWISEOR_TOKEN);

              case '}':
                return makeToken(RIGHTBRACE_TOKEN);

              case '~':
                return makeToken(BITWISENOT_TOKEN);

              case '+': // tokens: ++ += +
                c = nextchar();
                if (c == '+') return makeToken(PLUSPLUS_TOKEN);
                if (c == '=') return makeToken(PLUSASSIGN_TOKEN);
                retract();
                return makeToken(PLUS_TOKEN);

              case '<':
                switch (nextchar()) {
                  case '<': // tokens: << <<=
                    if (nextchar() == '=') return makeToken(LEFTSHIFTASSIGN_TOKEN);
                    retract();
                    return makeToken(LEFTSHIFT_TOKEN);

                  case '=':
                    return makeToken(LESSTHANOREQUALS_TOKEN);

                  case '/':
                    return makeToken(XMLTAGSTARTEND_TOKEN);
                  case '!':
                    state = xmlcommentorcdatastart_state;
                    continue;
                  case '?':
                    state = xmlpi_state;
                    continue;
                }
                retract();
                return makeToken(LESSTHAN_TOKEN);

              case '=': // tokens: === == =
                if (nextchar() == '=') {
                  if (nextchar() == '=') return makeToken(STRICTEQUALS_TOKEN);
                  retract();
                  return makeToken(EQUALS_TOKEN);
                }
                retract();
                return makeToken(ASSIGN_TOKEN);

              case '>': // tokens: > >= >> >>= >>> >>>=
                state = start_state;

                switch (nextchar()) {
                  case '>':
                    switch (nextchar()) {
                      case '>':
                        if (nextchar() == '=') return makeToken(UNSIGNEDRIGHTSHIFTASSIGN_TOKEN);
                        retract();
                        return makeToken(UNSIGNEDRIGHTSHIFT_TOKEN);
                      case '=':
                        return makeToken(RIGHTSHIFTASSIGN_TOKEN);
                      default:
                        retract();
                        return makeToken(RIGHTSHIFT_TOKEN);
                    }

                  case '=':
                    return makeToken(GREATERTHANOREQUALS_TOKEN);
                }
                retract();
                return makeToken(GREATERTHAN_TOKEN);

              case '0':
                state = zero_state;
                continue;

              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
              case '8':
              case '9':
                state = decimalinteger_state;
                continue;

              case ' ': // ascii range white space
              case '\t':
              case 0x000b:
              case 0x000c:
              case 0x0085:
              case 0x00a0:
                continue;

              case '\n': // ascii line terminators.
              case '\r':
                isFirstTokenOnLine = true;
                continue;

              case 0:
                return makeToken(EOS_TOKEN);

              default:
                switch (input.nextcharClass((char) c, true)) {
                  case Lu:
                  case Ll:
                  case Lt:
                  case Lm:
                  case Lo:
                  case Nl:
                    maybe_reserved = false;
                    state = A_state;
                    continue;

                  case Zs: // unicode whitespace and control-characters
                  case Cc:
                  case Cf:
                    continue;

                  case Zl: // unicode line terminators
                  case Zp:
                    isFirstTokenOnLine = true;
                    continue;

                  default:
                    state = error_state;
                    continue;
                }
            }
          }

          /*
           * prefix: <letter>
           */

        case A_state:
          {
            boolean needs_escape =
                c == '\\'; // ??? really should only be true if the word started with a backslash

            while (true) {
              c = nextchar();
              if (c >= 'a' && c <= 'z') {
                continue;
              }
              if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_') {
                maybe_reserved = false;
                continue;
              }
              if (c <= 0x7f) // in ascii range & mostly not a valid char
              {
                if (c == '\\') {
                  needs_escape =
                      true; // close enough, we just want to minimize rescans for unicode escapes
                } else {
                  retract();
                  break;
                }
              }

              // else outside ascii range (or an escape sequence )

              switch (input.nextcharClass(c, false)) {
                case Lu:
                case Ll:
                case Lt:
                case Lm:
                case Lo:
                case Nl:
                case Mn:
                case Mc:
                case Nd:
                case Pc:
                  maybe_reserved = false;
                  input.nextcharClass(c, true); // advance input cursor
                  continue;
              }

              retract();
              break;
            }

            state = start_state;
            String s = input.copyReplaceUnicodeEscapes(needs_escape);
            if (maybe_reserved) {
              Integer i = reservedWord.get(s);
              if (i != null) return makeToken((int) i);
            }
            return makeToken(IDENTIFIER_TOKEN, s);
          }

          /*
           * prefix: 0
           * accepts: 0x... | 0X... | 01... | 0... | 0
           */

        case zero_state:
          switch (nextchar()) {
            case 'x':
            case 'X':
              switch (nextchar()) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                case 'a':
                case 'b':
                case 'c':
                case 'd':
                case 'e':
                case 'f':
                case 'A':
                case 'B':
                case 'C':
                case 'D':
                case 'E':
                case 'F':
                  state = hexinteger_state;
                  break;
                default:
                  state = start_state;
                  error(kError_Lexical_General);
              }
              continue;

            case '.':
              state = decimal_state;
              continue;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              state = decimalinteger_state;
              continue;
            case 'E':
            case 'e':
              state = exponentstart_state;
              continue;
            case 'd':
            case 'm':
            case 'i':
            case 'u':
              if (!ctx.statics.es4_numerics) retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
            default:
              retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
          }

          /*
           * prefix: 0x<hex digits>
           * accepts: 0x123f
           */

        case hexinteger_state:
          switch (nextchar()) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
              state = hexinteger_state;
              continue;
            case 'u':
            case 'i':
              if (!ctx.statics.es4_numerics) retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
            default:
              retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
          }

          /*
           * prefix: .
           * accepts: .123 | .
           */

        case dot_state:
          switch (nextchar()) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              state = decimal_state;
              continue;

            case '.':
              state = start_state;
              if (nextchar() == '.') return makeToken(TRIPLEDOT_TOKEN);
              retract();
              return makeToken(DOUBLEDOT_TOKEN);

            case '<':
              state = start_state;
              return makeToken(DOTLESSTHAN_TOKEN);

            default:
              retract();
              state = start_state;
              return makeToken(DOT_TOKEN);
          }

          /*
           * prefix: N
           * accepts: 0.123 | 1.23 | 123 | 1e23 | 1e-23
           */

        case decimalinteger_state:
          switch (nextchar()) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              state = decimalinteger_state;
              continue;
            case '.':
              state = decimal_state;
              continue;
            case 'd':
            case 'm':
            case 'u':
            case 'i':
              if (!ctx.statics.es4_numerics) retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
            case 'E':
            case 'e':
              state = exponentstart_state;
              continue;
            default:
              retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
          }

          /*
           * prefix: N.
           * accepts: 0.1 | 1e23 | 1e-23
           */

        case decimal_state:
          switch (nextchar()) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              state = decimal_state;
              continue;
            case 'd':
            case 'm':
              if (!ctx.statics.es4_numerics) retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
            case 'E':
            case 'e':
              state = exponentstart_state;
              continue;
            default:
              retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
          }

          /*
           * prefix: ..e
           * accepts: ..eN | ..e+N | ..e-N
           */

        case exponentstart_state:
          switch (nextchar()) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '+':
            case '-':
              state = exponent_state;
              continue;
            default:
              error(kError_Lexical_General);
              state = start_state;
              continue;
              // Issue: needs specific error here.
          }

          /*
           * prefix: ..e
           * accepts: ..eN | ..e+N | ..e-N
           */

        case exponent_state:
          switch (nextchar()) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              state = exponent_state;
              continue;
            case 'd':
            case 'm':
              if (!ctx.statics.es4_numerics) retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
            default:
              retract();
              state = start_state;
              return makeToken(NUMBERLITERAL_TOKEN, input.copy());
          }

          /*
           * prefix: /
           */

        case slash_state:
          {
            c = nextchar();

            switch (c) {
              case '/': // line comment
                state = start_state;
                line_comment:
                while ((c = nextchar()) != 0) {
                  if (c == '\r' || c == '\n') {
                    isFirstTokenOnLine = true;
                    if (save_comments == false) {
                      break line_comment;
                    }
                    retract(); // don't include newline in line comment. (Sec 7.3)
                    return makeCommentToken(
                        SLASHSLASHCOMMENT_TOKEN, input.copyReplaceUnicodeEscapes());
                  }
                }
                continue;

              case '*':
                if (save_comments == false) {
                  block_comment:
                  while ((c = nextchar()) != 0) {
                    if (c == '\r' || c == '\n') isFirstTokenOnLine = true;

                    if (c == '*') {
                      c = nextchar();
                      if (c == '/') {
                        break block_comment;
                      }
                      retract();
                    }
                  }
                  state = start_state;
                } else {
                  if (blockcommentbuf == null) blockcommentbuf = new StringBuilder();
                  blockcommentbuf.append("/*");
                  state = blockcommentstart_state;
                }
                continue;

              case '>':
                if (inXML > 0) // ignore this if outside an XML context
                {
                  state = start_state;
                  return makeToken(XMLTAGENDEND_TOKEN);
                }
                // FALL THROUGH
              default:
                // If the last token read is any of these, then the '/' must start a div or
                // div_assign...

                int lb = currentToken.lookback;

                if (lb == IDENTIFIER_TOKEN
                    || lb == NUMBERLITERAL_TOKEN
                    || lb == RIGHTPAREN_TOKEN
                    || lb == RIGHTBRACE_TOKEN
                    || lb == RIGHTBRACKET_TOKEN) {
                  /*
                   * tokens: /= /
                   */

                  state = start_state;
                  if (c == '=') return makeToken(DIVASSIGN_TOKEN);
                  retract();
                  return makeToken(DIV_TOKEN);
                }
                state = slashregexp_state;
                retract();
                continue;
            }
          }

          /*
           * tokens: /<regexpbody>/<regexpflags>
           */

        case slashregexp_state:
          switch (nextchar()) {
            case '\\':
              nextchar();
              continue;
            case '/':
              regexp_flags = 0;
              state = regexp_state;
              continue;
            case 0:
            case '\n':
            case '\r':
              error(kError_Lexical_General);
              state = start_state;
              continue;
            default:
              state = slashregexp_state;
              continue;
          }

          /*
           * tokens: g | i | m | s | x  .  Note that s and x are custom extentions to match perl's functionality
           *   Also note we handle this via an array of boolean flags intead of state change logic.
           *   (5,1) + (5,2) + (5,3) + (5,4) + (5,5) is just too many states to handle this via state logic
           */

        case regexp_state:
          c = nextchar();
          switch (c) {
            case 'g':
              if ((regexp_flags & 0x01) == 0) {
                regexp_flags |= 0x01;
                continue;
              }
              error(kError_Lexical_General);
              state = start_state;
              continue;

            case 'i':
              if ((regexp_flags & 0x02) == 0) {
                regexp_flags |= 0x02;
                continue;
              }
              error(kError_Lexical_General);
              state = start_state;
              continue;

            case 'm':
              if ((regexp_flags & 0x04) == 0) {
                regexp_flags |= 0x04;
                continue;
              }
              error(kError_Lexical_General);
              state = start_state;
              continue;

            case 's':
              if ((regexp_flags & 0x08) == 0) {
                regexp_flags |= 0x08;
                continue;
              }
              error(kError_Lexical_General);
              state = start_state;
              continue;

            case 'x':
              if ((regexp_flags & 0x10) == 0) {
                regexp_flags |= 0x10;
                continue;
              }
              error(kError_Lexical_General);
              state = start_state;
              continue;

            default:
              if (Character.isJavaIdentifierPart(c)) {
                error(kError_Lexical_General);
                state = start_state;
                continue;
              }
              retract();
              state = start_state;
              return makeToken(REGEXPLITERAL_TOKEN, input.copyReplaceUnicodeEscapes());
          }

          /*
           * prefix: <!
           */

        case xmlcommentorcdatastart_state:
          switch (nextchar()) {
            case '[':
              if (nextchar() == 'C'
                  && nextchar() == 'D'
                  && nextchar() == 'A'
                  && nextchar() == 'T'
                  && nextchar() == 'A'
                  && nextchar() == '[') {
                state = xmlcdata_state;
                continue;
              }
              break; // error

            case '-':
              if (nextchar() == '-') {
                state = xmlcomment_state;
                continue;
              }
          }
          error(kError_Lexical_General);
          state = start_state;
          continue;

        case xmlcdata_state:
          switch (nextchar()) {
            case ']':
              if (nextchar() == ']' && nextchar() == '>') {
                state = start_state;
                return makeToken(
                    XMLMARKUP_TOKEN, input.substringReplaceUnicodeEscapes(startofxml, pos()));
              }
              continue;

            case 0:
              error(kError_Lexical_General);
              state = start_state;
          }
          continue;

        case xmlcomment_state:
          while ((c = nextchar()) != '-' && c != 0) ;

          if (c == '-' && nextchar() != '-') {
            continue;
          }

          // got -- if next is > ok else error

          if (nextchar() == '>') {
            state = start_state;
            return makeToken(
                XMLMARKUP_TOKEN, input.substringReplaceUnicodeEscapes(startofxml, pos()));
          }

          error(kError_Lexical_General);
          state = start_state;
          continue;

        case xmlpi_state:
          while ((c = nextchar()) != '?' && c != 0) ;

          if (c == '?' && nextchar() == '>') {
            state = start_state;
            return makeToken(
                XMLMARKUP_TOKEN, input.substringReplaceUnicodeEscapes(startofxml, pos()));
          }

          if (c == 0) {
            error(kError_Lexical_General);
            state = start_state;
          }
          continue;

        case xmltext_state:
          {
            switch (nextchar()) {
              case '<':
              case '{':
                {
                  retract();
                  String xmltext = input.substringReplaceUnicodeEscapes(startofxml, pos());
                  if (xmltext != null) {
                    state = start_state;
                    return makeToken(XMLTEXT_TOKEN, xmltext);
                  } else // if there is no leading text, then just return punctuation token to avoid
                         // empty text tokens
                  {
                    switch (nextchar()) {
                      case '<':
                        switch (nextchar()) {
                          case '/':
                            state = start_state;
                            return makeToken(XMLTAGSTARTEND_TOKEN);
                          case '!':
                            state = xmlcommentorcdatastart_state;
                            continue;
                          case '?':
                            state = xmlpi_state;
                            continue;
                          default:
                            retract();
                            state = start_state;
                            return makeToken(LESSTHAN_TOKEN);
                        }
                      case '{':
                        state = start_state;
                        return makeToken(LEFTBRACE_TOKEN);
                    }
                  }
                }
              case 0:
                state = start_state;
                return makeToken(EOS_TOKEN);
            }
            continue;
          }

        case xmlliteral_state:
          switch (nextchar()) {
            case '{': // return makeToken( XMLPART_TOKEN
              return makeToken(
                  XMLPART_TOKEN, input.substringReplaceUnicodeEscapes(startofxml, pos() - 1));

            case '<':
              if (nextchar() == '/') {
                --level;
                nextchar();
                mark();
                retract();
                state = endxmlname_state;
              } else {
                ++level;
                state = xmlliteral_state;
              }
              continue;

            case '/':
              if (nextchar() == '>') {
                --level;
                if (level == 0) {
                  state = start_state;
                  return makeToken(
                      XMLLITERAL_TOKEN,
                      input.substringReplaceUnicodeEscapes(startofxml, pos() + 1));
                }
              }
              continue;

            case 0:
              retract();
              error(kError_Lexical_NoMatchingTag);
              state = start_state;
              continue;

            default:
              continue;
          }

        case endxmlname_state:
          c = nextchar();
          if (Character.isJavaIdentifierPart(c) || c == ':') {
            continue;
          }

          switch (c) {
            case '{': // return makeToken( XMLPART_TOKEN
              {
                String xmltext = input.substringReplaceUnicodeEscapes(startofxml, pos() - 1);
                return makeToken(XMLPART_TOKEN, xmltext);
              }
            case '>':
              retract();
              nextchar();
              if (level == 0) {
                String xmltext = input.substringReplaceUnicodeEscapes(startofxml, pos() + 1);
                state = start_state;
                return makeToken(XMLLITERAL_TOKEN, xmltext);
              }
              state = xmlliteral_state;
              continue;

            default:
              state = xmlliteral_state;
              continue;
          }

          /*
           * prefix: /*
           */

        case blockcommentstart_state:
          {
            c = nextchar();
            blockcommentbuf.append(c);
            switch (c) {
              case '*':
                if (nextchar() == '/') {
                  state = start_state;
                  return makeCommentToken(BLOCKCOMMENT_TOKEN, new String());
                }
                retract();
                state = doccomment_state;
                continue;

              case 0:
                error(kError_BlockCommentNotTerminated);
                state = start_state;
                continue;

              case '\n':
              case '\r':
                isFirstTokenOnLine = true;
              default:
                state = blockcomment_state;
                continue;
            }
          }

          /*
           * prefix: /**
           */

        case doccomment_state:
          {
            c = nextchar();
            blockcommentbuf.append(c);
            switch (c) {
              case '*':
                state = doccommentstar_state;
                continue;

              case '@':
                if (doctextbuf == null) doctextbuf = getDocTextBuffer(doctagname);
                if (doctagname.length() > 0) {
                  doctextbuf.append("]]></").append(doctagname).append(">");
                }
                doctagname = "";
                state = doccommenttag_state;
                continue;

              case '\r':
              case '\n':
                isFirstTokenOnLine = true;
                if (doctextbuf == null) doctextbuf = getDocTextBuffer(doctagname);
                doctextbuf.append('\n');
                continue;

              case 0:
                error(kError_BlockCommentNotTerminated);
                state = start_state;
                continue;

              default:
                if (doctextbuf == null) doctextbuf = getDocTextBuffer(doctagname);
                doctextbuf.append((char) (c));
                continue;
            }
          }

        case doccommentstar_state:
          {
            c = nextchar();
            blockcommentbuf.append(c);
            switch (c) {
              case '/':
                {
                  if (doctextbuf == null) doctextbuf = getDocTextBuffer(doctagname);
                  if (doctagname.length() > 0) {
                    doctextbuf.append("]]></").append(doctagname).append(">");
                  }
                  String doctext =
                      doctextbuf.toString(); // ??? does this needs escape conversion ???
                  state = start_state;
                  return makeCommentToken(DOCCOMMENT_TOKEN, doctext);
                }

              case '*':
                continue;

              case 0:
                error(kError_BlockCommentNotTerminated);
                state = start_state;
                continue;

              default:
                state = doccomment_state;
                continue;
                // if not a slash, then keep looking for an end comment.
            }
          }

          /*
           * prefix: @
           */

        case doccommenttag_state:
          {
            c = nextchar();
            switch (c) {
              case '*':
                state = doccommentstar_state;
                continue;

              case ' ':
              case '\t':
              case '\r':
              case '\n':
                {
                  if (doctextbuf == null) doctextbuf = getDocTextBuffer(doctagname);

                  // skip extra whitespace --fixes bug on tag text parsing
                  // --but really, the problem is not here, it's in whatever reads asdoc output..
                  // --So if that gets fixed, feel free to delete the following.

                  while ((c = nextchar()) == ' ' || c == '\t') ;
                  retract();

                  if (doctagname.length() > 0) {
                    doctextbuf.append("\n<").append(doctagname).append("><![CDATA[");
                  }
                  state = doccomment_state;
                  continue;
                }

              case 0:
                error(kError_BlockCommentNotTerminated);
                state = start_state;
                continue;

              default:
                doctagname += (char) (c);
                continue;
            }
          }

          /*
           * prefix: /**
           */

        case doccommentvalue_state:
          switch (nextchar()) {
            case '*':
              state = doccommentstar_state;
              continue;

            case '@':
              state = doccommenttag_state;
              continue;

            case 0:
              error(kError_BlockCommentNotTerminated);
              state = start_state;
              continue;

            default:
              state = doccomment_state;
              continue;
          }

          /*
           * prefix: /*
           */

        case blockcomment_state:
          {
            c = nextchar();
            blockcommentbuf.append(c);
            switch (c) {
              case '*':
                c = nextchar();
                if (c == '/') {
                  state = start_state;
                  blockcommentbuf.append(c);
                  String blocktext = blockcommentbuf.toString(); // ??? needs escape conversion
                  return makeCommentToken(BLOCKCOMMENT_TOKEN, blocktext);
                }
                retract();
                break;

              case '\r':
              case '\n':
                isFirstTokenOnLine = true;
                break;

              case 0:
                error(kError_BlockCommentNotTerminated);
                state = start_state;
                break;
            }
            continue;
          }

          /*
           * skip error
           */

        case error_state:
          error(kError_Lexical_General);
          skiperror();
          state = start_state;
          continue;

        default:
          error("invalid scanner state");
          state = start_state;
          return makeToken(EOS_TOKEN);
      }
    }
  }