Пример #1
0
    /**
     * Scans an XPointer expression and appends the tokens it recognizes to {@code tokens}.
     *
     * <p>The range {@code [currentOffset, endOffset)} of {@code data} is walked left to right,
     * skipping whitespace between tokens. Parentheses become open/close paren tokens. A name
     * found outside parentheses becomes a scheme-name token (prefix + local name) when more
     * input follows it, or a shorthand token when it ends the input. Text found after an open
     * parenthesis that follows a name is handed to {@code scanData} and becomes a scheme-data
     * token.
     *
     * @param symbolTable table used to intern the scanned names and scheme data
     * @param tokens token list that receives the scanned tokens
     * @param data the text being scanned
     * @param currentOffset offset in {@code data} at which scanning starts
     * @param endOffset offset at which scanning stops (exclusive)
     * @return {@code true} if the whole range was consumed; {@code false} if a name or scheme
     *     data could not be scanned, if a prefix is not followed by a local name, if text
     *     appears inside parentheses where no scheme data is expected, or if a character of
     *     an unhandled type is encountered
     * @throws XNIException if adding a token or reporting an error raises it
     */
    private boolean scanExpr(
        SymbolTable symbolTable, Tokens tokens, String data, int currentOffset, int endOffset)
        throws XNIException {

      int ch;
      int openParen = 0;
      int closeParen = 0;
      int nameOffset, dataOffset;
      String name = null;
      String prefix = null;
      // Stays a StringBuffer because it is passed to scanData, whose signature is not visible
      // from here.
      StringBuffer schemeDataBuff = new StringBuffer();

      while (true) {

        if (currentOffset == endOffset) {
          break;
        }
        ch = data.charAt(currentOffset);

        // Skip whitespace between tokens.
        while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
          if (++currentOffset == endOffset) {
            break;
          }
          ch = data.charAt(currentOffset);
        }
        if (currentOffset == endOffset) {
          break;
        }

        //
        // [1]    Pointer      ::=    Shorthand | SchemeBased
        // [2]    Shorthand    ::=    NCName
        // [3]    SchemeBased  ::=    PointerPart (S? PointerPart)*
        // [4]    PointerPart  ::=    SchemeName '(' SchemeData ')'
        // [5]    SchemeName   ::=    QName
        // [6]    SchemeData   ::=    EscapedData*
        // [7]    EscapedData  ::=    NormalChar | '^(' | '^)' | '^^' | '(' SchemeData ')'
        // [8]    NormalChar   ::=    UnicodeChar - [()^]
        // [9]    UnicodeChar  ::=    [#x0-#x10FFFF]
        // [?]    QName        ::=    (NCName ':')? NCName
        // [?]    NCName       ::=    (Letter | '_') (NCNameChar)*
        // [?]    NCNameChar   ::=    Letter | Digit | '.' | '-' | '_'  (ascii subset of
        // 'NCNameChar')
        // [?]    Letter       ::=    [A-Za-z]                              (ascii subset of
        // 'Letter')
        // [?]    Digit        ::=    [0-9]                                  (ascii subset of
        // 'Digit')
        //
        byte chartype = (ch >= 0x80) ? CHARTYPE_NONASCII : fASCIICharMap[ch];

        switch (chartype) {
          case CHARTYPE_OPEN_PAREN: // '('
            addToken(tokens, Tokens.XPTRTOKEN_OPEN_PAREN);
            openParen++;
            ++currentOffset;
            break;

          case CHARTYPE_CLOSE_PAREN: // ')'
            addToken(tokens, Tokens.XPTRTOKEN_CLOSE_PAREN);
            closeParen++;
            ++currentOffset;
            break;

          case CHARTYPE_CARRET:
          case CHARTYPE_COLON:
          case CHARTYPE_DIGIT:
          case CHARTYPE_EQUAL:
          case CHARTYPE_LETTER:
          case CHARTYPE_MINUS:
          case CHARTYPE_NONASCII:
          case CHARTYPE_OTHER:
          case CHARTYPE_PERIOD:
          case CHARTYPE_SLASH:
          case CHARTYPE_UNDERSCORE:
          case CHARTYPE_WHITESPACE:
            if (openParen == 0) {
              // Scanning SchemeName | Shorthand
              nameOffset = currentOffset;
              currentOffset = scanNCName(data, endOffset, currentOffset);

              // scanNCName made no progress: no name starts here.
              if (currentOffset == nameOffset) {
                reportError("InvalidShortHandPointer", new Object[] {data});
                return false;
              }

              // Peek at the character after the name; -1 marks end of input.
              ch = (currentOffset < endOffset) ? data.charAt(currentOffset) : -1;

              name = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
              prefix = XMLSymbols.EMPTY_STRING;

              // The name is a QName => a SchemeName
              if (ch == ':') {
                // A trailing ':' with nothing after it cannot be a QName.
                if (++currentOffset == endOffset) {
                  return false;
                }

                prefix = name;
                nameOffset = currentOffset;
                currentOffset = scanNCName(data, endOffset, currentOffset);

                // No local part after the prefix.
                if (currentOffset == nameOffset) {
                  return false;
                }

                name = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
              }

              // REVISIT:
              if (currentOffset != endOffset) {
                // More input follows the name => treat it as a SchemeName
                addToken(tokens, Tokens.XPTRTOKEN_SCHEMENAME);
                tokens.addToken(prefix);
                tokens.addToken(name);
              } else {
                // The name ends the input: NCName => Shorthand
                addToken(tokens, Tokens.XPTRTOKEN_SHORTHAND);
                tokens.addToken(name);
              }

              // reset open/close paren for the next pointer part
              closeParen = 0;

            } else if (openParen > 0 && closeParen == 0 && name != null) {
              // Scanning SchemeData
              dataOffset = currentOffset;
              currentOffset = scanData(data, schemeDataBuff, endOffset, currentOffset);

              // scanData made no progress: nothing usable as scheme data.
              if (currentOffset == dataOffset) {
                reportError("InvalidSchemeDataInXPointer", new Object[] {data});
                return false;
              }

              String schemeData = symbolTable.addSymbol(schemeDataBuff.toString());
              addToken(tokens, Tokens.XPTRTOKEN_SCHEMEDATA);
              tokens.addToken(schemeData);

              // reset open/close paren for the next pointer part
              openParen = 0;
              schemeDataBuff.setLength(0);

            } else {
              // ex. schemeName()
              // Should we throw an exception with a more suitable message instead??
              return false;
            }
            // Explicit break so the scheme-data branch cannot fall into the default below.
            break;

          default:
            // A character type with no case above would leave currentOffset unchanged, so the
            // enclosing loop would never terminate. Treat it as a scan failure instead.
            return false;
        }
      } // end while
      return true;
    }
Пример #2
0
 /**
  * Adds the specified token to the token list. By default, this method allows all
  * tokens. However, subclasses of the XPathExprScanner can override this method in order to
  * disallow certain tokens from being used in the scanned XPath expression. This is a convenient
  * way of allowing only a subset of XPath.
  *
  * @param tokens the token list that receives the token
  * @param token the integer token code to append
  * @throws XNIException declared on the signature; this default body only delegates to
  *     {@code tokens.addToken(token)} (presumably overriding implementations throw it to
  *     reject a token; confirm against subclasses)
  */
 protected void addToken(Tokens tokens, int token) throws XNIException {
   tokens.addToken(token);
 } // addToken(Tokens, int)
    /**
     * {@inheritDoc}
     *
     * <p>Reads {@code input} one character at a time and adds a token for each lexical unit:
     *
     * <ul>
     *   <li>whitespace (space, tab, CR, LF) is skipped;
     *   <li>{@code --} and {@code //} start an end-of-line comment, and a slash followed by an
     *       asterisk starts a multi-line comment; comments are added as {@code COMMENT} tokens
     *       only when {@code useComments} is set, otherwise they are consumed and dropped;
     *   <li>a lone {@code -} or {@code /} and the punctuation characters listed in the switch
     *       become one-character {@code SYMBOL} tokens;
     *   <li>{@code .} becomes a one-character {@code DECIMAL} token;
     *   <li>double-quoted and single-quoted runs (the latter also delimited by U+2019) become
     *       {@code DOUBLE_QUOTED_STRING} / {@code SINGLE_QUOTED_STRING} tokens, with a quote
     *       preceded by a backslash or by another quote treated as escaped;
     *   <li>anything else is collected into a {@code WORD} token up to the next whitespace or
     *       one of the characters {@code /.-(){}*,;+%?[]!<>|=:}.
     * </ul>
     *
     * <p>An unterminated multi-line comment runs to the end of the input without an error.
     *
     * @param input the character stream to read
     * @param tokens the sink that receives the tokens
     * @throws ParsingException if a quoted string has no matching closing quote
     * @see org.modeshape.common.text.TokenStream.Tokenizer#tokenize(CharacterStream, Tokens)
     */
    @Override
    public void tokenize(CharacterStream input, Tokens tokens) throws ParsingException {
      int startIndex;
      int endIndex;
      while (input.hasNext()) {
        char c = input.next();
        switch (c) {
          case ' ':
          case '\t':
          case '\n':
          case '\r':
            // Just skip these whitespace characters ...
            break;
            // ==============================================================================================
            // DDL Comments token = "--"
            // ==============================================================================================
          case '-':
            {
              startIndex = input.index();
              Position startPosition = input.position(startIndex);
              if (input.isNext('-')) {
                // -- END OF LINE comment ...
                boolean foundLineTerminator = false;
                while (input.hasNext()) {
                  c = input.next();
                  if (c == '\n' || c == '\r') {
                    foundLineTerminator = true;
                    break;
                  }
                }
                endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
                if (!foundLineTerminator) {
                  ++endIndex; // must point beyond last char
                }
                // Treat "\r\n" as a single line terminator.
                if (c == '\r' && input.isNext('\n')) {
                  input.next();
                }

                // Check for PARSER_ID

                if (useComments) {
                  tokens.addToken(startPosition, startIndex, endIndex, COMMENT);
                }

              } else {
                // just a regular dash ...
                tokens.addToken(startPosition, startIndex, startIndex + 1, SYMBOL);
              }
              break;
            }
            // ==============================================================================================
          case '(':
          case ')':
          case '{':
          case '}':
          case '*':
          case ',':
          case ';':
          case '+':
          case '%':
          case '?':
          case '[':
          case ']':
          case '!':
          case '<':
          case '>':
          case '|':
          case '=':
          case ':':
            tokens.addToken(
                input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
            break;
          case '.':
            tokens.addToken(
                input.position(input.index()), input.index(), input.index() + 1, DECIMAL);
            break;
          case '\"':
            startIndex = input.index();
            Position startingPosition = input.position(startIndex);
            boolean foundClosingQuote = false;
            while (input.hasNext()) {
              c = input.next();
              if ((c == '\\' || c == '"') && input.isNext('"')) {
                c = input.next(); // consume the " character since it is escaped
              } else if (c == '"') {
                foundClosingQuote = true;
                break;
              }
            }
            if (!foundClosingQuote) {
              String msg =
                  CommonI18n.noMatchingDoubleQuoteFound.text(
                      startingPosition.getLine(), startingPosition.getColumn());
              throw new ParsingException(startingPosition, msg);
            }
            endIndex = input.index() + 1; // beyond last character read
            tokens.addToken(startingPosition, startIndex, endIndex, DOUBLE_QUOTED_STRING);
            break;
          case '\u2019': // right single quotation mark
          case '\'':
            // The string must be closed by the same quote character that opened it.
            char quoteChar = c;
            startIndex = input.index();
            startingPosition = input.position(startIndex);
            foundClosingQuote = false;
            while (input.hasNext()) {
              c = input.next();
              if ((c == '\\' || c == quoteChar) && input.isNext(quoteChar)) {
                c = input.next(); // consume the quote character since it is escaped
              } else if (c == quoteChar) {
                foundClosingQuote = true;
                break;
              }
            }
            if (!foundClosingQuote) {
              String msg =
                  CommonI18n.noMatchingSingleQuoteFound.text(
                      startingPosition.getLine(), startingPosition.getColumn());
              throw new ParsingException(startingPosition, msg);
            }
            endIndex = input.index() + 1; // beyond last character read
            tokens.addToken(startingPosition, startIndex, endIndex, SINGLE_QUOTED_STRING);
            break;
          case '/':
            startIndex = input.index();
            startingPosition = input.position(startIndex);
            if (input.isNext('/')) {
              // End-of-line comment ...
              boolean foundLineTerminator = false;
              while (input.hasNext()) {
                c = input.next();
                if (c == '\n' || c == '\r') {
                  foundLineTerminator = true;
                  break;
                }
              }
              endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
              if (!foundLineTerminator) {
                ++endIndex; // must point beyond last char
              }
              // Treat "\r\n" as a single line terminator.
              if (c == '\r' && input.isNext('\n')) {
                input.next();
              }
              if (useComments) {
                tokens.addToken(startingPosition, startIndex, endIndex, COMMENT);
              }

            } else if (input.isNext('*')) {
              // Multi-line comment ...
              // Consume the opening '*' first; otherwise in "/*/" it would also be matched as
              // the '*' of the closing "*/" and the comment would end immediately.
              input.next();
              while (input.hasNext() && !input.isNext('*', '/')) {
                c = input.next();
              }
              if (input.hasNext()) {
                input.next(); // consume the '*'
              }
              if (input.hasNext()) {
                input.next(); // consume the '/'
              }

              endIndex = input.index() + 1; // the token will include the '/' and '*' characters
              if (useComments) {
                tokens.addToken(startingPosition, startIndex, endIndex, COMMENT);
              }

            } else {
              // just a regular slash ...
              tokens.addToken(startingPosition, startIndex, startIndex + 1, SYMBOL);
            }
            break;
          default:
            startIndex = input.index();
            Position startPosition = input.position(startIndex);
            // Read until another whitespace/symbol/decimal/slash is found
            while (input.hasNext()
                && !(input.isNextWhitespace() || input.isNextAnyOf("/.-(){}*,;+%?[]!<>|=:"))) {
              c = input.next();
            }
            endIndex = input.index() + 1; // beyond last character that was included
            tokens.addToken(startPosition, startIndex, endIndex, WORD);
        }
      }
    }