/** Scans the XPointer Expression */ private boolean scanExpr( SymbolTable symbolTable, Tokens tokens, String data, int currentOffset, int endOffset) throws XNIException { int ch; int openParen = 0; int closeParen = 0; int nameOffset, dataOffset; boolean isQName = false; String name = null; String prefix = null; String schemeData = null; StringBuffer schemeDataBuff = new StringBuffer(); while (true) { if (currentOffset == endOffset) { break; } ch = data.charAt(currentOffset); // while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) { if (++currentOffset == endOffset) { break; } ch = data.charAt(currentOffset); } if (currentOffset == endOffset) { break; } // // [1] Pointer ::= Shorthand | SchemeBased // [2] Shorthand ::= NCName // [3] SchemeBased ::= PointerPart (S? PointerPart)* // [4] PointerPart ::= SchemeName '(' SchemeData ')' // [5] SchemeName ::= QName // [6] SchemeData ::= EscapedData* // [7] EscapedData ::= NormalChar | '^(' | '^)' | '^^' | '(' SchemeData ')' // [8] NormalChar ::= UnicodeChar - [()^] // [9] UnicodeChar ::= [#x0-#x10FFFF] // [?] QName ::= (NCName ':')? NCName // [?] NCName ::= (Letter | '_') (NCNameChar)* // [?] NCNameChar ::= Letter | Digit | '.' | '-' | '_' (ascii subset of // 'NCNameChar') // [?] Letter ::= [A-Za-z] (ascii subset of // 'Letter') // [?] Digit ::= [0-9] (ascii subset of // 'Digit') // byte chartype = (ch >= 0x80) ? 
CHARTYPE_NONASCII : fASCIICharMap[ch]; switch (chartype) { case CHARTYPE_OPEN_PAREN: // '(' addToken(tokens, Tokens.XPTRTOKEN_OPEN_PAREN); openParen++; ++currentOffset; break; case CHARTYPE_CLOSE_PAREN: // ')' addToken(tokens, Tokens.XPTRTOKEN_CLOSE_PAREN); closeParen++; ++currentOffset; break; case CHARTYPE_CARRET: case CHARTYPE_COLON: case CHARTYPE_DIGIT: case CHARTYPE_EQUAL: case CHARTYPE_LETTER: case CHARTYPE_MINUS: case CHARTYPE_NONASCII: case CHARTYPE_OTHER: case CHARTYPE_PERIOD: case CHARTYPE_SLASH: case CHARTYPE_UNDERSCORE: case CHARTYPE_WHITESPACE: // Scanning SchemeName | Shorthand if (openParen == 0) { nameOffset = currentOffset; currentOffset = scanNCName(data, endOffset, currentOffset); if (currentOffset == nameOffset) { reportError("InvalidShortHandPointer", new Object[] {data}); return false; } if (currentOffset < endOffset) { ch = data.charAt(currentOffset); } else { ch = -1; } name = symbolTable.addSymbol(data.substring(nameOffset, currentOffset)); prefix = XMLSymbols.EMPTY_STRING; // The name is a QName => a SchemeName if (ch == ':') { if (++currentOffset == endOffset) { return false; } ch = data.charAt(currentOffset); prefix = name; nameOffset = currentOffset; currentOffset = scanNCName(data, endOffset, currentOffset); if (currentOffset == nameOffset) { return false; } if (currentOffset < endOffset) { ch = data.charAt(currentOffset); } else { ch = -1; } isQName = true; name = symbolTable.addSymbol(data.substring(nameOffset, currentOffset)); } // REVISIT: if (currentOffset != endOffset) { addToken(tokens, Tokens.XPTRTOKEN_SCHEMENAME); tokens.addToken(prefix); tokens.addToken(name); isQName = false; } else if (currentOffset == endOffset) { // NCName => Shorthand addToken(tokens, Tokens.XPTRTOKEN_SHORTHAND); tokens.addToken(name); isQName = false; } // reset open/close paren for the next pointer part closeParen = 0; break; } else if (openParen > 0 && closeParen == 0 && name != null) { // Scanning SchemeData dataOffset = currentOffset; currentOffset 
= scanData(data, schemeDataBuff, endOffset, currentOffset); if (currentOffset == dataOffset) { reportError("InvalidSchemeDataInXPointer", new Object[] {data}); return false; } if (currentOffset < endOffset) { ch = data.charAt(currentOffset); } else { ch = -1; } schemeData = symbolTable.addSymbol(schemeDataBuff.toString()); addToken(tokens, Tokens.XPTRTOKEN_SCHEMEDATA); tokens.addToken(schemeData); // reset open/close paren for the next pointer part openParen = 0; schemeDataBuff.delete(0, schemeDataBuff.length()); } else { // ex. schemeName() // Should we throw an exception with a more suitable message instead?? return false; } } } // end while return true; }
/**
 * Adds the specified token to the token list by delegating to {@code tokens.addToken(token)}.
 *
 * <p>This default implementation accepts every token. Subclasses of the scanner can override
 * this method in order to disallow certain tokens from being used in the scanned expression,
 * which is a convenient way of allowing only a subset of the expression language.
 *
 * <p>NOTE(review): the original comment spoke of "XPathExprScanner" and "XPath expression",
 * while the visible caller in this file scans XPointer expressions -- presumably the wording
 * was copied from the XPath scanner; confirm.
 *
 * @param tokens the token list the token is appended to
 * @param token the token code to append
 * @throws XNIException declared so that overriding implementations can reject a token; this
 *     implementation itself contains no {@code throw} statement
 */
protected void addToken(Tokens tokens, int token) throws XNIException { tokens.addToken(token); } // addToken(int)
/** * {@inheritDoc} * * @see org.modeshape.common.text.TokenStream.Tokenizer#tokenize(CharacterStream, Tokens) */ @Override public void tokenize(CharacterStream input, Tokens tokens) throws ParsingException { int startIndex; int endIndex; while (input.hasNext()) { char c = input.next(); switch (c) { case ' ': case '\t': case '\n': case '\r': // Just skip these whitespace characters ... break; // ============================================================================================== // DDL Comments token = "--" // ============================================================================================== case '-': { startIndex = input.index(); Position startPosition = input.position(startIndex); if (input.isNext('-')) { // -- END OF LINE comment ... boolean foundLineTerminator = false; while (input.hasNext()) { c = input.next(); if (c == '\n' || c == '\r') { foundLineTerminator = true; break; } } endIndex = input.index(); // the token won't include the '\n' or '\r' character(s) if (!foundLineTerminator) ++endIndex; // must point beyond last char if (c == '\r' && input.isNext('\n')) input.next(); // Check for PARSER_ID if (useComments) { tokens.addToken(startPosition, startIndex, endIndex, COMMENT); } } else { // just a regular dash ... 
tokens.addToken(startPosition, startIndex, startIndex + 1, SYMBOL); } break; } // ============================================================================================== case '(': case ')': case '{': case '}': case '*': case ',': case ';': case '+': case '%': case '?': case '[': case ']': case '!': case '<': case '>': case '|': case '=': case ':': tokens.addToken( input.position(input.index()), input.index(), input.index() + 1, SYMBOL); break; case '.': tokens.addToken( input.position(input.index()), input.index(), input.index() + 1, DECIMAL); break; case '\"': startIndex = input.index(); Position startingPosition = input.position(startIndex); boolean foundClosingQuote = false; while (input.hasNext()) { c = input.next(); if ((c == '\\' || c == '"') && input.isNext('"')) { c = input.next(); // consume the ' character since it is escaped } else if (c == '"') { foundClosingQuote = true; break; } } if (!foundClosingQuote) { String msg = CommonI18n.noMatchingDoubleQuoteFound.text( startingPosition.getLine(), startingPosition.getColumn()); throw new ParsingException(startingPosition, msg); } endIndex = input.index() + 1; // beyond last character read tokens.addToken(startingPosition, startIndex, endIndex, DOUBLE_QUOTED_STRING); break; case '\u2019': // '’': case '\'': char quoteChar = c; startIndex = input.index(); startingPosition = input.position(startIndex); foundClosingQuote = false; while (input.hasNext()) { c = input.next(); if ((c == '\\' || c == quoteChar) && input.isNext(quoteChar)) { c = input.next(); // consume the ' character since it is escaped } else if (c == quoteChar) { foundClosingQuote = true; break; } } if (!foundClosingQuote) { String msg = CommonI18n.noMatchingSingleQuoteFound.text( startingPosition.getLine(), startingPosition.getColumn()); throw new ParsingException(startingPosition, msg); } endIndex = input.index() + 1; // beyond last character read tokens.addToken(startingPosition, startIndex, endIndex, SINGLE_QUOTED_STRING); break; case 
'/': startIndex = input.index(); startingPosition = input.position(startIndex); if (input.isNext('/')) { // End-of-line comment ... boolean foundLineTerminator = false; while (input.hasNext()) { c = input.next(); if (c == '\n' || c == '\r') { foundLineTerminator = true; break; } } endIndex = input.index(); // the token won't include the '\n' or '\r' character(s) if (!foundLineTerminator) ++endIndex; // must point beyond last char if (c == '\r' && input.isNext('\n')) input.next(); if (useComments) { tokens.addToken(startingPosition, startIndex, endIndex, COMMENT); } } else if (input.isNext('*')) { // Multi-line comment ... while (input.hasNext() && !input.isNext('*', '/')) { c = input.next(); } if (input.hasNext()) input.next(); // consume the '*' if (input.hasNext()) input.next(); // consume the '/' endIndex = input.index() + 1; // the token will include the '/' and '*' characters if (useComments) { tokens.addToken(startingPosition, startIndex, endIndex, COMMENT); } } else { // just a regular slash ... tokens.addToken(startingPosition, startIndex, startIndex + 1, SYMBOL); } break; default: startIndex = input.index(); Position startPosition = input.position(startIndex); // Read until another whitespace/symbol/decimal/slash is found while (input.hasNext() && !(input.isNextWhitespace() || input.isNextAnyOf("/.-(){}*,;+%?[]!<>|=:"))) { c = input.next(); } endIndex = input.index() + 1; // beyond last character that was included tokens.addToken(startPosition, startIndex, endIndex, WORD); } } }