/** Advance to the next token. */ public void nextToken() { previousLine = line; previousColumn = column; // Advance the line counter to the current position. while (pos < matcher.regionStart()) { if (text.charAt(pos) == '\n') { ++line; column = 0; } else { ++column; } ++pos; } // Match the next token. if (matcher.regionStart() == matcher.regionEnd()) { // EOF currentToken = ""; } else { matcher.usePattern(TOKEN); if (matcher.lookingAt()) { currentToken = matcher.group(); matcher.region(matcher.end(), matcher.regionEnd()); } else { // Take one character. currentToken = String.valueOf(text.charAt(pos)); matcher.region(pos + 1, matcher.regionEnd()); } skipWhitespace(); } }
/**
 * Checks the value reported by {@code Matcher.regionStart()} in three situations: on a freshly
 * created matcher, after an explicit {@code region(1, 10)} call, and after {@code reset()}.
 */
public void test_regionStart() {
  final Matcher matcher = Pattern.compile("(abb)").matcher("cccabbabbabbabbabb");

  // A new matcher's region begins at index 0.
  assertEquals("Region sould start from 0 position", 0, matcher.regionStart());

  // Narrowing the region moves its start.
  matcher.region(1, 10);
  assertEquals(
      "Region sould start from 1 position after setting new region", 1, matcher.regionStart());

  // reset() puts the region start back to 0.
  matcher.reset();
  assertEquals("Region sould start from 0 position after reset", 0, matcher.regionStart());
}
/**
 * Builds the derived parsing state from the defining fields. Invoked by the constructor and by
 * readObject (de-serialization).
 *
 * <p>The format pattern is consumed one field at a time with {@code formatPattern}. While a
 * field's strategy is asked to contribute to the regular expression, {@code currentFormatField}
 * holds that field's text and {@code nextStrategy} holds the strategy of the field that follows
 * it ({@code null} for the last field). Strategies whose {@code addRegex} call returns true are
 * collected, in order, into {@code strategies}; the accumulated expression is compiled into
 * {@code parsePattern}.
 *
 * @param definingCalendar the {@link Calendar} instance used to initialize this FastDateParser
 * @throws IllegalArgumentException if the pattern does not begin with a recognised field, or if
 *     matching stops before the end of the pattern
 */
private void init(Calendar definingCalendar) {
  final StringBuilder regexBuilder = new StringBuilder();
  final List<Strategy> contributing = new ArrayList<Strategy>();

  final Matcher fieldMatcher = formatPattern.matcher(pattern);
  if (!fieldMatcher.lookingAt()) {
    final char offending = pattern.charAt(fieldMatcher.regionStart());
    throw new IllegalArgumentException("Illegal pattern character '" + offending + "'");
  }

  currentFormatField = fieldMatcher.group();
  Strategy strategy = getStrategy(currentFormatField, definingCalendar);
  while (true) {
    // Continue matching immediately after the field that was just recognised.
    fieldMatcher.region(fieldMatcher.end(), fieldMatcher.regionEnd());
    if (!fieldMatcher.lookingAt()) {
      nextStrategy = null;
      break;
    }

    final String upcomingField = fieldMatcher.group();
    // nextStrategy is assigned before addRegex runs, so it is already in place during that call.
    nextStrategy = getStrategy(upcomingField, definingCalendar);
    if (strategy.addRegex(this, regexBuilder)) {
      contributing.add(strategy);
    }
    currentFormatField = upcomingField;
    strategy = nextStrategy;
  }

  // Anything left in the region is text that no format field matched.
  final int stoppedAt = fieldMatcher.regionStart();
  if (stoppedAt != fieldMatcher.regionEnd()) {
    throw new IllegalArgumentException(
        "Failed to parse \"" + pattern + "\" ; gave up at index " + stoppedAt);
  }

  // The last field has no successor; nextStrategy is null at this point.
  if (strategy.addRegex(this, regexBuilder)) {
    contributing.add(strategy);
  }
  currentFormatField = null;

  strategies = contributing.toArray(new Strategy[contributing.size()]);
  parsePattern = Pattern.compile(regexBuilder.toString());
}
/**
 * Returns the next token found in the given matcher, advancing the matcher.
 *
 * <p>When {@code matcher.lookingAt()} succeeds, the capture groups are tested in the order written
 * below and the first non-null one decides which kind of token is built. Group 1, when non-empty,
 * is stored in {@code prefixCommentText} with a single leading {@code "\n"} or {@code "\r\n"}
 * removed. On the normal path the matcher's region is finally moved to start at {@code end}, which
 * defaults to the end of group 2 and is overridden by several branches.
 *
 * <p>When {@code lookingAt()} fails, one character is skipped and the no-argument
 * {@code getToken()} is called instead.
 *
 * @param matcher the matcher whose current region starts at the text to be tokenized
 * @return the token that was built, or the result of the no-argument {@code getToken()} on the
 *     two fallback paths. NOTE(review): the string branch (group 7) leaves {@code token} null
 *     when neither version test holds, so null appears to be a possible result — confirm.
 * @throws ParserException declared by the signature; the visible body throws
 *     {@code SyntaxException} (group 18) and {@code AbortParseException} (group 19).
 *     NOTE(review): presumably both are ParserException subtypes — confirm. The "no group
 *     matched" path throws {@code SMT.InternalException}.
 */
protected ILexToken getToken(Matcher matcher) throws ParserException {
  ILexToken token = null;
  if (matcher.lookingAt()) {
    // Record the text captured by group 1 (if any), minus one leading line terminator.
    prefixCommentText = null;
    if (matcher.groupCount() >= 1 && matcher.end(1) != matcher.start(1)) {
      prefixCommentText = matcher.group(1);
      if (prefixCommentText.startsWith("\n")) prefixCommentText = prefixCommentText.substring(1);
      else if (prefixCommentText.startsWith("\r\n"))
        prefixCommentText = prefixCommentText.substring(2);
    }
    // Default position at which the matcher resumes; individual branches may move it.
    int end = matcher.end(2);
    // System.out.println("MATCHED RANGE " + matcher.start() + " " + matcher.end() + " !" +
    // matcher.group() + "!");
    // for (int i=3; i<=matcher.groupCount(); i++) {
    // if (matcher.group(i) != null) {
    // System.out.println("MATCHED " + i + " RANGE " + matcher.start(i) + " " + matcher.end(i)
    // + " !" + matcher.group(i) + "!" + (int)matcher.group(i).charAt(0));
    // }
    // }
    int k; // index of the group currently being tested; assigned inside each condition
    IPos pos;
    String matched = null;
    if ((matched = matcher.group(k = 3)) != null) {
      token = this.LP(matcher.start(k));
    } else if ((matched = matcher.group(k = 4)) != null) {
      token = this.RP(matcher.start(k));
    } else if ((matched = matcher.group(k = 5)) != null) { // numeral
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.numeral(matched,pos);
      token = setPos(new LexNumeral(new BigInteger(matched)), pos);
      end = matcher.end(k);
    } else if ((matched = matcher.group(k = 6)) != null) { // simple symbol
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.symbol(matched,pos);
      token = setPos(new LexSymbol(matched), pos);
    } else if ((matched = matcher.group(k = 8)) != null) { // bar-quoted symbol
      // Note that group 8 is tested before group 7.
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.symbol(matched,pos);
      token = setPos(new LexSymbol(matched), pos);
    } else if ((matched = matcher.group(k = 7)) != null) { // string
      // The match is just to the initial quote; the rest of the literal is scanned by hand
      // directly on csr, one character at a time.
      int begin = matcher.start(k); // position of the initial quote
      int p = begin;
      try {
        if (smtConfig.isVersion(SMT.Configuration.SMTLIB.V25)) { // Version 2.5ff
          while (true) {
            p++;
            int c = csr.charAt(p);
            if (c == '"') {
              if (p + 1 < csr.length() && csr.charAt(p + 1) == '"') {
                // A doubled quote does not end the literal; step over its second character.
                p++;
              } else {
                // Closing quote: the literal spans begin..p inclusive.
                end = p + 1;
                matched = csr.subSequence(begin, end).toString();
                pos = pos(begin, end);
                token = setPos(new LexStringLiteral(matched, true), pos);
                break;
              }
            } else {
              // Printable ASCII, tab, CR and LF are accepted silently.
              if (c >= ' ' && c <= '~') continue;
              if (c == '\t' || c == '\r' || c == '\n') continue;
              // Character code 25 is handled as the end-of-data marker.
              if (c == 25) {
                end = p;
                matched = csr.subSequence(begin, end).toString();
                pos = pos(begin, end);
                smtConfig.log.logError(
                    smtConfig.responseFactory.error(
                        "String literal is not terminated: " + matched, pos));
                token = setPos(new LexError(matched), pos);
                break; // End of data - no closing right paren
              }
              // Any other character is reported, but scanning continues.
              smtConfig.log.logError(
                  smtConfig.responseFactory.error(
                      "Invalid character: ASCII(decimal) = " + (int) c, pos(p, p + 1)));
              continue;
            }
          }
        } else if (SMT.Configuration.SMTLIB
            .V20
            .toString()
            .equals(smtConfig.smtlib)) { // Version 2.0
          while (true) {
            p++;
            int c = csr.charAt(p);
            if (c == '\\') {
              // The character after a backslash is consumed without being inspected.
              c = csr.charAt(++p);
              // \\ is translated to \ and \" to "
              // \x for anything else is just \x
              // if (c == '\\' || c == '"') {
              // continue;
              // } else {
              // smtConfig.log.logError(smtConfig.responseFactory.error("Invalid escape
              // sequence " + (char)c + " (decimal ASCII = " + (int)c + ")",
              // pos(p,p+1)));
              // }
            } else if (c == '"') {
              // Closing quote: the literal spans begin..p inclusive.
              end = p + 1;
              matched = csr.subSequence(begin, end).toString();
              pos = pos(begin, end);
              token = setPos(new LexStringLiteral(matched, true), pos);
              break;
            } else {
              // Printable ASCII, tab, CR and LF are accepted silently.
              if (c >= ' ' && c <= '~') continue;
              if (c == '\t' || c == '\r' || c == '\n') continue;
              // Character code 25 is handled as the end-of-data marker.
              if (c == 25) {
                end = p;
                matched = csr.subSequence(begin, end).toString();
                pos = pos(begin, end);
                smtConfig.log.logError(
                    smtConfig.responseFactory.error(
                        "String literal is not terminated: " + matched, pos));
                token = setPos(new LexError(matched), pos);
                break; // End of data - no closing right paren
              }
              // Any other character is reported, but scanning continues.
              smtConfig.log.logError(
                  smtConfig.responseFactory.error(
                      "Invalid character: ASCII(decimal) = " + (int) c, pos(p, p + 1)));
              continue;
            }
          }
        }
        // NOTE(review): there is no final else here, so if neither version test holds, token
        // stays null and end stays at matcher.end(2) — confirm this cannot happen.
      } catch (IndexOutOfBoundsException e) {
        // If the CharSequence does not expand itself and does not terminate
        // itself with an end of data character, and does not end with a
        // quote character, we get this exception
        end = p;
        matched = csr.subSequence(begin, end).toString();
        pos = pos(begin, end);
        token = setPos(new LexError(matched), pos);
        smtConfig.log.logError(
            smtConfig.responseFactory.error(
                "String literal is not terminated: " + matched, token.pos()));
      }
    } else if ((matched = matcher.group(k = 9)) != null) { // colon-initiated keyword
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.keyword(matched,pos);
      token = setPos(new LexKeyword(matched), pos);
    } else if ((matched = matcher.group(k = 10)) != null) { // decimal
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.decimal(matched,pos); // FIXME - use a factory everywhere?
      token = setPos(new LexDecimal(new BigDecimal(matched)), pos);
      end = matcher.end(k);
    } else if ((matched = matcher.group(k = 11)) != null) {
      // Binary literal: the token is built from group 12 (k + 1), positioned by group 11.
      pos = pos(matcher.start(k), matcher.end(k));
      token = setPos(new LexBinaryLiteral(matcher.group(k + 1)), pos);
      end = matcher.end(k);
    } else if ((matched = matcher.group(k = 13)) != null) {
      // Hex literal: the token is built from group 14 (k + 1), positioned by group 13.
      pos = pos(matcher.start(k), matcher.end(k));
      token = setPos(new LexHexLiteral(matcher.group(k + 1)), pos);
      end = matcher.end(k);
    } else if ((matched = matcher.group(k = 15)) != null) {
      // Group 15 produces the token returned by EOD(); pos is computed but not used here.
      pos = pos(matcher.start(k), matcher.end(k));
      token = this.EOD(matcher.start(k));
    } else if ((matched = matcher.group(k = 16)) != null) {
      // Unterminated bar-quoted symbol: log the error and return an error token.
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.error(matched,pos);
      token = setPos(new LexError("Bar(|)-enclosed symbol is not terminated: " + matched), pos);
      smtConfig.log.logError(
          smtConfig.responseFactory.error(
              "Bar(|)-enclosed symbol is not terminated: " + matched, token.pos()));
      // matcher.region(end,csr.length());
      // throw new SyntaxException("Invalid token: " + matched,token.pos());
    } else if ((matched = matcher.group(k = 17)) != null) {
      // Number with leading zeros: log the error and return an error token.
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.error(matched,pos);
      String msg = "Incorrect format for a number - no leading zeros allowed: ";
      token = setPos(new LexError(msg + matched), pos);
      smtConfig.log.logError(smtConfig.responseFactory.error(msg + matched, token.pos()));
      end = matcher.end(k);
      // matcher.region(end,csr.length());
      // throw new SyntaxException("Leading zeros are not allowed: " + matched,token.pos());
    } else if ((matched = matcher.group(k = 18)) != null) {
      // This case no longer matches since we made a special case of string matching.
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.error(matched,pos);
      token = setPos(new LexError(matched), pos);
      // smtConfig.log.logError(smtConfig.responseFactory.error("Invalid string: " + matched));
      // The matcher is advanced before throwing, so the region has already moved on.
      matcher.region(end, csr.length());
      // FIXME - decide whether to throw exceptions or emit error messages and error tokens
      throw new SyntaxException(("Invalid string: " + matched), token.pos());
    } else if ((matched = matcher.group(k = 19)) != null) {
      // System.out.println("Killed");
      // Group 19 aborts the parse; the matcher is advanced before throwing.
      matcher.region(end, csr.length());
      throw new AbortParseException();
    } else if ((matched = matcher.group(k = 20)) != null) {
      // Invalid token: log the error and return an error token.
      pos = pos(matcher.start(k), matcher.end(k));
      // token = factory.error(matched,pos);
      // A leading control character is replaced by a readable description of its code.
      if (matched.charAt(0) < ' ')
        matched = "(ASCII char " + (int) matched.charAt(0) + " (decimal))";
      token = setPos(new LexError("Invalid token: " + matched), pos);
      smtConfig.log.logError(smtConfig.responseFactory.error("Invalid token: " + matched, pos));
      // matcher.region(end,csr.length());
      // throw new SyntaxException("Invalid token: " + matched,token.pos());
      // SMT.out.println(smtConfig.responseFactory.error("Invalid token: " + matched));
    } else if ((matched = matcher.group(k = 21)) != null) {
      // FIXME - This should never happen either - it is a stopgap hack, because
      // with whitespace at the very beginning of a file, the whitespace detector is not finding
      // it
      matcher.region(end, csr.length());
      return getToken();
    } else {
      // Nothing matched - this should not have happened.
      // lookingAt should not have returned true if no group matched
      // Check that all alternatives are represented in the cases above
      int b = matcher.regionStart();
      int e = matcher.regionEnd();
      // At most the first 100 characters of the region are quoted in the exception message.
      String s = csr.subSequence(b, e > b + 100 ? b + 100 : e).toString();
      if (matcher.group(1) != null) end = matcher.end(1);
      else end = matcher.end();
      // Always advance by at least one character before throwing.
      matcher.region(end > b ? end : b + 1, csr.length());
      // String group = matcher.group();
      throw new SMT.InternalException(
          "Failed to report which regular expression matched: " + " " + b + " " + e + " " + s);
    }
    // Normal exit: resume matching at end for the next call.
    if (csr != null) matcher.region(end, csr.length());
  } else {
    // FIXME - there is a problem if we have spaces at the very beginning of a file, prior to the
    // LP
    // the matcher does not match???
    int b = matcher.regionStart();
    int e = matcher.regionEnd();
    // Skip one character and try again through the no-argument overload.
    // NOTE(review): if the region is already empty (b == e), this passes start > end, which
    // Matcher.region rejects with IndexOutOfBoundsException — confirm this path is never
    // reached at end of input.
    matcher.region(b + 1, e);
    return getToken();
    // Nothing matched - this should not have happened.
    // There is an error in the regular expression, since it is not even
    // reporting an error token.
    // int n = matcher.groupCount();
    // String gr = matcher.group(2);
    // gr = matcher.group(1);
    // gr = matcher.group(0);
    // String s = csr.subSequence(b,e>b+100?b+100:e).toString();
    // throw new SMT.InternalException("Failed to report any match: something is wrong with the
    // regular expression used for parsing "
    // + matcher.regionStart() + " " + matcher.regionEnd() + " " + s);
  }
  return token;
}
/**
 * Reports where reading will resume.
 *
 * @return the start index of the matcher's current region, which is the position of the next
 *     character to be read
 */
public int currentPos() {
  final int nextIndex = matcher.regionStart();
  return nextIndex;
}