Example #1
0
    /**
     * Advances to the next token.
     *
     * <p>The current line and column are first saved as the previous position. The line and column
     * counters are then moved forward over every character up to the start of the matcher's region.
     * Finally the next token is stored in {@code currentToken}: the empty string when the region is
     * empty (end of input), the text matched by {@code TOKEN} when it matches at the region start,
     * or otherwise the single character at the current position. Except at end of input, the region
     * is moved past the token and {@code skipWhitespace()} is called.
     */
    public void nextToken() {
      previousLine = line;
      previousColumn = column;

      // Bring the line/column bookkeeping up to where the matcher's region begins.
      final int regionStart = matcher.regionStart();
      for (; pos < regionStart; ++pos) {
        if (text.charAt(pos) != '\n') {
          ++column;
        } else {
          ++line;
          column = 0;
        }
      }

      // An empty region means there is nothing left to read.
      if (matcher.regionStart() == matcher.regionEnd()) {
        currentToken = "";
        return;
      }

      matcher.usePattern(TOKEN);
      final int nextRegionStart;
      if (matcher.lookingAt()) {
        currentToken = matcher.group();
        nextRegionStart = matcher.end();
      } else {
        // No token pattern matched here, so consume exactly one character.
        currentToken = String.valueOf(text.charAt(pos));
        nextRegionStart = pos + 1;
      }
      matcher.region(nextRegionStart, matcher.regionEnd());

      skipWhitespace();
    }
  /**
   * Checks {@code Matcher.regionStart()}: it reports 0 on a fresh matcher, the lower bound passed
   * to {@code region(int, int)} afterwards, and 0 again once {@code reset()} has been called.
   */
  public void test_regionStart() {
    final Matcher matcher = Pattern.compile("(abb)").matcher("cccabbabbabbabbabb");

    // Fresh matcher: the region covers the whole input.
    final int initialStart = matcher.regionStart();
    assertEquals("Region sould start from 0 position", 0, initialStart);

    // Explicit region: the start must follow the requested lower bound.
    matcher.region(1, 10);
    final int startAfterRegion = matcher.regionStart();
    assertEquals(
        "Region sould start from 1 position after setting new region", 1, startAfterRegion);

    // Reset: the region goes back to the whole input.
    matcher.reset();
    final int startAfterReset = matcher.regionStart();
    assertEquals("Region sould start from 0 position after reset", 0, startAfterReset);
  }
Example #3
0
  /**
   * Builds the derived fields ({@code strategies} and {@code parsePattern}) from the defining
   * fields. Called from the constructor and from readObject (de-serialization).
   *
   * <p>The format pattern is split into consecutive fields with {@code formatPattern}. Each field
   * is mapped to a strategy, and every strategy whose {@code addRegex} call returns {@code true}
   * is collected. While a strategy is being asked to add its regex, {@code nextStrategy} holds the
   * strategy of the following field, or {@code null} for the last field.
   *
   * @param definingCalendar the {@link Calendar} instance used to initialize this FastDateParser
   * @throws IllegalArgumentException if the pattern does not start with a recognised field, or if
   *     field matching stops before the end of the pattern
   */
  private void init(Calendar definingCalendar) {
    final StringBuilder regexBuilder = new StringBuilder();
    final List<Strategy> usedStrategies = new ArrayList<Strategy>();

    final Matcher fieldMatcher = formatPattern.matcher(pattern);
    if (!fieldMatcher.lookingAt()) {
      final char offending = pattern.charAt(fieldMatcher.regionStart());
      throw new IllegalArgumentException("Illegal pattern character '" + offending + "'");
    }

    currentFormatField = fieldMatcher.group();
    Strategy activeStrategy = getStrategy(currentFormatField, definingCalendar);

    // Step past the first field, then keep consuming fields for as long as one matches.
    fieldMatcher.region(fieldMatcher.end(), fieldMatcher.regionEnd());
    while (fieldMatcher.lookingAt()) {
      final String upcomingField = fieldMatcher.group();
      // nextStrategy must be set before addRegex runs on the active strategy.
      nextStrategy = getStrategy(upcomingField, definingCalendar);
      if (activeStrategy.addRegex(this, regexBuilder)) {
        usedStrategies.add(activeStrategy);
      }
      currentFormatField = upcomingField;
      activeStrategy = nextStrategy;
      fieldMatcher.region(fieldMatcher.end(), fieldMatcher.regionEnd());
    }
    nextStrategy = null;

    // If the matcher stopped before the end of the pattern, part of it is unparseable.
    final int stoppedAt = fieldMatcher.regionStart();
    if (stoppedAt != fieldMatcher.regionEnd()) {
      throw new IllegalArgumentException(
          "Failed to parse \"" + pattern + "\" ; gave up at index " + stoppedAt);
    }

    // The last field has no successor; it is handled with nextStrategy == null.
    if (activeStrategy.addRegex(this, regexBuilder)) {
      usedStrategies.add(activeStrategy);
    }
    currentFormatField = null;
    strategies = usedStrategies.toArray(new Strategy[usedStrategies.size()]);
    parsePattern = Pattern.compile(regexBuilder.toString());
  }
Example #4
0
 /**
  * Returns the next token found in the given matcher, advancing the matcher's region past it.
  *
  * <p>When the matcher's pattern matches at the start of the region, capture group 1 (if it is
  * non-empty) is stored in {@code prefixCommentText} with one leading line break removed. The
  * capture groups from 3 upwards are then tested in the order written below; the first one that
  * participated in the match decides which kind of token is built. Unless a branch throws or
  * returns early, the matcher's region is finally set to start at {@code end} and to run to the
  * end of {@code csr}.
  *
  * <p>When the pattern does not match at all, the region start is moved forward by one character
  * and the no-argument {@code getToken()} overload (not visible in this block) is called.
  *
  * <p>NOTE(review): in the string-literal branch (group 7), if neither SMT-LIB version check
  * succeeds, {@code token} is left {@code null} and that {@code null} is returned - confirm this
  * is intended.
  *
  * @param matcher the matcher positioned (via its region) at the text to tokenize; its region is
  *     modified by this call
  * @return the token that was recognised; error conditions that are logged rather than thrown are
  *     reported as {@code LexError} tokens
  * @throws ParserException declared by the signature; this method explicitly throws a
  *     {@code SyntaxException} (group 18), an {@code AbortParseException} (group 19) and an
  *     {@code SMT.InternalException} (no group matched) - how these relate to
  *     {@code ParserException} is not visible here
  */
 protected ILexToken getToken(Matcher matcher) throws ParserException {
   ILexToken token = null;
   if (matcher.lookingAt()) {
     prefixCommentText = null;
     // Keep group 1 only when it matched a non-empty span; drop a single leading line break.
     if (matcher.groupCount() >= 1 && matcher.end(1) != matcher.start(1)) {
       prefixCommentText = matcher.group(1);
       if (prefixCommentText.startsWith("\n")) prefixCommentText = prefixCommentText.substring(1);
       else if (prefixCommentText.startsWith("\r\n"))
         prefixCommentText = prefixCommentText.substring(2);
     }
     // Default position at which the next region will start; several branches override it.
     int end = matcher.end(2);
     //			System.out.println("MATCHED RANGE " + matcher.start() + " " + matcher.end() + " !" +
     // matcher.group() + "!");
     //			for (int i=3; i<=matcher.groupCount(); i++) {
     //				if (matcher.group(i) != null) {
     //					System.out.println("MATCHED " + i + " RANGE " + matcher.start(i) + " " + matcher.end(i)
     // + " !" + matcher.group(i) + "!" + (int)matcher.group(i).charAt(0));
     //				}
     //			}
     // k records the group number being tested so the branch body can reuse it for start/end.
     int k;
     IPos pos;
     String matched = null;
     if ((matched = matcher.group(k = 3)) != null) {
       // Group 3: token built by LP(...) at the group's start position
       token = this.LP(matcher.start(k));
     } else if ((matched = matcher.group(k = 4)) != null) {
       // Group 4: token built by RP(...) at the group's start position
       token = this.RP(matcher.start(k));
     } else if ((matched = matcher.group(k = 5)) != null) { // numeral
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.numeral(matched,pos);
       token = setPos(new LexNumeral(new BigInteger(matched)), pos);
       end = matcher.end(k);
     } else if ((matched = matcher.group(k = 6)) != null) { // simple symbol
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.symbol(matched,pos);
       token = setPos(new LexSymbol(matched), pos);
     } else if ((matched = matcher.group(k = 8)) != null) { // bar-quoted symbol
       // Note: group 8 is deliberately tested before group 7 here.
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.symbol(matched,pos);
       token = setPos(new LexSymbol(matched), pos);
     } else if ((matched = matcher.group(k = 7)) != null) { // string
       // The match is just to the initial quote; the rest of the literal is scanned by hand
       // directly on csr, using the escaping rules of the configured SMT-LIB version.
       int begin = matcher.start(k); // position of the initial quote
       int p = begin;
       try {
         if (smtConfig.isVersion(SMT.Configuration.SMTLIB.V25)) { // Version 2.5ff
           // In this branch a doubled quote ("") inside the literal does not end it.
           while (true) {
             p++;
             int c = csr.charAt(p);
             if (c == '"') {
               if (p + 1 < csr.length() && csr.charAt(p + 1) == '"') {
                 // Doubled quote: skip the second quote and keep scanning
                 p++;
               } else {
                 // Closing quote: the literal spans begin..p inclusive
                 end = p + 1;
                 matched = csr.subSequence(begin, end).toString();
                 pos = pos(begin, end);
                 token = setPos(new LexStringLiteral(matched, true), pos);
                 break;
               }
             } else {
               // Printable ASCII and tab/CR/LF are accepted as literal content
               if (c >= ' ' && c <= '~') continue;
               if (c == '\t' || c == '\r' || c == '\n') continue;
               // Character code 25 is treated as the end-of-data marker
               if (c == 25) {
                 end = p;
                 matched = csr.subSequence(begin, end).toString();
                 pos = pos(begin, end);
                 smtConfig.log.logError(
                     smtConfig.responseFactory.error(
                         "String literal is not terminated: " + matched, pos));
                 token = setPos(new LexError(matched), pos);
                 break; // End of data reached before the closing quote
               }
               // Any other character is reported as invalid, but scanning continues
               smtConfig.log.logError(
                   smtConfig.responseFactory.error(
                       "Invalid character: ASCII(decimal) = " + (int) c, pos(p, p + 1)));
               continue;
             }
           }
         } else if (SMT.Configuration.SMTLIB
             .V20
             .toString()
             .equals(smtConfig.smtlib)) { // Version 2.0
           // In this branch a backslash causes the following character to be skipped unexamined.
           while (true) {
             p++;
             int c = csr.charAt(p);
             if (c == '\\') {
               // Consume the escaped character without inspecting it
               c = csr.charAt(++p);
               // \\ is translated to \ and \" to "
               // \x for anything else is just \x
               //								if (c == '\\' || c == '"') {
               //									continue;
               //								} else {
               //									smtConfig.log.logError(smtConfig.responseFactory.error("Invalid escape
               // sequence " + (char)c + " (decimal ASCII = " + (int)c + ")",
               //											pos(p,p+1)));
               //								}
             } else if (c == '"') {
               // Closing quote: the literal spans begin..p inclusive
               end = p + 1;
               matched = csr.subSequence(begin, end).toString();
               pos = pos(begin, end);
               token = setPos(new LexStringLiteral(matched, true), pos);
               break;
             } else {
               // Printable ASCII and tab/CR/LF are accepted as literal content
               if (c >= ' ' && c <= '~') continue;
               if (c == '\t' || c == '\r' || c == '\n') continue;
               // Character code 25 is treated as the end-of-data marker
               if (c == 25) {
                 end = p;
                 matched = csr.subSequence(begin, end).toString();
                 pos = pos(begin, end);
                 smtConfig.log.logError(
                     smtConfig.responseFactory.error(
                         "String literal is not terminated: " + matched, pos));
                 token = setPos(new LexError(matched), pos);
                 break; // End of data reached before the closing quote
               }
               // Any other character is reported as invalid, but scanning continues
               smtConfig.log.logError(
                   smtConfig.responseFactory.error(
                       "Invalid character: ASCII(decimal) = " + (int) c, pos(p, p + 1)));
               continue;
             }
           }
         }
         // NOTE(review): when neither version branch applies, token stays null - confirm.
       } catch (IndexOutOfBoundsException e) {
         // If the CharSequence does not expand itself and does not terminate
         // itself with an end of data character, and does not end with a
         // quote character, we get this exception
         end = p;
         matched = csr.subSequence(begin, end).toString();
         pos = pos(begin, end);
         token = setPos(new LexError(matched), pos);
         smtConfig.log.logError(
             smtConfig.responseFactory.error(
                 "String literal is not terminated: " + matched, token.pos()));
       }
     } else if ((matched = matcher.group(k = 9)) != null) { // colon-initiated keyword
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.keyword(matched,pos);
       token = setPos(new LexKeyword(matched), pos);
     } else if ((matched = matcher.group(k = 10)) != null) { // decimal
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.decimal(matched,pos);   // FIXME - use a factory everywhere?
       token = setPos(new LexDecimal(new BigDecimal(matched)), pos);
       end = matcher.end(k);
     } else if ((matched = matcher.group(k = 11)) != null) {
       // Group 11: binary literal; the literal's content is taken from the nested group 12
       pos = pos(matcher.start(k), matcher.end(k));
       token = setPos(new LexBinaryLiteral(matcher.group(k + 1)), pos);
       end = matcher.end(k);
     } else if ((matched = matcher.group(k = 13)) != null) {
       // Group 13: hex literal; the literal's content is taken from the nested group 14
       pos = pos(matcher.start(k), matcher.end(k));
       token = setPos(new LexHexLiteral(matcher.group(k + 1)), pos);
       end = matcher.end(k);
     } else if ((matched = matcher.group(k = 15)) != null) {
       // Group 15: token built by EOD(...); the pos computed here is not used afterwards
       pos = pos(matcher.start(k), matcher.end(k));
       token = this.EOD(matcher.start(k));
     } else if ((matched = matcher.group(k = 16)) != null) {
       // Group 16: unterminated bar-enclosed symbol - logged and returned as an error token
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.error(matched,pos);
       token = setPos(new LexError("Bar(|)-enclosed symbol is not terminated: " + matched), pos);
       smtConfig.log.logError(
           smtConfig.responseFactory.error(
               "Bar(|)-enclosed symbol is not terminated: " + matched, token.pos()));
       //				matcher.region(end,csr.length());
       //				throw new SyntaxException("Invalid token: " + matched,token.pos());
     } else if ((matched = matcher.group(k = 17)) != null) {
       // Group 17: number with leading zeros - logged and returned as an error token
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.error(matched,pos);
       String msg = "Incorrect format for a number - no leading zeros allowed: ";
       token = setPos(new LexError(msg + matched), pos);
       smtConfig.log.logError(smtConfig.responseFactory.error(msg + matched, token.pos()));
       end = matcher.end(k);
       //				matcher.region(end,csr.length());
       //				throw new SyntaxException("Leading zeros are not allowed: " + matched,token.pos());
     } else if ((matched = matcher.group(k = 18)) != null) {
       // This case no longer matches since we made a special case of string matching.
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.error(matched,pos);
       token = setPos(new LexError(matched), pos);
       // smtConfig.log.logError(smtConfig.responseFactory.error("Invalid string: " + matched));
       // Advance the region before throwing so the matcher does not stay on the bad input
       matcher.region(end, csr.length());
       // FIXME - decide whether to throw exceptions or emit error messages and error tokens
       throw new SyntaxException(("Invalid string: " + matched), token.pos());
     } else if ((matched = matcher.group(k = 19)) != null) {
       // Group 19: aborts parsing; the region is advanced first
       // System.out.println("Killed");
       matcher.region(end, csr.length());
       throw new AbortParseException();
     } else if ((matched = matcher.group(k = 20)) != null) {
       // Group 20: invalid token - logged and returned as an error token
       pos = pos(matcher.start(k), matcher.end(k));
       // token = factory.error(matched,pos);
       // Characters below space are shown by their decimal code in the message
       if (matched.charAt(0) < ' ')
         matched = "(ASCII char " + (int) matched.charAt(0) + " (decimal))";
       token = setPos(new LexError("Invalid token: " + matched), pos);
       smtConfig.log.logError(smtConfig.responseFactory.error("Invalid token: " + matched, pos));
       //				matcher.region(end,csr.length());
       //				throw new SyntaxException("Invalid token: " + matched,token.pos());
       // SMT.out.println(smtConfig.responseFactory.error("Invalid token: " + matched));
     } else if ((matched = matcher.group(k = 21)) != null) {
       // FIXME - This should never happen either - it is a stopgap hack, because
       // with whitespace at the very beginning of a file, the whitespace detector is not finding
       // it
       // Skip the matched text and retry through the no-argument getToken() overload
       matcher.region(end, csr.length());
       return getToken();
     } else {
       // Nothing matched - this should not have happened.
       // lookingAt should not have returned true if no group matched
       // Check that all alternatives are represented in the cases above
       int b = matcher.regionStart();
       int e = matcher.regionEnd();
       // Up to 100 characters of the remaining input are included in the exception message
       String s = csr.subSequence(b, e > b + 100 ? b + 100 : e).toString();
       if (matcher.group(1) != null) end = matcher.end(1);
       else end = matcher.end();
       // Move the region start forward by at least one character before throwing
       matcher.region(end > b ? end : b + 1, csr.length());
       // String group = matcher.group();
       throw new SMT.InternalException(
           "Failed to report which regular expression matched: " + " " + b + " " + e + " " + s);
     }
     // Normal exit: continue tokenizing from 'end' to the end of the character sequence
     if (csr != null) matcher.region(end, csr.length());
   } else {
     // FIXME - there is a problem if we have spaces at the very beginning of a file, prior to the
     // LP
     // the matcher does not match???
     // Workaround: drop one character from the front of the region and retry
     int b = matcher.regionStart();
     int e = matcher.regionEnd();
     matcher.region(b + 1, e);
     return getToken();
     // Nothing matched - this should not have happened.
     // There is an error in the regular expression, since it is not even
     // reporting an error token.
     //			int n = matcher.groupCount();
     //			String gr = matcher.group(2);
     //			gr = matcher.group(1);
     //			gr = matcher.group(0);
     //			String s = csr.subSequence(b,e>b+100?b+100:e).toString();
     //			throw new SMT.InternalException("Failed to report any match: something is wrong with the
     // regular expression used for parsing "
     //					+ matcher.regionStart() + " " + matcher.regionEnd() + " " + s);
   }
   return token;
 }
Example #5
0
 /**
  * Reports the position of the next character to be read.
  *
  * @return the start index of the matcher's current region
  */
 public int currentPos() {
   final int nextIndex = matcher.regionStart();
   return nextIndex;
 }