Example #1
0
  /**
   * Parses a regular expression into a syntax tree and computes the tree's follow positions.
   *
   * <p>The parser state (parenthesis counters, read cursor, expression length, character-class
   * flag and leaf position counter) is reset before parsing. A leading {@code '^'} sets the
   * begin-anchor flag and a trailing, unescaped {@code '$'} sets the end-anchor flag; the
   * anchored characters are not part of the parsed expression.
   *
   * <p>NOTE(review): the anchor flags are only ever set to {@code true} here, never cleared;
   * presumably the caller resets them before each parse -- confirm.
   *
   * @param expression the characters of the regular expression to parse
   * @return the syntax tree for the expression, terminated by an end-marker token; an empty
   *     expression (or one consisting only of anchors) yields a tree holding just the end marker
   * @throws MalformedPatternException if the expression cannot be parsed
   */
  SyntaxTree _parse(char[] expression) throws MalformedPatternException {
    SyntaxTree tree;

    __openParen = __closeParen = 0;
    __regularExpression = expression;
    __bytesRead = 0;
    __expressionLength = expression.length;
    __inCharacterClass = false;

    __position = 0;
    __match(__lookahead); // Call match to read first input.

    if (__lookahead == '^') {
      __beginAnchor = true;
      __match(__lookahead);
    }

    if (__expressionLength > 0 && expression[__expressionLength - 1] == '$') {
      // A '$' preceded by an odd number of backslashes is an escaped literal dollar sign,
      // not an end anchor.  Stripping it would leave a dangling backslash at the end of the
      // expression, so only an unescaped trailing '$' is treated as the anchor.
      int precedingBackslashes = 0;
      for (int i = __expressionLength - 2; i >= 0 && expression[i] == '\\'; --i) {
        ++precedingBackslashes;
      }

      if ((precedingBackslashes & 1) == 0) {
        --__expressionLength;
        __endAnchor = true;
      }
    }

    // Only descend into the grammar when something other than anchors remains to be parsed.
    if (__expressionLength > 1 || (__expressionLength == 1 && !__beginAnchor)) {
      CatNode root;
      root = new CatNode();
      root._left = __regex();
      // end marker
      root._right = new TokenNode((char) LeafNode._END_MARKER_TOKEN, __position++);
      tree = new SyntaxTree(root, __position);
    } else {
      // Nothing but anchors (or nothing at all): the tree is just the end marker.
      tree = new SyntaxTree(new TokenNode((char) LeafNode._END_MARKER_TOKEN, 0), 1);
    }

    tree._computeFollowPositions();

    return tree;
  }
Example #2
0
  /**
   * Parses one branch: a sequence of one or more pieces, ended by {@code '|'}, end of input,
   * or a close parenthesis that has a matching open parenthesis.
   *
   * <p>A single piece is returned as-is. Several pieces are joined into a right-leaning chain
   * of {@code CatNode}s whose last {@code _right} link holds the final piece.
   *
   * @return the node representing the branch
   * @throws MalformedPatternException if a close parenthesis is seen without a matching open
   *     parenthesis, or if parsing a piece fails
   */
  private SyntaxNode __branch() throws MalformedPatternException {
    SyntaxNode piece = __piece();

    // A branch made of a single piece needs no concatenation node.
    if (__atBranchEnd()) return piece;

    CatNode head = new CatNode();
    head._left = piece;
    CatNode tail = head;

    // Every piece that is not the last one gets its own CatNode appended to the chain.
    for (piece = __piece(); !__atBranchEnd(); piece = __piece()) {
      CatNode link = new CatNode();
      link._left = piece;
      tail._right = link;
      tail = link;
    }

    // The last piece closes the chain.
    tail._right = piece;

    return head;
  }

  /**
   * Tells whether the current lookahead character ends the branch being parsed.
   *
   * @return {@code true} for {@code '|'}, end of input, or a close parenthesis while more
   *     parentheses have been opened than closed; {@code false} otherwise
   * @throws MalformedPatternException if the lookahead is a close parenthesis with no
   *     unmatched open parenthesis
   */
  private boolean __atBranchEnd() throws MalformedPatternException {
    if (__lookahead == ')') {
      if (__openParen > __closeParen) return true;

      throw new MalformedPatternException(
          "Parse error: close parenthesis without matching open parenthesis at position "
              + __bytesRead);
    }

    return __lookahead == '|' || __lookahead == _END_OF_INPUT;
  }
Example #3
0
  /**
   * Parses an interval expression following an atom and expands it into explicit copies of
   * the atom.
   *
   * <p>Three forms are accepted:
   *
   * <ul>
   *   <li><code>{n}</code> -- the atom concatenated exactly n times;
   *   <li><code>{n,}</code> -- n copies followed by a starred copy (a {@code StarNode} for
   *       n = 0, a {@code PlusNode} for n = 1);
   *   <li><code>{n,m}</code> -- n mandatory copies followed by m - n optional
   *       ({@code QuestionNode}) copies.
   * </ul>
   *
   * <p>Every copy is produced with {@code _clone}, which is handed a one-element position
   * array seeded from {@code __position}; when copies were made, the value left in that array
   * is written back to {@code __position}.
   *
   * @param atom the node the interval applies to
   * @return the node representing the repeated atom
   * @throws MalformedPatternException if the interval is malformed, its upper bound is below
   *     its lower bound, or it requests zero occurrences
   */
  private SyntaxNode __repetition(SyntaxNode atom) throws MalformedPatternException {
    __match('{');

    int lower = __parseUnsignedInteger(10, 1, Integer.MAX_VALUE);
    int[] cursor = {__position};

    // After the lower bound only '}' or ',' may follow.
    if (__lookahead != '}' && __lookahead != ',')
      throw new MalformedPatternException(
          "Parse error: unexpected character "
              + __lookahead
              + " in interval at position "
              + __bytesRead);

    SyntaxNode result;

    if (__lookahead == '}') {
      // {n}: match exactly n times.
      __match('}');

      if (lower == 0)
        throw new MalformedPatternException(
            "Parse error: Superfluous interval specified at position "
                + __bytesRead
                + ".  Number of occurences was set to zero.");

      if (lower == 1) return atom;

      result = __concatCopies(atom, lower, cursor);
    } else {
      __match(',');

      if (__lookahead == '}') {
        // {n,}: match at least n times.
        __match('}');

        if (lower == 0) return new StarNode(atom);

        if (lower == 1) return new PlusNode(atom);

        CatNode head = new CatNode();
        head._left = atom;
        CatNode tail = head;

        // n mandatory copies in total: the atom itself plus n - 1 clones.
        for (int remaining = lower - 1; remaining > 0; --remaining) {
          atom = atom._clone(cursor);

          CatNode link = new CatNode();
          link._left = atom;
          tail._right = link;
          tail = link;
        }

        // The trailing starred clone absorbs any further occurrences.
        tail._right = new StarNode(atom._clone(cursor));
        result = head;
      } else {
        // {n,m}: match at least n and at most m times.
        int upper = __parseUnsignedInteger(10, 1, Integer.MAX_VALUE);
        __match('}');

        if (upper < lower)
          throw new MalformedPatternException(
              "Parse error: invalid interval; "
                  + upper
                  + " is less than "
                  + lower
                  + " at position "
                  + __bytesRead);

        if (upper == 0)
          throw new MalformedPatternException(
              "Parse error: Superfluous interval specified at position "
                  + __bytesRead
                  + ".  Number of occurences was set to zero.");

        if (lower == 0) {
          // Only optional copies are needed.
          if (upper == 1) return new QuestionNode(atom);

          result = __concatCopies(new QuestionNode(atom), upper, cursor);
        } else if (lower == upper) {
          // Degenerates to the exact-count form.
          if (lower == 1) return atom;

          result = __concatCopies(atom, lower, cursor);
        } else {
          CatNode head = new CatNode();
          head._left = atom;
          CatNode tail = head;

          // Mandatory part: the atom itself plus lower - 1 clones.
          for (int made = 1; made < lower; ++made) {
            atom = atom._clone(cursor);

            CatNode link = new CatNode();
            link._left = atom;
            tail._right = link;
            tail = link;
          }

          // Optional part: upper - lower copies of an optional clone.
          SyntaxNode optional = new QuestionNode(atom._clone(cursor));
          int extras = upper - lower;

          if (extras == 1) tail._right = optional;
          else tail._right = __concatCopies(optional, extras, cursor);

          result = head;
        }
      }
    }

    // Publish the position value left in the array by the clone calls.
    __position = cursor[0];
    return result;
  }

  /**
   * Builds a right-leaning chain of {@code CatNode}s holding {@code copies} instances of a
   * node: the node itself first, then successive clones, each cloned from the previous one.
   *
   * @param first the node that becomes the first element of the chain
   * @param copies total number of elements in the chain; callers pass a value of at least 2
   * @param position one-element position array handed to every {@code _clone} call
   * @return the head of the chain
   */
  private SyntaxNode __concatCopies(SyntaxNode first, int copies, int[] position) {
    CatNode head = new CatNode();
    head._left = first;
    CatNode tail = head;
    SyntaxNode latest = first;

    // Interior elements: everything except the first and the last copy.
    for (int index = 2; index < copies; ++index) {
      latest = latest._clone(position);

      CatNode link = new CatNode();
      link._left = latest;
      tail._right = link;
      tail = link;
    }

    // The last copy hangs directly off the final CatNode.
    tail._right = latest._clone(position);
    return head;
  }