SyntaxTree _parse(char[] expression) throws MalformedPatternException { SyntaxTree tree; __openParen = __closeParen = 0; __regularExpression = expression; __bytesRead = 0; __expressionLength = expression.length; __inCharacterClass = false; __position = 0; __match(__lookahead); // Call match to read first input. if (__lookahead == '^') { __beginAnchor = true; __match(__lookahead); } if (__expressionLength > 0 && expression[__expressionLength - 1] == '$') { --__expressionLength; __endAnchor = true; } if (__expressionLength > 1 || (__expressionLength == 1 && !__beginAnchor)) { CatNode root; root = new CatNode(); root._left = __regex(); // end marker root._right = new TokenNode((char) LeafNode._END_MARKER_TOKEN, __position++); tree = new SyntaxTree(root, __position); } else tree = new SyntaxTree(new TokenNode((char) LeafNode._END_MARKER_TOKEN, 0), 1); tree._computeFollowPositions(); return tree; }
/**
 * Parses one branch: a run of one or more pieces that ends at '|', at end
 * of input, or at a ')' that closes a currently open group.
 *
 * @return the single piece when the branch contains only one; otherwise a
 *         right-leaning chain of CatNodes whose left children are the
 *         pieces in order and whose last right child is the final piece
 * @throws MalformedPatternException if a ')' is seen while no group is open
 */
private SyntaxNode __branch() throws MalformedPatternException {
    SyntaxNode piece = __piece();

    // A branch made of a single piece needs no concatenation node.
    if (__atBranchEnd())
        return piece;

    CatNode head = new CatNode();
    CatNode tail = head;
    tail._left = piece;

    for (;;) {
        piece = __piece();

        if (__atBranchEnd()) {
            // The final piece fills the open right slot of the chain.
            tail._right = piece;
            return head;
        }

        // More pieces follow: extend the chain by one concatenation node.
        CatNode next = new CatNode();
        tail._right = next;
        next._left = piece;
        tail = next;
    }
}

/**
 * Tells whether the lookahead character terminates the current branch.
 *
 * @return true for '|', end of input, or a ')' that has a matching open
 *         parenthesis; false for any other lookahead
 * @throws MalformedPatternException if the lookahead is ')' but no
 *         parenthesis is currently open
 */
private boolean __atBranchEnd() throws MalformedPatternException {
    if (__lookahead == ')') {
        if (__openParen > __closeParen)
            return true;

        throw new MalformedPatternException(
            "Parse error: close parenthesis"
            + " without matching open parenthesis at position "
            + __bytesRead);
    }

    return __lookahead == '|' || __lookahead == _END_OF_INPUT;
}
private SyntaxNode __repetition(SyntaxNode atom) throws MalformedPatternException { int min, max, startPosition[]; SyntaxNode root = null; CatNode catNode; __match('{'); min = __parseUnsignedInteger(10, 1, Integer.MAX_VALUE); startPosition = new int[1]; startPosition[0] = __position; if (__lookahead == '}') { // Match exactly min times. Concatenate the atom min times. __match('}'); if (min == 0) throw new MalformedPatternException( "Parse error: Superfluous interval specified at position " + __bytesRead + ". Number of occurences was set to zero."); if (min == 1) return atom; root = catNode = new CatNode(); catNode._left = atom; while (--min > 1) { atom = atom._clone(startPosition); catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; } catNode._right = atom._clone(startPosition); } else if (__lookahead == ',') { __match(','); if (__lookahead == '}') { // match at least min times __match('}'); if (min == 0) return new StarNode(atom); if (min == 1) return new PlusNode(atom); root = catNode = new CatNode(); catNode._left = atom; while (--min > 0) { atom = atom._clone(startPosition); catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; } catNode._right = new StarNode(atom._clone(startPosition)); } else { // match at least min times and at most max times max = __parseUnsignedInteger(10, 1, Integer.MAX_VALUE); __match('}'); if (max < min) throw new MalformedPatternException( "Parse error: invalid interval; " + max + " is less than " + min + " at position " + __bytesRead); if (max == 0) throw new MalformedPatternException( "Parse error: Superfluous interval specified at position " + __bytesRead + ". 
Number of occurences was set to zero."); if (min == 0) { if (max == 1) return new QuestionNode(atom); root = catNode = new CatNode(); atom = new QuestionNode(atom); catNode._left = atom; while (--max > 1) { atom = atom._clone(startPosition); catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; } catNode._right = atom._clone(startPosition); } else if (min == max) { if (min == 1) return atom; root = catNode = new CatNode(); catNode._left = atom; while (--min > 1) { atom = atom._clone(startPosition); catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; } catNode._right = atom._clone(startPosition); } else { int count; root = catNode = new CatNode(); catNode._left = atom; for (count = 1; count < min; count++) { atom = atom._clone(startPosition); catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; } atom = new QuestionNode(atom._clone(startPosition)); count = max - min; if (count == 1) catNode._right = atom; else { catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; while (--count > 1) { atom = atom._clone(startPosition); catNode._right = new CatNode(); catNode = (CatNode) catNode._right; catNode._left = atom; } catNode._right = atom._clone(startPosition); } } } } else throw new MalformedPatternException( "Parse error: unexpected character " + __lookahead + " in interval at position " + __bytesRead); __position = startPosition[0]; return root; }