public CommentsCollection parse(final InputStream in, final String charsetName) throws IOException, UnsupportedEncodingException { boolean lastWasASlashR = false; BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName)); CommentsCollection comments = new CommentsCollection(); int r; Deque prevTwoChars = new LinkedList<Character>(Arrays.asList('z', 'z')); State state = State.CODE; LineComment currentLineComment = null; BlockComment currentBlockComment = null; StringBuffer currentContent = null; int currLine = 1; int currCol = 1; while ((r = br.read()) != -1) { char c = (char) r; if (c == '\r') { lastWasASlashR = true; } else if (c == '\n' && lastWasASlashR) { lastWasASlashR = false; continue; } else { lastWasASlashR = false; } switch (state) { case CODE: if (prevTwoChars.peekLast().equals('/') && c == '/') { currentLineComment = new LineComment(); currentLineComment.setBeginLine(currLine); currentLineComment.setBeginColumn(currCol - 1); state = State.IN_LINE_COMMENT; currentContent = new StringBuffer(); } else if (prevTwoChars.peekLast().equals('/') && c == '*') { currentBlockComment = new BlockComment(); currentBlockComment.setBeginLine(currLine); currentBlockComment.setBeginColumn(currCol - 1); state = State.IN_BLOCK_COMMENT; currentContent = new StringBuffer(); } else if (c == '"') { state = State.IN_STRING; } else { // nothing to do } break; case IN_LINE_COMMENT: if (c == '\n' || c == '\r') { currentLineComment.setContent(currentContent.toString()); currentLineComment.setEndLine(currLine); currentLineComment.setEndColumn(currCol); comments.addComment(currentLineComment); state = State.CODE; } else { currentContent.append(c); } break; case IN_BLOCK_COMMENT: if (prevTwoChars.peekLast().equals('*') && c == '/' && !prevTwoChars.peekFirst().equals('/')) { // delete last character String content = currentContent.deleteCharAt(currentContent.toString().length() - 1).toString(); if (content.startsWith("*")) { JavadocComment javadocComment = new JavadocComment(); javadocComment.setContent(content.substring(1)); javadocComment.setBeginLine(currentBlockComment.getBeginLine()); javadocComment.setBeginColumn(currentBlockComment.getBeginColumn()); javadocComment.setEndLine(currLine); javadocComment.setEndColumn(currCol + 1); comments.addComment(javadocComment); } else { currentBlockComment.setContent(content); currentBlockComment.setEndLine(currLine); currentBlockComment.setEndColumn(currCol + 1); comments.addComment(currentBlockComment); } state = State.CODE; } else { currentContent.append(c == '\r' ? '\n' : c); } break; case IN_STRING: if (!prevTwoChars.peekLast().equals('\\') && c == '"') { state = State.CODE; } break; default: throw new RuntimeException("Unexpected"); } switch (c) { case '\n': case '\r': currLine += 1; currCol = 1; break; case '\t': currCol += COLUMNS_PER_TAB; break; default: currCol += 1; } prevTwoChars.remove(); prevTwoChars.add(c); } if (state == State.IN_LINE_COMMENT) { currentLineComment.setContent(currentContent.toString()); currentLineComment.setEndLine(currLine); currentLineComment.setEndColumn(currCol); comments.addComment(currentLineComment); } return comments; }