public void processWords(final CharSequence fileText, final Processor<WordOccurrence> processor) { myLexer.start(fileText, 0, fileText.length(), 0); WordOccurrence occurrence = null; // shared occurrence while (myLexer.getTokenType() != null) { final IElementType type = myLexer.getTokenType(); if (type == FanTokenTypes.IDENTIFIER || FanTokenTypes.FAN_SYS_TYPE == type) { if (occurrence == null) { occurrence = new WordOccurrence( fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE); } else { occurrence.init( fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE); } if (!processor.process(occurrence)) { return; } } else if (FanTokenTypes.COMMENTS.contains(type)) { if (!stripWords( processor, fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.COMMENTS, occurrence)) { return; } } else if (FanTokenTypes.STRING_LITERALS.contains(type)) { if (!stripWords( processor, fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.LITERALS, occurrence)) { return; } if (type == FanTokenTypes.STRING_LITERAL) { if (!stripWords( processor, fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE, occurrence)) { return; } } } myLexer.advance(); } }
public String transform(String testName, String[] data) throws Exception { String fileText = data[0]; Lexer lexer = new ScalaLexer(); lexer.start(fileText); StringBuilder buffer = new StringBuilder(); IElementType type; while ((type = lexer.getTokenType()) != null) { CharSequence s = lexer.getBufferSequence(); s = s.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()); buffer.append(type.toString()).append(" {").append(s).append("}"); lexer.advance(); if (lexer.getTokenType() != null) { buffer.append("\n"); } } Console.println("------------------------ " + testName + " ------------------------"); Console.println(buffer.toString()); Console.println(""); return buffer.toString(); }
private void doSetText(final CharSequence text) { final TokenProcessor processor = createTokenProcessor(0); myLexer.start(text, 0, text.length(), myInitialState); mySegments.removeAll(); int i = 0; while (true) { final IElementType tokenType = myLexer.getTokenType(); if (tokenType == null) break; int data = packData(tokenType, myLexer.getState()); processor.addToken(i, myLexer.getTokenStart(), myLexer.getTokenEnd(), data, tokenType); i++; myLexer.advance(); } processor.finish(); if (myEditor != null && !ApplicationManager.getApplication().isHeadlessEnvironment()) { UIUtil.invokeLaterIfNeeded( new DumbAwareRunnable() { @Override public void run() { myEditor.repaint(0, text.length()); } }); } }
public static void doLexerTest( String text, Lexer lexer, boolean checkTokenText, String... expectedTokens) { lexer.start(text); int idx = 0; int tokenPos = 0; while (lexer.getTokenType() != null) { if (idx >= expectedTokens.length) { StringBuilder remainingTokens = new StringBuilder("\"" + lexer.getTokenType().toString() + "\""); lexer.advance(); while (lexer.getTokenType() != null) { remainingTokens.append(","); remainingTokens .append(" \"") .append(checkTokenText ? lexer.getTokenText() : lexer.getTokenType().toString()) .append("\""); lexer.advance(); } fail("Too many tokens. Following tokens: " + remainingTokens.toString()); } assertEquals("Token offset mismatch at position " + idx, tokenPos, lexer.getTokenStart()); String tokenName = checkTokenText ? lexer.getTokenText() : lexer.getTokenType().toString(); assertEquals("Token mismatch at position " + idx, expectedTokens[idx], tokenName); idx++; tokenPos = lexer.getTokenEnd(); lexer.advance(); } if (idx < expectedTokens.length) fail("Not enough tokens"); }
private boolean checkNotNextXmlBegin(Lexer lexer) { String text = lexer.getBufferSequence().toString(); int beginIndex = lexer.getTokenEnd(); if (beginIndex < text.length()) { text = text.substring(beginIndex).trim(); if (text.length() > 2) { text = text.substring(0, 2); } return !text.matches(XML_BEGIN_PATTERN); } return true; }
@Nullable private static String parseAttributeValue(final Lexer lexer, CharSequence data) { lexer.advance(); IElementType tokenType = lexer.getTokenType(); if (XmlElementType.XML_EQ == tokenType) { lexer.advance(); tokenType = lexer.getTokenType(); if (tokenType == XmlElementType.XML_ATTRIBUTE_VALUE_START_DELIMITER) { lexer.advance(); tokenType = lexer.getTokenType(); if (XmlElementType.XML_ATTRIBUTE_VALUE_TOKEN == tokenType) { return data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString(); } } else if (tokenType != XmlTokenType.XML_TAG_END && tokenType != XmlTokenType.XML_EMPTY_ELEMENT_END) { return data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString(); } } return null; }
private static String getTokenText(Lexer lexer) { final IElementType tokenType = lexer.getTokenType(); if (tokenType instanceof TokenWrapper) { return ((TokenWrapper) tokenType).getValue(); } String text = lexer .getBufferSequence() .subSequence(lexer.getTokenStart(), lexer.getTokenEnd()) .toString(); text = StringUtil.replace(text, "\n", "\\n"); return text; }
protected OuterLanguageElementImpl createOuterLanguageElement( final Lexer lexer, final CharTable table, final IElementType outerElementType) { final CharSequence buffer = lexer.getBufferSequence(); final int tokenStart = lexer.getTokenStart(); if (tokenStart < 0 || tokenStart > buffer.length()) { LOG.error("Invalid start: " + tokenStart + "; " + lexer); } final int tokenEnd = lexer.getTokenEnd(); if (tokenEnd < 0 || tokenEnd > buffer.length()) { LOG.error("Invalid end: " + tokenEnd + "; " + lexer); } return new OuterLanguageElementImpl( outerElementType, table.intern(buffer, tokenStart, tokenEnd)); }
private static void mapHtml(FileContent inputData, Language language, List<LinkInfo> result) { final Lexer original = HTMLLanguage.INSTANCE == language ? new HtmlHighlightingLexer() : new XHtmlHighlightingLexer(); final Lexer lexer = new FilterLexer( original, new FilterLexer.Filter() { public boolean reject(final IElementType type) { return XmlElementType.XML_WHITE_SPACE == type; } }); final CharSequence data = inputData.getContentAsText(); lexer.start(data); IElementType tokenType = lexer.getTokenType(); boolean linkTag = false; while (tokenType != null) { if (XmlElementType.XML_TAG_NAME == tokenType) { final String tagName = data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString(); linkTag = LINK.equalsIgnoreCase(tagName); // if (BODY_TAG.equalsIgnoreCase(tagName)) { // break; // there are no LINK tags under the body // } } if (linkTag && XmlElementType.XML_NAME == tokenType) { int linkTagOffset = lexer.getTokenStart(); String href = null; String type = null; String media = null; String rel = null; String title = null; while (true) { if (tokenType == null || tokenType == XmlTokenType.XML_END_TAG_START || tokenType == XmlTokenType.XML_EMPTY_ELEMENT_END || tokenType == XmlTokenType.XML_START_TAG_START) { break; } if (XmlElementType.XML_NAME == tokenType) { final String attrName = data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString(); if (HREF_ATTR.equalsIgnoreCase(attrName)) { href = parseAttributeValue(lexer, data); } else if (MEDIA_ATTR.equalsIgnoreCase(attrName)) { media = parseAttributeValue(lexer, data); } else if (TYPE_ATTR.equalsIgnoreCase(attrName)) { type = parseAttributeValue(lexer, data); } else if (REL_ATTR.equalsIgnoreCase(attrName)) { rel = parseAttributeValue(lexer, data); } else if (TITLE_ATTR.equalsIgnoreCase(attrName)) { title = parseAttributeValue(lexer, data); } } lexer.advance(); tokenType = lexer.getTokenType(); } addResult(result, linkTagOffset, href, media, type, rel, title, false); } lexer.advance(); tokenType = lexer.getTokenType(); } }
private void locateToken() { if (myTokenType == null) { IElementType type = myCurrentLexer.getTokenType(); int start = myCurrentLexer.getTokenStart(); String tokenText = myCurrentLexer .getBufferSequence() .subSequence(start, myCurrentLexer.getTokenEnd()) .toString(); if (myCurrentLexer == myXmlLexer && xmlSteps == 0) { myCurrentLexer = myScalaPlainLexer; myCurrentLexer.start(getBufferSequence(), start, myXmlLexer.getBufferEnd(), 0); } --xmlSteps; if (type == SCALA_XML_CONTENT_START) { final XmlTagValidator xmlTagValidator = new XmlTagValidator(myCurrentLexer); if (!xmlTagValidator.validate()) { xmlSteps = xmlTagValidator.step; } myCurrentLexer = myXmlLexer; myXmlState = 0; myCurrentLexer.start(getBufferSequence(), start, myBufferEnd, 0); myLayeredTagStack.push(new Stack<MyOpenXmlTag>()); myLayeredTagStack.peek().push(new MyOpenXmlTag()); myTokenType = myCurrentLexer.getTokenType(); locateTextRange(); } else if (( /*type == XML_ATTRIBUTE_VALUE_TOKEN || */ type == XML_DATA_CHARACTERS) && // todo: Dafuq??? tokenText.startsWith("{") && !tokenText.startsWith("{{") && !inCdata) { myXmlState = myCurrentLexer.getState(); (myCurrentLexer = myScalaPlainLexer).start(getBufferSequence(), start, myBufferEnd, 0); locateTextRange(); myBraceStack.push(1); myTokenType = SCALA_IN_XML_INJECTION_START; } else if (type == ScalaTokenTypes.tRBRACE && myBraceStack.size() > 0) { int currentLayer = myBraceStack.pop(); if (currentLayer == 1) { locateTextRange(); (myCurrentLexer = myXmlLexer) .start(getBufferSequence(), start + 1, myBufferEnd, myXmlState); myTokenType = SCALA_IN_XML_INJECTION_END; } else { myBraceStack.push(--currentLayer); } } else if (type == ScalaTokenTypes.tLBRACE && myBraceStack.size() > 0) { int currentLayer = myBraceStack.pop(); myBraceStack.push(++currentLayer); } else if ((XML_START_TAG_START == type || XML_COMMENT_START == type || XML_CDATA_START == type || XML_PI_START == type) && !myLayeredTagStack.isEmpty()) { if (type == XML_CDATA_START) { inCdata = true; } myLayeredTagStack.peek().push(new MyOpenXmlTag()); } else if (XML_EMPTY_ELEMENT_END == type && !myLayeredTagStack.isEmpty() && !myLayeredTagStack.peek().isEmpty() && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) { myLayeredTagStack.peek().pop(); if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) { myLayeredTagStack.pop(); locateTextRange(); startScalaPlainLexer(start + 2); myTokenType = XML_EMPTY_ELEMENT_END; return; } } else if (XML_TAG_END == type && !myLayeredTagStack.isEmpty() && !myLayeredTagStack.peek().isEmpty()) { MyOpenXmlTag tag = myLayeredTagStack.peek().peek(); if (tag.state == TAG_STATE.UNDEFINED) { tag.state = TAG_STATE.NONEMPTY; } else if (tag.state == TAG_STATE.NONEMPTY) { myLayeredTagStack.peek().pop(); } if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) { myLayeredTagStack.pop(); locateTextRange(); startScalaPlainLexer(start + 1); myTokenType = XML_TAG_END; return; } } else if (XML_PI_END == type && !myLayeredTagStack.isEmpty() && !myLayeredTagStack.peek().isEmpty() && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) { myLayeredTagStack.peek().pop(); if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) { myLayeredTagStack.pop(); locateTextRange(); startScalaPlainLexer(start + 2); myTokenType = XML_PI_END; return; } } else if (XML_COMMENT_END == type && !myLayeredTagStack.isEmpty() && !myLayeredTagStack.peek().isEmpty() && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) { myLayeredTagStack.peek().pop(); if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) { myLayeredTagStack.pop(); locateTextRange(); startScalaPlainLexer(start + 3); myTokenType = XML_COMMENT_END; return; } } else if (XML_CDATA_END == type && !myLayeredTagStack.isEmpty() && !myLayeredTagStack.peek().isEmpty() && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) { inCdata = false; myLayeredTagStack.peek().pop(); if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) { myLayeredTagStack.pop(); locateTextRange(); startScalaPlainLexer(start + 3); myTokenType = XML_CDATA_END; return; } } else if (type == XML_DATA_CHARACTERS && tokenText.indexOf('{') != -1 && !inCdata) { int scalaToken = tokenText.indexOf('{'); while (scalaToken != -1 && scalaToken + 1 < tokenText.length() && tokenText.charAt(scalaToken + 1) == '{') scalaToken = tokenText.indexOf('{', scalaToken + 2); if (scalaToken != -1) { myTokenType = XML_DATA_CHARACTERS; myTokenStart = myCurrentLexer.getTokenStart(); myTokenEnd = myTokenStart + scalaToken; myCurrentLexer.start( getBufferSequence(), myTokenEnd, myBufferEnd, myCurrentLexer.getState()); } } else if ((type == XML_REAL_WHITE_SPACE || type == XML_WHITE_SPACE || type == TAG_WHITE_SPACE) && tokenText.matches("\\s*\n(\n|\\s)*")) { type = ScalaTokenTypes.tWHITE_SPACE_IN_LINE; } else if (!(type instanceof IXmlLeafElementType)) { ++xmlSteps; } if (myTokenType == null) { myTokenType = type; if (myTokenType == null) return; locateTextRange(); } // we have to advance current lexer only if we didn't start scala plain lexer on this // iteration // because of wrong behaviour of the latter ScalaPlainLexer myCurrentLexer.advance(); } }
private void locateTextRange() { myTokenStart = myCurrentLexer.getTokenStart(); myTokenEnd = myCurrentLexer.getTokenEnd(); }
@Override public synchronized void documentChanged(DocumentEvent e) { final Document document = e.getDocument(); if (document instanceof DocumentEx && ((DocumentEx) document).isInBulkUpdate()) { mySegments.removeAll(); return; } if (mySegments.getSegmentCount() == 0) { setText(document.getCharsSequence()); return; } CharSequence text = document.getCharsSequence(); int oldStartOffset = e.getOffset(); final int segmentIndex; try { segmentIndex = mySegments.findSegmentIndex(oldStartOffset) - 2; } catch (IndexOutOfBoundsException ex) { throw new IndexOutOfBoundsException(ex.getMessage() + " Lexer: " + myLexer); } final int oldStartIndex = Math.max(0, segmentIndex); int startIndex = oldStartIndex; int data; do { data = mySegments.getSegmentData(startIndex); if (isInitialState(data) || startIndex == 0) break; startIndex--; } while (true); int startOffset = mySegments.getSegmentStart(startIndex); int newEndOffset = e.getOffset() + e.getNewLength(); myLexer.start(text, startOffset, text.length(), myInitialState); int lastTokenStart = -1; int lastLexerState = -1; while (myLexer.getTokenType() != null) { if (startIndex >= oldStartIndex) break; int tokenStart = myLexer.getTokenStart(); int lexerState = myLexer.getState(); if (tokenStart == lastTokenStart && lexerState == lastLexerState) { throw new IllegalStateException( "Error while updating lexer: " + e + " document text: " + document.getText()); } int tokenEnd = myLexer.getTokenEnd(); data = packData(myLexer.getTokenType(), lexerState); if (mySegments.getSegmentStart(startIndex) != tokenStart || mySegments.getSegmentEnd(startIndex) != tokenEnd || mySegments.getSegmentData(startIndex) != data) { break; } startIndex++; myLexer.advance(); lastTokenStart = tokenStart; lastLexerState = lexerState; } startOffset = mySegments.getSegmentStart(startIndex); int repaintEnd = -1; int insertSegmentCount = 0; int oldEndIndex = -1; SegmentArrayWithData insertSegments = new SegmentArrayWithData(); while (myLexer.getTokenType() != null) { int tokenStart = myLexer.getTokenStart(); int lexerState = myLexer.getState(); if (tokenStart == lastTokenStart && lexerState == lastLexerState) { throw new IllegalStateException( "Error while updating lexer: " + e + " document text: " + document.getText()); } lastTokenStart = tokenStart; lastLexerState = lexerState; int tokenEnd = myLexer.getTokenEnd(); data = packData(myLexer.getTokenType(), lexerState); if (tokenStart >= newEndOffset && lexerState == myInitialState) { int shiftedTokenStart = tokenStart - e.getNewLength() + e.getOldLength(); int index = mySegments.findSegmentIndex(shiftedTokenStart); if (mySegments.getSegmentStart(index) == shiftedTokenStart && mySegments.getSegmentData(index) == data) { repaintEnd = tokenStart; oldEndIndex = index; break; } } insertSegments.setElementAt(insertSegmentCount, tokenStart, tokenEnd, data); insertSegmentCount++; myLexer.advance(); } final int shift = e.getNewLength() - e.getOldLength(); if (repaintEnd > 0) { while (insertSegmentCount > 0 && oldEndIndex > startIndex) { if (!segmentsEqual( mySegments, oldEndIndex - 1, insertSegments, insertSegmentCount - 1, shift)) { break; } insertSegmentCount--; oldEndIndex--; repaintEnd = insertSegments.getSegmentStart(insertSegmentCount); insertSegments.remove(insertSegmentCount, insertSegmentCount + 1); } } if (repaintEnd == -1) { repaintEnd = text.length(); } if (oldEndIndex < 0) { oldEndIndex = mySegments.getSegmentCount(); } mySegments.shiftSegments(oldEndIndex, shift); mySegments.replace(startIndex, oldEndIndex, insertSegments); if (insertSegmentCount == 0 || oldEndIndex == startIndex + 1 && insertSegmentCount == 1 && data == mySegments.getSegmentData(startIndex)) { return; } myEditor.repaint(startOffset, repaintEnd); }
private static boolean isCommentComplete( PsiComment comment, CodeDocumentationAwareCommenter commenter, Editor editor) { for (CommentCompleteHandler handler : Extensions.getExtensions(CommentCompleteHandler.EP_NAME)) { if (handler.isApplicable(comment, commenter)) { return handler.isCommentComplete(comment, commenter, editor); } } String commentText = comment.getText(); final boolean docComment = isDocComment(comment, commenter); final String expectedCommentEnd = docComment ? commenter.getDocumentationCommentSuffix() : commenter.getBlockCommentSuffix(); if (!commentText.endsWith(expectedCommentEnd)) return false; final PsiFile containingFile = comment.getContainingFile(); final Language language = containingFile.getLanguage(); ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(language); if (parserDefinition == null) { return true; } Lexer lexer = parserDefinition.createLexer(containingFile.getProject()); final String commentPrefix = docComment ? commenter.getDocumentationCommentPrefix() : commenter.getBlockCommentPrefix(); lexer.start( commentText, commentPrefix == null ? 0 : commentPrefix.length(), commentText.length()); QuoteHandler fileTypeHandler = TypedHandler.getQuoteHandler(containingFile, editor); JavaLikeQuoteHandler javaLikeQuoteHandler = fileTypeHandler instanceof JavaLikeQuoteHandler ? (JavaLikeQuoteHandler) fileTypeHandler : null; while (true) { IElementType tokenType = lexer.getTokenType(); if (tokenType == null) { return false; } if (javaLikeQuoteHandler != null && javaLikeQuoteHandler.getStringTokenTypes() != null && javaLikeQuoteHandler.getStringTokenTypes().contains(tokenType)) { String text = commentText.substring(lexer.getTokenStart(), lexer.getTokenEnd()); int endOffset = comment.getTextRange().getEndOffset(); if (text.endsWith(expectedCommentEnd) && endOffset < containingFile.getTextLength() && containingFile.getText().charAt(endOffset) == '\n') { return true; } } if (tokenType == commenter.getDocumentationCommentTokenType() || tokenType == commenter.getBlockCommentTokenType()) { return false; } if (tokenType == commenter.getLineCommentTokenType() && lexer.getTokenText().contains(commentPrefix)) { return false; } if (lexer.getTokenEnd() == commentText.length()) { if (tokenType == commenter.getLineCommentTokenType()) { String prefix = commenter.getLineCommentPrefix(); lexer.start( commentText, lexer.getTokenStart() + (prefix == null ? 0 : prefix.length()), commentText.length()); lexer.advance(); continue; } else if (isInvalidPsi(comment)) { return false; } return true; } lexer.advance(); } }
/** * Override to modify appended token text. For example: replace a trailing space with a newline. * Should append exact <code>lexer.getTokenEnd() - lexer.getTokenStart()</code> characters! */ protected StringBuilder appendCurrentTemplateToken( StringBuilder result, CharSequence buf, Lexer lexer) { return result.append(buf, lexer.getTokenStart(), lexer.getTokenEnd()); }
@Nullable private TextRange findCommentedRange(final Commenter commenter) { final CharSequence text = myDocument.getCharsSequence(); final FileType fileType = myFile.getFileType(); if (fileType instanceof CustomSyntaxTableFileType) { Lexer lexer = new CustomFileTypeLexer(((CustomSyntaxTableFileType) fileType).getSyntaxTable()); final int caretOffset = myCaret.getOffset(); int commentStart = CharArrayUtil.lastIndexOf(text, commenter.getBlockCommentPrefix(), caretOffset); if (commentStart == -1) return null; lexer.start(text, commentStart, text.length()); if (lexer.getTokenType() == CustomHighlighterTokenType.MULTI_LINE_COMMENT && lexer.getTokenEnd() >= caretOffset) { return new TextRange(commentStart, lexer.getTokenEnd()); } return null; } final String prefix; final String suffix; // Custom uncommenter is able to find commented block inside of selected text final String selectedText = myCaret.getSelectedText(); if ((commenter instanceof CustomUncommenter) && selectedText != null) { final TextRange commentedRange = ((CustomUncommenter) commenter).findMaximumCommentedRange(selectedText); if (commentedRange == null) { return null; } // Uncommenter returns range relative to text start, so we need to shift it to make abosolute. return commentedRange.shiftRight(myCaret.getSelectionStart()); } if (commenter instanceof SelfManagingCommenter) { SelfManagingCommenter selfManagingCommenter = (SelfManagingCommenter) commenter; prefix = selfManagingCommenter.getBlockCommentPrefix( myCaret.getSelectionStart(), myDocument, mySelfManagedCommenterData); suffix = selfManagingCommenter.getBlockCommentSuffix( myCaret.getSelectionEnd(), myDocument, mySelfManagedCommenterData); } else { prefix = trim(commenter.getBlockCommentPrefix()); suffix = trim(commenter.getBlockCommentSuffix()); } if (prefix == null || suffix == null) return null; TextRange commentedRange; if (commenter instanceof SelfManagingCommenter) { commentedRange = ((SelfManagingCommenter) commenter) .getBlockCommentRange( myCaret.getSelectionStart(), myCaret.getSelectionEnd(), myDocument, mySelfManagedCommenterData); } else { if (!testSelectionForNonComments()) { return null; } commentedRange = getSelectedComments(text, prefix, suffix); } if (commentedRange == null) { PsiElement comment = findCommentAtCaret(); if (comment != null) { String commentText = comment.getText(); if (commentText.startsWith(prefix) && commentText.endsWith(suffix)) { commentedRange = comment.getTextRange(); } } } return commentedRange; }