public static void doLexerTest(
    String text, Lexer lexer, boolean checkTokenText, String... expectedTokens) {
  lexer.start(text);
  int idx = 0;
  int tokenPos = 0;
  while (lexer.getTokenType() != null) {
    if (idx >= expectedTokens.length) {
      // Report the remaining tokens in the same form the expectations use
      // (token text vs. token-type name, depending on checkTokenText).
      StringBuilder remainingTokens =
          new StringBuilder(
              "\"" + (checkTokenText ? lexer.getTokenText() : lexer.getTokenType().toString()) + "\"");
      lexer.advance();
      while (lexer.getTokenType() != null) {
        remainingTokens.append(",");
        remainingTokens
            .append(" \"")
            .append(checkTokenText ? lexer.getTokenText() : lexer.getTokenType().toString())
            .append("\"");
        lexer.advance();
      }
      fail("Too many tokens. Following tokens: " + remainingTokens);
    }
    assertEquals("Token offset mismatch at position " + idx, tokenPos, lexer.getTokenStart());
    String tokenName = checkTokenText ? lexer.getTokenText() : lexer.getTokenType().toString();
    assertEquals("Token mismatch at position " + idx, expectedTokens[idx], tokenName);
    idx++;
    tokenPos = lexer.getTokenEnd();
    lexer.advance();
  }
  if (idx < expectedTokens.length) {
    fail("Not enough tokens");
  }
}
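// Usage sketch, not part of the original: MyLangLexer and the token names below
// are hypothetical, assumed purely to illustrate doLexerTest's two modes.
public void testLexerSketch() {
  // checkTokenText = false: expectations are token-type names
  doLexerTest("if x", new MyLangLexer(), false, "IF_KEYWORD", "WHITE_SPACE", "IDENTIFIER");
  // checkTokenText = true: expectations are the raw token texts
  doLexerTest("if x", new MyLangLexer(), true, "if", " ", "x");
}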
public String transform(String testName, String[] data) throws Exception {
  String fileText = data[0];
  Lexer lexer = new ScalaLexer();
  lexer.start(fileText);
  StringBuilder buffer = new StringBuilder();
  IElementType type;
  while ((type = lexer.getTokenType()) != null) {
    CharSequence s = lexer.getBufferSequence();
    s = s.subSequence(lexer.getTokenStart(), lexer.getTokenEnd());
    buffer.append(type.toString()).append(" {").append(s).append("}");
    lexer.advance();
    if (lexer.getTokenType() != null) {
      buffer.append("\n");
    }
  }
  Console.println("------------------------ " + testName + " ------------------------");
  Console.println(buffer.toString());
  Console.println("");
  return buffer.toString();
}
public double isBashFile(File file, String data, Project project) {
  ParserDefinition definition =
      LanguageParserDefinitions.INSTANCE.forLanguage(BashFileType.BASH_LANGUAGE);
  Lexer lexer = definition.createLexer(project);
  lexer.start(data);
  int tokenCount = 0;
  Set<IElementType> tokenSet = Sets.newHashSet();
  Set<Integer> modeSet = Sets.newHashSet();
  while (lexer.getTokenType() != BashTokenTypes.BAD_CHARACTER && lexer.getTokenType() != null) {
    tokenSet.add(lexer.getTokenType());
    modeSet.add(lexer.getState());
    lexer.advance();
    tokenCount++;
  }
  double score = 0;
  if (lexer.getTokenType() == BashTokenTypes.BAD_CHARACTER) {
    score -= badCharacterWeight;
  }
  if (tokenCount > 4) {
    score += tokenLimitWeight;
  }
  score += Math.min(0.45, (double) tokenSet.size() * tokenWeight);
  score += Math.min(0.45, (double) modeSet.size() * modeWeight);
  return score;
}
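// Usage sketch, not part of the original: the 0.5 cut-off below is an assumed,
// illustrative threshold; the real decision boundary depends on how
// badCharacterWeight, tokenLimitWeight, tokenWeight and modeWeight are tuned.
public boolean looksLikeBash(File file, String content, Project project) {
  return isBashFile(file, content, project) >= 0.5;
}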
private void doSetText(final CharSequence text) {
  final TokenProcessor processor = createTokenProcessor(0);
  myLexer.start(text, 0, text.length(), myInitialState);
  mySegments.removeAll();
  int i = 0;
  while (true) {
    final IElementType tokenType = myLexer.getTokenType();
    if (tokenType == null) break;
    int data = packData(tokenType, myLexer.getState());
    processor.addToken(i, myLexer.getTokenStart(), myLexer.getTokenEnd(), data, tokenType);
    i++;
    myLexer.advance();
  }
  processor.finish();
  if (myEditor != null && !ApplicationManager.getApplication().isHeadlessEnvironment()) {
    UIUtil.invokeLaterIfNeeded(
        new DumbAwareRunnable() {
          @Override
          public void run() {
            myEditor.repaint(0, text.length());
          }
        });
  }
}
protected void checkCorrectRestart(String text) {
  Lexer mainLexer = createLexer();
  String allTokens = printTokens(text, 0, mainLexer);
  Lexer auxLexer = createLexer();
  auxLexer.start(text);
  while (true) {
    IElementType type = auxLexer.getTokenType();
    if (type == null) {
      break;
    }
    if (auxLexer.getState() == 0) {
      int tokenStart = auxLexer.getTokenStart();
      String subTokens = printTokens(text, tokenStart, mainLexer);
      if (!allTokens.endsWith(subTokens)) {
        assertEquals(
            "Restarting impossible from offset "
                + tokenStart
                + "; lexer state should not return 0 at this point",
            allTokens,
            subTokens);
      }
    }
    auxLexer.advance();
  }
}
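// Usage sketch, not part of the original: a hypothetical test exercising
// checkCorrectRestart from a lexer test case; the sample text is arbitrary.
// It verifies that lexing can be restarted at every offset where the lexer
// reports state 0 and still produce the same token stream.
public void testRestartOnEveryInitialState() {
  checkCorrectRestart("def foo = { 1 + 2 }\nval bar = \"baz\"");
}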
public void processWords(final CharSequence fileText, final Processor<WordOccurrence> processor) {
  myLexer.start(fileText, 0, fileText.length(), 0);
  WordOccurrence occurrence = null; // shared occurrence
  while (myLexer.getTokenType() != null) {
    final IElementType type = myLexer.getTokenType();
    if (type == FanTokenTypes.IDENTIFIER || FanTokenTypes.FAN_SYS_TYPE == type) {
      if (occurrence == null) {
        occurrence =
            new WordOccurrence(
                fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE);
      } else {
        occurrence.init(
            fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE);
      }
      if (!processor.process(occurrence)) {
        return;
      }
    } else if (FanTokenTypes.COMMENTS.contains(type)) {
      if (!stripWords(
          processor,
          fileText,
          myLexer.getTokenStart(),
          myLexer.getTokenEnd(),
          WordOccurrence.Kind.COMMENTS,
          occurrence)) {
        return;
      }
    } else if (FanTokenTypes.STRING_LITERALS.contains(type)) {
      if (!stripWords(
          processor,
          fileText,
          myLexer.getTokenStart(),
          myLexer.getTokenEnd(),
          WordOccurrence.Kind.LITERALS,
          occurrence)) {
        return;
      }
      if (type == FanTokenTypes.STRING_LITERAL) {
        if (!stripWords(
            processor,
            fileText,
            myLexer.getTokenStart(),
            myLexer.getTokenEnd(),
            WordOccurrence.Kind.CODE,
            occurrence)) {
          return;
        }
      }
    }
    myLexer.advance();
  }
}
private CharSequence createTemplateText(CharSequence buf, Lexer lexer) {
  StringBuilder result = new StringBuilder(buf.length());
  lexer.start(buf);
  while (lexer.getTokenType() != null) {
    if (lexer.getTokenType() == myTemplateElementType) {
      appendCurrentTemplateToken(result, buf, lexer);
    }
    lexer.advance();
  }
  return result;
}
// Assumes the lexer is positioned on an attribute-name token: advances past '='
// and the opening quote, returning the attribute's value text, or null if absent.
@Nullable
private static String parseAttributeValue(final Lexer lexer, CharSequence data) {
  lexer.advance();
  IElementType tokenType = lexer.getTokenType();
  if (XmlElementType.XML_EQ == tokenType) {
    lexer.advance();
    tokenType = lexer.getTokenType();
    if (tokenType == XmlElementType.XML_ATTRIBUTE_VALUE_START_DELIMITER) {
      lexer.advance();
      tokenType = lexer.getTokenType();
      if (XmlElementType.XML_ATTRIBUTE_VALUE_TOKEN == tokenType) {
        return data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString();
      }
    } else if (tokenType != XmlTokenType.XML_TAG_END
        && tokenType != XmlTokenType.XML_EMPTY_ELEMENT_END) {
      return data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString();
    }
  }
  return null;
}
public static String printTokens(CharSequence text, int start, Lexer lexer) {
  lexer.start(text, start, text.length());
  StringBuilder result = new StringBuilder();
  while (true) {
    IElementType tokenType = lexer.getTokenType();
    if (tokenType == null) {
      break;
    }
    String tokenText = getTokenText(lexer);
    result.append(tokenType.toString()).append(" ('").append(tokenText).append("')\n");
    lexer.advance();
  }
  return result.toString();
}
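// Usage sketch, not part of the original: MyLangLexer is a hypothetical Lexer
// implementation; printTokens emits one "TOKEN_TYPE ('token text')" line per token.
void dumpAllTokens(CharSequence text) {
  System.out.print(printTokens(text, 0, new MyLangLexer()));
}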
private void insertOuters(TreeElement root, Lexer lexer, final CharTable table) {
  TreePatcher patcher = TREE_PATCHER.forLanguage(root.getPsi().getLanguage());
  int treeOffset = 0;
  LeafElement leaf = TreeUtil.findFirstLeaf(root);
  while (lexer.getTokenType() != null) {
    IElementType tt = lexer.getTokenType();
    if (tt != myTemplateElementType) {
      // Walk forward to the leaf covering the token start, splitting a leaf if
      // the token boundary falls inside it.
      while (leaf != null && treeOffset < lexer.getTokenStart()) {
        treeOffset += leaf.getTextLength();
        if (treeOffset > lexer.getTokenStart()) {
          leaf =
              patcher.split(
                  leaf, leaf.getTextLength() - (treeOffset - lexer.getTokenStart()), table);
          treeOffset = lexer.getTokenStart();
        }
        leaf = (LeafElement) TreeUtil.nextLeaf(leaf);
      }
      if (leaf == null) break;
      final OuterLanguageElementImpl newLeaf =
          createOuterLanguageElement(lexer, table, myOuterElementType);
      patcher.insert(leaf.getTreeParent(), leaf, newLeaf);
      leaf.getTreeParent().subtreeChanged();
      leaf = newLeaf;
    }
    lexer.advance();
  }
  // The loop above can exit early (via break) when the tree runs out of leaves;
  // any remaining outer token is then appended at the end of the tree.
  if (lexer.getTokenType() != null) {
    assert lexer.getTokenType() != myTemplateElementType;
    final OuterLanguageElementImpl newLeaf =
        createOuterLanguageElement(lexer, table, myOuterElementType);
    ((CompositeElement) root).rawAddChildren(newLeaf);
    ((CompositeElement) root).subtreeChanged();
  }
}
private static void mapHtml(FileContent inputData, Language language, List<LinkInfo> result) {
  final Lexer original =
      HTMLLanguage.INSTANCE == language ? new HtmlHighlightingLexer() : new XHtmlHighlightingLexer();
  final Lexer lexer =
      new FilterLexer(
          original,
          new FilterLexer.Filter() {
            public boolean reject(final IElementType type) {
              return XmlElementType.XML_WHITE_SPACE == type;
            }
          });
  final CharSequence data = inputData.getContentAsText();
  lexer.start(data);
  IElementType tokenType = lexer.getTokenType();
  boolean linkTag = false;
  while (tokenType != null) {
    if (XmlElementType.XML_TAG_NAME == tokenType) {
      final String tagName =
          data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString();
      linkTag = LINK.equalsIgnoreCase(tagName);
      // if (BODY_TAG.equalsIgnoreCase(tagName)) {
      //   break; // there are no LINK tags under the body
      // }
    }
    if (linkTag && XmlElementType.XML_NAME == tokenType) {
      int linkTagOffset = lexer.getTokenStart();
      String href = null;
      String type = null;
      String media = null;
      String rel = null;
      String title = null;
      while (true) {
        if (tokenType == null
            || tokenType == XmlTokenType.XML_END_TAG_START
            || tokenType == XmlTokenType.XML_EMPTY_ELEMENT_END
            || tokenType == XmlTokenType.XML_START_TAG_START) {
          break;
        }
        if (XmlElementType.XML_NAME == tokenType) {
          final String attrName =
              data.subSequence(lexer.getTokenStart(), lexer.getTokenEnd()).toString();
          if (HREF_ATTR.equalsIgnoreCase(attrName)) {
            href = parseAttributeValue(lexer, data);
          } else if (MEDIA_ATTR.equalsIgnoreCase(attrName)) {
            media = parseAttributeValue(lexer, data);
          } else if (TYPE_ATTR.equalsIgnoreCase(attrName)) {
            type = parseAttributeValue(lexer, data);
          } else if (REL_ATTR.equalsIgnoreCase(attrName)) {
            rel = parseAttributeValue(lexer, data);
          } else if (TITLE_ATTR.equalsIgnoreCase(attrName)) {
            title = parseAttributeValue(lexer, data);
          }
        }
        lexer.advance();
        tokenType = lexer.getTokenType();
      }
      addResult(result, linkTagOffset, href, media, type, rel, title, false);
    }
    lexer.advance();
    tokenType = lexer.getTokenType();
  }
}
private void locateToken() {
  if (myTokenType == null) {
    IElementType type = myCurrentLexer.getTokenType();
    int start = myCurrentLexer.getTokenStart();
    String tokenText =
        myCurrentLexer
            .getBufferSequence()
            .subSequence(start, myCurrentLexer.getTokenEnd())
            .toString();
    if (myCurrentLexer == myXmlLexer && xmlSteps == 0) {
      myCurrentLexer = myScalaPlainLexer;
      myCurrentLexer.start(getBufferSequence(), start, myXmlLexer.getBufferEnd(), 0);
    }
    --xmlSteps;
    if (type == SCALA_XML_CONTENT_START) {
      final XmlTagValidator xmlTagValidator = new XmlTagValidator(myCurrentLexer);
      if (!xmlTagValidator.validate()) {
        xmlSteps = xmlTagValidator.step;
      }
      myCurrentLexer = myXmlLexer;
      myXmlState = 0;
      myCurrentLexer.start(getBufferSequence(), start, myBufferEnd, 0);
      myLayeredTagStack.push(new Stack<MyOpenXmlTag>());
      myLayeredTagStack.peek().push(new MyOpenXmlTag());
      myTokenType = myCurrentLexer.getTokenType();
      locateTextRange();
    } else if ((/* type == XML_ATTRIBUTE_VALUE_TOKEN || */ type == XML_DATA_CHARACTERS) // todo: Dafuq???
        && tokenText.startsWith("{")
        && !tokenText.startsWith("{{")
        && !inCdata) {
      myXmlState = myCurrentLexer.getState();
      (myCurrentLexer = myScalaPlainLexer).start(getBufferSequence(), start, myBufferEnd, 0);
      locateTextRange();
      myBraceStack.push(1);
      myTokenType = SCALA_IN_XML_INJECTION_START;
    } else if (type == ScalaTokenTypes.tRBRACE && myBraceStack.size() > 0) {
      int currentLayer = myBraceStack.pop();
      if (currentLayer == 1) {
        locateTextRange();
        (myCurrentLexer = myXmlLexer)
            .start(getBufferSequence(), start + 1, myBufferEnd, myXmlState);
        myTokenType = SCALA_IN_XML_INJECTION_END;
      } else {
        myBraceStack.push(--currentLayer);
      }
    } else if (type == ScalaTokenTypes.tLBRACE && myBraceStack.size() > 0) {
      int currentLayer = myBraceStack.pop();
      myBraceStack.push(++currentLayer);
    } else if ((XML_START_TAG_START == type
            || XML_COMMENT_START == type
            || XML_CDATA_START == type
            || XML_PI_START == type)
        && !myLayeredTagStack.isEmpty()) {
      if (type == XML_CDATA_START) {
        inCdata = true;
      }
      myLayeredTagStack.peek().push(new MyOpenXmlTag());
    } else if (XML_EMPTY_ELEMENT_END == type
        && !myLayeredTagStack.isEmpty()
        && !myLayeredTagStack.peek().isEmpty()
        && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) {
      myLayeredTagStack.peek().pop();
      if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) {
        myLayeredTagStack.pop();
        locateTextRange();
        startScalaPlainLexer(start + 2);
        myTokenType = XML_EMPTY_ELEMENT_END;
        return;
      }
    } else if (XML_TAG_END == type
        && !myLayeredTagStack.isEmpty()
        && !myLayeredTagStack.peek().isEmpty()) {
      MyOpenXmlTag tag = myLayeredTagStack.peek().peek();
      if (tag.state == TAG_STATE.UNDEFINED) {
        tag.state = TAG_STATE.NONEMPTY;
      } else if (tag.state == TAG_STATE.NONEMPTY) {
        myLayeredTagStack.peek().pop();
      }
      if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) {
        myLayeredTagStack.pop();
        locateTextRange();
        startScalaPlainLexer(start + 1);
        myTokenType = XML_TAG_END;
        return;
      }
    } else if (XML_PI_END == type
        && !myLayeredTagStack.isEmpty()
        && !myLayeredTagStack.peek().isEmpty()
        && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) {
      myLayeredTagStack.peek().pop();
      if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) {
        myLayeredTagStack.pop();
        locateTextRange();
        startScalaPlainLexer(start + 2);
        myTokenType = XML_PI_END;
        return;
      }
    } else if (XML_COMMENT_END == type
        && !myLayeredTagStack.isEmpty()
        && !myLayeredTagStack.peek().isEmpty()
        && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) {
      myLayeredTagStack.peek().pop();
      if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) {
        myLayeredTagStack.pop();
        locateTextRange();
        startScalaPlainLexer(start + 3);
        myTokenType = XML_COMMENT_END;
        return;
      }
    } else if (XML_CDATA_END == type
        && !myLayeredTagStack.isEmpty()
        && !myLayeredTagStack.peek().isEmpty()
        && myLayeredTagStack.peek().peek().state == TAG_STATE.UNDEFINED) {
      inCdata = false;
      myLayeredTagStack.peek().pop();
      if (myLayeredTagStack.peek().isEmpty() && checkNotNextXmlBegin(myCurrentLexer)) {
        myLayeredTagStack.pop();
        locateTextRange();
        startScalaPlainLexer(start + 3);
        myTokenType = XML_CDATA_END;
        return;
      }
    } else if (type == XML_DATA_CHARACTERS && tokenText.indexOf('{') != -1 && !inCdata) {
      int scalaToken = tokenText.indexOf('{');
      while (scalaToken != -1
          && scalaToken + 1 < tokenText.length()
          && tokenText.charAt(scalaToken + 1) == '{') {
        scalaToken = tokenText.indexOf('{', scalaToken + 2);
      }
      if (scalaToken != -1) {
        myTokenType = XML_DATA_CHARACTERS;
        myTokenStart = myCurrentLexer.getTokenStart();
        myTokenEnd = myTokenStart + scalaToken;
        myCurrentLexer.start(
            getBufferSequence(), myTokenEnd, myBufferEnd, myCurrentLexer.getState());
      }
    } else if ((type == XML_REAL_WHITE_SPACE || type == XML_WHITE_SPACE || type == TAG_WHITE_SPACE)
        && tokenText.matches("\\s*\n(\n|\\s)*")) {
      type = ScalaTokenTypes.tWHITE_SPACE_IN_LINE;
    } else if (!(type instanceof IXmlLeafElementType)) {
      ++xmlSteps;
    }
    if (myTokenType == null) {
      myTokenType = type;
      if (myTokenType == null) return;
      locateTextRange();
    }
    // We have to advance the current lexer only if we didn't start the Scala plain
    // lexer on this iteration, because of wrong behaviour of the latter (ScalaPlainLexer).
    myCurrentLexer.advance();
  }
}
@Override
public synchronized void documentChanged(DocumentEvent e) {
  final Document document = e.getDocument();
  if (document instanceof DocumentEx && ((DocumentEx) document).isInBulkUpdate()) {
    mySegments.removeAll();
    return;
  }
  if (mySegments.getSegmentCount() == 0) {
    setText(document.getCharsSequence());
    return;
  }
  CharSequence text = document.getCharsSequence();
  int oldStartOffset = e.getOffset();
  final int segmentIndex;
  try {
    segmentIndex = mySegments.findSegmentIndex(oldStartOffset) - 2;
  } catch (IndexOutOfBoundsException ex) {
    throw new IndexOutOfBoundsException(ex.getMessage() + " Lexer: " + myLexer);
  }
  final int oldStartIndex = Math.max(0, segmentIndex);
  int startIndex = oldStartIndex;
  int data;
  do {
    data = mySegments.getSegmentData(startIndex);
    if (isInitialState(data) || startIndex == 0) break;
    startIndex--;
  } while (true);
  int startOffset = mySegments.getSegmentStart(startIndex);
  int newEndOffset = e.getOffset() + e.getNewLength();
  myLexer.start(text, startOffset, text.length(), myInitialState);
  int lastTokenStart = -1;
  int lastLexerState = -1;
  while (myLexer.getTokenType() != null) {
    if (startIndex >= oldStartIndex) break;
    int tokenStart = myLexer.getTokenStart();
    int lexerState = myLexer.getState();
    if (tokenStart == lastTokenStart && lexerState == lastLexerState) {
      throw new IllegalStateException(
          "Error while updating lexer: " + e + " document text: " + document.getText());
    }
    int tokenEnd = myLexer.getTokenEnd();
    data = packData(myLexer.getTokenType(), lexerState);
    if (mySegments.getSegmentStart(startIndex) != tokenStart
        || mySegments.getSegmentEnd(startIndex) != tokenEnd
        || mySegments.getSegmentData(startIndex) != data) {
      break;
    }
    startIndex++;
    myLexer.advance();
    lastTokenStart = tokenStart;
    lastLexerState = lexerState;
  }
  startOffset = mySegments.getSegmentStart(startIndex);
  int repaintEnd = -1;
  int insertSegmentCount = 0;
  int oldEndIndex = -1;
  SegmentArrayWithData insertSegments = new SegmentArrayWithData();
  while (myLexer.getTokenType() != null) {
    int tokenStart = myLexer.getTokenStart();
    int lexerState = myLexer.getState();
    if (tokenStart == lastTokenStart && lexerState == lastLexerState) {
      throw new IllegalStateException(
          "Error while updating lexer: " + e + " document text: " + document.getText());
    }
    lastTokenStart = tokenStart;
    lastLexerState = lexerState;
    int tokenEnd = myLexer.getTokenEnd();
    data = packData(myLexer.getTokenType(), lexerState);
    if (tokenStart >= newEndOffset && lexerState == myInitialState) {
      int shiftedTokenStart = tokenStart - e.getNewLength() + e.getOldLength();
      int index = mySegments.findSegmentIndex(shiftedTokenStart);
      if (mySegments.getSegmentStart(index) == shiftedTokenStart
          && mySegments.getSegmentData(index) == data) {
        repaintEnd = tokenStart;
        oldEndIndex = index;
        break;
      }
    }
    insertSegments.setElementAt(insertSegmentCount, tokenStart, tokenEnd, data);
    insertSegmentCount++;
    myLexer.advance();
  }
  final int shift = e.getNewLength() - e.getOldLength();
  if (repaintEnd > 0) {
    while (insertSegmentCount > 0 && oldEndIndex > startIndex) {
      if (!segmentsEqual(
          mySegments, oldEndIndex - 1, insertSegments, insertSegmentCount - 1, shift)) {
        break;
      }
      insertSegmentCount--;
      oldEndIndex--;
      repaintEnd = insertSegments.getSegmentStart(insertSegmentCount);
      insertSegments.remove(insertSegmentCount, insertSegmentCount + 1);
    }
  }
  if (repaintEnd == -1) {
    repaintEnd = text.length();
  }
  if (oldEndIndex < 0) {
    oldEndIndex = mySegments.getSegmentCount();
  }
  mySegments.shiftSegments(oldEndIndex, shift);
  mySegments.replace(startIndex, oldEndIndex, insertSegments);
  if (insertSegmentCount == 0
      || oldEndIndex == startIndex + 1
          && insertSegmentCount == 1
          && data == mySegments.getSegmentData(startIndex)) {
    return;
  }
  myEditor.repaint(startOffset, repaintEnd);
}
private static boolean isCommentComplete(
    PsiComment comment, CodeDocumentationAwareCommenter commenter, Editor editor) {
  for (CommentCompleteHandler handler : Extensions.getExtensions(CommentCompleteHandler.EP_NAME)) {
    if (handler.isApplicable(comment, commenter)) {
      return handler.isCommentComplete(comment, commenter, editor);
    }
  }
  String commentText = comment.getText();
  final boolean docComment = isDocComment(comment, commenter);
  final String expectedCommentEnd =
      docComment ? commenter.getDocumentationCommentSuffix() : commenter.getBlockCommentSuffix();
  if (!commentText.endsWith(expectedCommentEnd)) return false;
  final PsiFile containingFile = comment.getContainingFile();
  final Language language = containingFile.getLanguage();
  ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(language);
  if (parserDefinition == null) {
    return true;
  }
  Lexer lexer = parserDefinition.createLexer(containingFile.getProject());
  final String commentPrefix =
      docComment ? commenter.getDocumentationCommentPrefix() : commenter.getBlockCommentPrefix();
  lexer.start(
      commentText, commentPrefix == null ? 0 : commentPrefix.length(), commentText.length());
  QuoteHandler fileTypeHandler = TypedHandler.getQuoteHandler(containingFile, editor);
  JavaLikeQuoteHandler javaLikeQuoteHandler =
      fileTypeHandler instanceof JavaLikeQuoteHandler ? (JavaLikeQuoteHandler) fileTypeHandler : null;
  while (true) {
    IElementType tokenType = lexer.getTokenType();
    if (tokenType == null) {
      return false;
    }
    if (javaLikeQuoteHandler != null
        && javaLikeQuoteHandler.getStringTokenTypes() != null
        && javaLikeQuoteHandler.getStringTokenTypes().contains(tokenType)) {
      String text = commentText.substring(lexer.getTokenStart(), lexer.getTokenEnd());
      int endOffset = comment.getTextRange().getEndOffset();
      if (text.endsWith(expectedCommentEnd)
          && endOffset < containingFile.getTextLength()
          && containingFile.getText().charAt(endOffset) == '\n') {
        return true;
      }
    }
    if (tokenType == commenter.getDocumentationCommentTokenType()
        || tokenType == commenter.getBlockCommentTokenType()) {
      return false;
    }
    if (tokenType == commenter.getLineCommentTokenType()
        && lexer.getTokenText().contains(commentPrefix)) {
      return false;
    }
    if (lexer.getTokenEnd() == commentText.length()) {
      if (tokenType == commenter.getLineCommentTokenType()) {
        String prefix = commenter.getLineCommentPrefix();
        lexer.start(
            commentText,
            lexer.getTokenStart() + (prefix == null ? 0 : prefix.length()),
            commentText.length());
        lexer.advance();
        continue;
      } else if (isInvalidPsi(comment)) {
        return false;
      }
      return true;
    }
    lexer.advance();
  }
}