public void processWords(final CharSequence fileText, final Processor<WordOccurrence> processor) { myLexer.start(fileText, 0, fileText.length(), 0); WordOccurrence occurrence = null; // shared occurrence while (myLexer.getTokenType() != null) { final IElementType type = myLexer.getTokenType(); if (type == FanTokenTypes.IDENTIFIER || FanTokenTypes.FAN_SYS_TYPE == type) { if (occurrence == null) { occurrence = new WordOccurrence( fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE); } else { occurrence.init( fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE); } if (!processor.process(occurrence)) { return; } } else if (FanTokenTypes.COMMENTS.contains(type)) { if (!stripWords( processor, fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.COMMENTS, occurrence)) { return; } } else if (FanTokenTypes.STRING_LITERALS.contains(type)) { if (!stripWords( processor, fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.LITERALS, occurrence)) { return; } if (type == FanTokenTypes.STRING_LITERAL) { if (!stripWords( processor, fileText, myLexer.getTokenStart(), myLexer.getTokenEnd(), WordOccurrence.Kind.CODE, occurrence)) { return; } } } myLexer.advance(); } }
private static boolean stripWords( final Processor<WordOccurrence> processor, final CharSequence tokenText, int from, int to, final WordOccurrence.Kind kind, WordOccurrence occurrence) { // This code seems strange but it is more effective as Character.isJavaIdentifier_xxx_ is quite // costly operation due to unicode int index = from; ScanWordsLoop: while (true) { while (true) { if (index == to) { break ScanWordsLoop; } char c = tokenText.charAt(index); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || (Character.isJavaIdentifierStart(c) && c != '$')) { break; } index++; } int index1 = index; while (true) { index++; if (index == to) { break; } char c = tokenText.charAt(index); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) { continue; } if (!Character.isJavaIdentifierPart(c) || c == '$') { break; } } if (occurrence == null) { occurrence = new WordOccurrence(tokenText, index1, index, kind); } else { occurrence.init(tokenText, index1, index, kind); } if (!processor.process(occurrence)) { return false; } } return true; }