/**
 * Consumes every token produced by {@code tokenizer} and renders the stream as a single string.
 *
 * <p>Each token is written as
 * {@code term:type:partOfSpeech:semanticClass:positionIncrement:positionLength:startOffset:endOffset}
 * and is followed by a comma, so a non-empty result always ends with a trailing {@code ","}.
 * A stream that yields no tokens produces the empty string.
 *
 * <p>The tokenizer is reset, fully consumed, ended and then closed by this method, including when
 * consumption fails part-way through.
 *
 * @param tokenizer the tokenizer to drain; its input must already be set by the caller
 * @return the concatenated per-token descriptions
 * @throws Exception if resetting, advancing, ending or closing the tokenizer fails
 */
private String tokenizerToString(Tokenizer tokenizer) throws Exception {
    // Attributes must be obtained before reset(); the same instances are updated in place
    // on every incrementToken() call.
    OffsetAttribute extOffset = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncrAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLengthAtt = tokenizer.addAttribute(PositionLengthAttribute.class);
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    SemanticClassAttribute semanticClass = tokenizer.addAttribute(SemanticClassAttribute.class);
    PartOfSpeechAttribute pos = tokenizer.addAttribute(PartOfSpeechAttribute.class);

    StringBuilder result = new StringBuilder();
    try {
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            // Append straight from the term's char buffer; only the first length() chars are valid.
            result.append(term.buffer(), 0, term.length()).append(":");
            result.append(type.type()).append(":");
            result.append(pos.partOfSpeech()).append(":");
            result.append(semanticClass.semanticClass()).append(":");
            result.append(posIncrAtt.getPositionIncrement()).append(":");
            result.append(posLengthAtt.getPositionLength()).append(":");
            result.append(extOffset.startOffset()).append(":");
            result.append(extOffset.endOffset());
            result.append(",");
        }
        tokenizer.end();
    } finally {
        // Release the underlying input even if consumption failed; without close() the
        // reader stays open and the tokenizer cannot be given a new input afterwards.
        tokenizer.close();
    }
    return result.toString();
}