@Test public void testPrefixTokensWithInputCol() { String[] rawText1 = new String[] {"Harold's NOT around.", "Anymore, I mean."}; String[] tokens1 = new String[] {"harold's", "not", "around", "anymore", "i", "mean"}; String[] rawText2 = new String[] {"The dog's nose KNOWS!", "Good, fine, great..."}; String[] tokens2 = new String[] {"the", "dog's", "nose", "knows", "good", "fine", "great"}; List<String> tokenList = new ArrayList<>(); List<String> prefixedTokenList = new ArrayList<>(); for (String token : tokens1) { tokenList.add(token); prefixedTokenList.add("rawText1=" + token); } for (String token : tokens2) { tokenList.add(token); prefixedTokenList.add("rawText2=" + token); } String[] tokens = tokenList.toArray(new String[tokenList.size()]); String[] prefixedTokens = prefixedTokenList.toArray(new String[prefixedTokenList.size()]); // First transform without token prefixes LuceneAnalyzer analyzer = new LuceneAnalyzer() .setInputCols(new String[] {"rawText1", "rawText2"}) .setOutputCol("tokens"); assertExpectedTokens( analyzer, Collections.singletonList(new MV_MV_TokenizerTestData(rawText1, rawText2, tokens))); // Then transform with token prefixes analyzer.setPrefixTokensWithInputCol(true); assertExpectedTokens( analyzer, Collections.singletonList(new MV_MV_TokenizerTestData(rawText1, rawText2, prefixedTokens))); }