@Override public void apply(TokenStream stream) throws TokenizerException { // TODO Auto-generated method stub if (stream != null) { String token; Stemmer s; while (stream.hasNext()) { token = stream.next(); if (token != null) { token = token.toLowerCase(); if (isLettersOnly(token)) { s = new Stemmer(); for (char c : token.toCharArray()) { s.add(c); } s.stem(); stream.set(s.toString()); } } } stream.reset(); } }
/**
 * Passes every token of the stream through {@code replaceDate} and writes
 * the result back in place.
 *
 * <p>For each non-null token the value returned by {@code replaceDate} is
 * examined: an empty result removes the token from the stream, any other
 * result replaces the token. The stream is reset once the pass is complete.
 * A {@code null} stream is ignored.
 *
 * @param stream the token stream to rewrite; may be {@code null}
 * @throws TokenizerException declared in the signature; this body does not
 *         throw it directly
 */
@Override
public void apply(TokenStream stream) throws TokenizerException {
    // Guard first: the reset() at the end must not run on a null stream.
    if (stream == null) {
        return;
    }

    while (stream.hasNext()) {
        String token = stream.next();
        if (token == null) {
            // The cursor has already moved past this entry; stepping back
            // here without advancing again would loop forever.
            continue;
        }

        // Step back so set()/remove() act on the token that was just read.
        stream.previous();

        String replaced = replaceDate(token);
        if (replaced.isEmpty()) {
            // No explicit advance after remove(): presumably the cursor then
            // rests on the following token -- confirm against TokenStream.
            stream.remove();
        } else {
            stream.set(replaced);
            stream.next();
        }
    }

    stream.reset();
}
/**
 * Exercises
 * {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#set(java.lang.String[])}
 * on streams built from null/empty input, with null/empty replacement
 * values, at an invalid cursor position, and with single- and multi-token
 * replacements at the start, middle and end of a five-token stream.
 */
@Test
public void testSet() {
    // Streams built from null or empty input: set() leaves no tokens behind.
    TokenStream nullBacked = new TokenStream((String) null);
    nullBacked.set("invalid");
    assertNull(nullBacked.getAllTokens());

    TokenStream emptyBacked = new TokenStream("");
    emptyBacked.set("invalid");
    assertNull(emptyBacked.getAllTokens());

    // Valid position, but a null or empty replacement: token is unchanged.
    TokenStream ignoredValues = newSetFixture();
    ignoredValues.set((String) null);
    assertEquals("this", ignoredValues.next());
    ignoredValues.previous();
    ignoredValues.set("");
    assertEquals("this", ignoredValues.next());

    // Valid replacement, but the cursor is past the last token.
    TokenStream pastEnd = newSetFixture();
    pastEnd.seekEnd();
    pastEnd.set("valid");
    assertEquals("stream", pastEnd.previous());

    // Single-token replacement at the start.
    TokenStream single = newSetFixture();
    single.set("that");
    assertEquals(5, single.getAllTokens().size());
    assertEquals("that", single.next());

    // Multi-token replacement of the last token.
    TokenStream atEnd = newSetFixture();
    atEnd.seekEnd();
    atEnd.previous();
    atEnd.set("of", "the", "set", "method");
    assertEquals(8, atEnd.getAllTokens().size());
    assertEquals("method", atEnd.next());

    // Multi-token replacement of the first token.
    TokenStream atStart = newSetFixture();
    atStart.set("you", "think", "styx");
    assertEquals(7, atStart.getAllTokens().size());
    assertEquals("styx", atStart.next());

    // Multi-token replacement of the second-to-last token.
    TokenStream inMiddle = newSetFixture();
    inMiddle.seekEnd();
    inMiddle.previous();
    inMiddle.previous();
    inMiddle.set("really", "interesting");
    assertEquals(6, inMiddle.getAllTokens().size());
    assertEquals("interesting", inMiddle.next());
    assertEquals("stream", inMiddle.next());
    assertFalse(inMiddle.hasNext());
}

/** Builds the five-token stream "this is a test stream" used by {@code testSet}. */
private static TokenStream newSetFixture() {
    TokenStream fixture = new TokenStream("this");
    fixture.append("is", "a", "test", "stream");
    return fixture;
}