@Override public void apply(TokenStream stream) throws TokenizerException { // TODO Auto-generated method stub if (stream != null) { String token; Stemmer s; while (stream.hasNext()) { token = stream.next(); if (token != null) { token = token.toLowerCase(); if (isLettersOnly(token)) { s = new Stemmer(); for (char c : token.toCharArray()) { s.add(c); } s.stem(); stream.set(s.toString()); } } } stream.reset(); } }
/** Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#next()}. */ @Test public void testNext() { // null TokenStream stream = new TokenStream((String) null); assertNull(stream.next()); stream = null; // empty str stream = new TokenStream(""); assertNull(stream.next()); stream = null; // fwd iteration stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); assertEquals("this", stream.next()); assertEquals("is", stream.next()); assertEquals("a", stream.next()); assertEquals("test", stream.next()); assertEquals("stream", stream.next()); assertNull(stream.next()); stream = null; // fwd and reverse stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); assertEquals("this", stream.next()); stream.previous(); assertEquals("this", stream.next()); assertEquals("is", stream.next()); assertEquals("a", stream.next()); stream.reset(); assertEquals("this", stream.next()); stream = null; // with remove stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.remove(); assertEquals("is", stream.next()); stream.remove(); assertEquals("test", stream.next()); stream = null; // with merge with previous stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.next(); stream.mergeWithPrevious(); assertEquals("this is", stream.next()); stream = null; // with merge with next stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.mergeWithNext(); assertEquals("this is", stream.next()); stream = null; }
/** Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#reset()}. */ @Test public void testReset() { // empty / null TokenStream stream = new TokenStream((String) null); stream.reset(); assertNull(stream.next()); stream = null; stream = new TokenStream(""); stream.reset(); assertNull(stream.next()); stream = null; // positive run stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.next(); stream.reset(); assertEquals("this", stream.next()); stream = null; }
/** * Test method for {@link * edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#merge(edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream)}. */ @Test public void testMerge() { // merge with null TokenStream stream1 = new TokenStream("this"); stream1.append("is", "a", "test", "stream"); stream1.merge(null); assertEquals(5, stream1.getAllTokens().size()); TokenStream stream2 = new TokenStream((String) null); stream1.merge(stream2); assertEquals(5, stream1.getAllTokens().size()); stream2.merge(stream1); assertEquals(5, stream2.getAllTokens().size()); stream1 = null; stream2 = null; // proper merge stream1 = new TokenStream("this"); stream1.append("is", "a"); stream2 = new TokenStream("test"); stream2.append("stream"); stream1.merge(stream2); assertEquals(5, stream1.getAllTokens().size()); assertEquals(5, stream1.getTokenMap().size()); assertEquals(2, stream2.getAllTokens().size()); assertEquals(2, stream2.getTokenMap().size()); assertFalse(stream1.hasPrevious()); for (int i = 0; i < 4; i++) stream1.mergeWithNext(); stream1.reset(); assertEquals("this is a test stream", stream1.next()); stream1 = null; stream2 = null; // self merge stream1 = new TokenStream("this"); stream1.append("is", "a", "test", "stream"); stream2 = new TokenStream("this"); stream2.append("is", "a", "test", "stream"); stream1.merge(stream2); assertEquals(10, stream1.getAllTokens().size()); assertEquals(5, stream1.getTokenMap().size()); assertEquals(5, stream2.getAllTokens().size()); assertEquals(5, stream2.getTokenMap().size()); stream1 = null; stream2 = null; }
/**
 * Runs {@code replaceDate} over every token of the given stream.
 *
 * <p>For each token the cursor is advanced with {@code next()} and immediately moved back with
 * {@code previous()} before the stream is modified. When {@code replaceDate} returns an empty
 * string the token is removed; otherwise the replacement is written with {@code stream.set}
 * and the cursor is advanced. The stream is reset once the loop has finished. A {@code null}
 * stream is ignored.
 *
 * @param stream the token stream to process; may be {@code null}
 * @throws TokenizerException declared by the overridden signature
 */
@Override
public void apply(TokenStream stream) throws TokenizerException {
  if (stream == null) {
    // Nothing to process. The reset below must not run on a null reference.
    return;
  }

  while (stream.hasNext()) {
    String token = stream.next();
    // Step back; presumably so that set()/remove() act on the token just read.
    stream.previous();

    if (token == null) {
      // Move past a null token explicitly; without this the cursor would never advance
      // and the loop could not terminate.
      stream.next();
      continue;
    }

    String replaced = replaceDate(token);
    if (replaced.isEmpty()) {
      // No explicit advance after a removal, matching the non-removal path's intent of
      // continuing with whatever token follows.
      stream.remove();
    } else {
      stream.set(replaced);
      stream.next();
    }
  }

  stream.reset();
}
/** * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#mergeWithNext()}. */ @Test public void testMergeWithNext() { // everything is null, empty TokenStream stream = new TokenStream((String) null); assertFalse(stream.mergeWithNext()); stream = null; stream = new TokenStream(""); assertFalse(stream.mergeWithNext()); stream = null; // next is null stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.seekEnd(); assertFalse(stream.mergeWithNext()); // proper merge stream.reset(); assertTrue(stream.mergeWithNext()); assertEquals("this is", stream.next()); assertEquals(4, stream.getAllTokens().size()); stream = null; // full merge - reverse stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.seekEnd(); stream.previous(); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("test stream", stream.next()); stream.previous(); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("a test stream", stream.next()); stream.previous(); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("is a test stream", stream.next()); stream.previous(); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("this is a test stream", stream.next()); stream.previous(); assertFalse(stream.mergeWithNext()); stream = null; // full merge - forward stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); assertTrue(stream.mergeWithNext()); assertEquals("this is", stream.next()); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("this is a", stream.next()); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("this is a test", stream.next()); stream.previous(); assertTrue(stream.mergeWithNext()); assertEquals("this is a test stream", stream.next()); assertFalse(stream.mergeWithNext()); stream = null; }
/** Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#hasPrevious()}. */ @Test public void testHasPrevious() { // null TokenStream stream = new TokenStream((String) null); assertEquals(false, stream.hasPrevious()); stream = null; // empty stream = new TokenStream(""); assertEquals(false, stream.hasPrevious()); stream = null; // some text and iteration stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); assertFalse(stream.hasPrevious()); // start of stream stream.seekEnd(); assertTrue(stream.hasPrevious()); stream.previous(); // after this assertTrue(stream.hasPrevious()); stream.previous(); // after is assertTrue(stream.hasPrevious()); stream.previous(); // after a assertTrue(stream.hasPrevious()); stream.previous(); // after test assertTrue(stream.hasPrevious()); stream.previous(); // after stream assertFalse(stream.hasPrevious()); stream = null; // with seek stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.reset(); assertFalse(stream.hasPrevious()); stream = null; // forward and reverse stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.next(); assertTrue(stream.hasPrevious()); stream.previous(); assertFalse(stream.hasPrevious()); stream = null; // with remove stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.remove(); assertFalse(stream.hasPrevious()); stream = null; // with merge with previous stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.next(); stream.mergeWithPrevious(); assertFalse(stream.hasPrevious()); stream = null; // with merge with next stream = new TokenStream("this"); stream.append("is", "a", "test", "stream"); stream.mergeWithNext(); assertFalse(stream.hasPrevious()); stream = null; }