예제 #1
0
  /**
   * Stems every purely alphabetic token of the stream in place.
   *
   * <p>Each non-null token is lower-cased and checked with {@code isLettersOnly}. Accepted tokens
   * are fed character by character to a fresh {@code Stemmer}, and the stemmer's string form
   * replaces the token in the stream. All other tokens are left untouched. After the pass,
   * {@code stream.reset()} is called. A {@code null} stream is ignored.
   *
   * @param stream the token stream to rewrite; may be {@code null}
   * @throws TokenizerException as declared by the overridden method
   */
  @Override
  public void apply(TokenStream stream) throws TokenizerException {
    if (stream == null) {
      return;
    }

    while (stream.hasNext()) {
      String token = stream.next();
      if (token == null) {
        continue;
      }

      // Locale.ROOT keeps the case mapping independent of the JVM's default locale
      // (e.g. "I" must become "i", not a dotless i under a Turkish locale).
      String lowered = token.toLowerCase(java.util.Locale.ROOT);
      if (!isLettersOnly(lowered)) {
        continue;
      }

      Stemmer stemmer = new Stemmer();
      for (char c : lowered.toCharArray()) {
        stemmer.add(c);
      }
      stemmer.stem();

      // next() already moved the cursor past the token we just read, and set() replaces the
      // token at the cursor (the one next() would return). Step back so the stem overwrites
      // its own token rather than the following one, then step forward again to continue.
      stream.previous();
      stream.set(stemmer.toString());
      stream.next();
    }

    stream.reset();
  }
예제 #2
0
 /**
  * Passes every token of the stream through {@code replaceDate} and writes the result back.
  *
  * <p>For each non-null token, the value returned by {@code replaceDate} replaces the token.
  * If that value is empty, the token is removed from the stream instead. Null tokens are
  * skipped. After the pass, {@code stream.reset()} is called. A {@code null} stream is ignored.
  *
  * @param stream the token stream to rewrite; may be {@code null}
  * @throws TokenizerException as declared by the overridden method
  */
 @Override
 public void apply(TokenStream stream) throws TokenizerException {
   // Guard first so that reset() below is never reached with a null stream.
   if (stream == null) {
     return;
   }

   while (stream.hasNext()) {
     String token = stream.next();
     if (token == null) {
       // The cursor is already past the null token; rewinding here would re-read it forever.
       continue;
     }

     // Step back so that set()/remove() act on the token that was just read.
     stream.previous();
     String replaced = replaceDate(token);
     if (replaced.isEmpty()) {
       // No next() here: presumably remove() leaves the cursor on the following token.
       stream.remove();
     } else {
       stream.set(replaced);
       stream.next();
     }
   }

   stream.reset();
 }
예제 #3
0
  /**
   * Test method for {@link
   * edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#set(java.lang.String[])}.
   *
   * <p>Scenarios, in order: set on streams built from {@code null} and from an empty string;
   * null and empty replacement tokens; set at the end of the stream; a single-token replacement;
   * and multi-token replacements at the end, the start and the middle of the stream.
   */
  @Test
  public void testSet() {
    // Stream built from null: set must not create any tokens.
    TokenStream nullBacked = new TokenStream((String) null);
    nullBacked.set("invalid");
    assertNull(nullBacked.getAllTokens());

    // Stream built from the empty string: same expectation.
    TokenStream emptyBacked = new TokenStream("");
    emptyBacked.set("invalid");
    assertNull(emptyBacked.getAllTokens());

    // Valid position, but a null or empty replacement: the current token is kept.
    TokenStream rejectedInput = fiveTokenStream();
    rejectedInput.set((String) null);
    assertEquals("this", rejectedInput.next());
    rejectedInput.previous();
    rejectedInput.set("");
    assertEquals("this", rejectedInput.next());

    // Valid replacement, but the cursor sits past the last token: nothing changes.
    TokenStream pastEnd = fiveTokenStream();
    pastEnd.seekEnd();
    pastEnd.set("valid");
    assertEquals("stream", pastEnd.previous());

    // Single replacement token at the start: size is unchanged, next() yields the new token.
    TokenStream singleSwap = fiveTokenStream();
    singleSwap.set("that");
    assertEquals(5, singleSwap.getAllTokens().size());
    assertEquals("that", singleSwap.next());

    // Several replacement tokens over the last token: next() yields the last inserted one.
    TokenStream tailSwap = fiveTokenStream();
    tailSwap.seekEnd();
    tailSwap.previous();
    tailSwap.set("of", "the", "set", "method");
    assertEquals(8, tailSwap.getAllTokens().size());
    assertEquals("method", tailSwap.next());

    // Several replacement tokens over the first token.
    TokenStream headSwap = fiveTokenStream();
    headSwap.set("you", "think", "styx");
    assertEquals(7, headSwap.getAllTokens().size());
    assertEquals("styx", headSwap.next());

    // Several replacement tokens over the second-to-last token; the tail must survive.
    TokenStream middleSwap = fiveTokenStream();
    middleSwap.seekEnd();
    middleSwap.previous();
    middleSwap.previous();
    middleSwap.set("really", "interesting");
    assertEquals(6, middleSwap.getAllTokens().size());
    assertEquals("interesting", middleSwap.next());
    assertEquals("stream", middleSwap.next());
    assertFalse(middleSwap.hasNext());
  }

  /** Builds the five-token fixture "this is a test stream" used by the set scenarios. */
  private static TokenStream fiveTokenStream() {
    TokenStream fixture = new TokenStream("this");
    fixture.append("is", "a", "test", "stream");
    return fixture;
  }