Exemplo n.º 1
0
  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#remove()}.
   *
   * <p>Exercises remove() on a null-backed stream, on an empty-string stream, repeatedly until
   * the stream reports no further tokens, and with the cursor moved to the end.
   */
  @Test
  public void testRemove() {
    // Stream built from a null string: after remove() there is still no token list.
    TokenStream nullBacked = new TokenStream((String) null);
    nullBacked.remove();
    assertNull(nullBacked.getAllTokens());

    // Stream built from an empty string: same expectation.
    TokenStream emptyBacked = new TokenStream("");
    emptyBacked.remove();
    assertNull(emptyBacked.getAllTokens());

    // Keep removing while tokens remain; the size is checked before each removal (5, 4, ...).
    TokenStream draining = new TokenStream("this");
    draining.append("is", "a", "test", "stream");
    for (int expectedSize = 5; draining.hasNext(); expectedSize--) {
      assertEquals(expectedSize, draining.getAllTokens().size());
      draining.remove();
    }

    // After seekEnd() a remove() must leave all five tokens in place.
    TokenStream atEnd = new TokenStream("this");
    atEnd.append("is", "a", "test", "stream");
    atEnd.seekEnd();
    atEnd.remove();
    assertEquals(5, atEnd.getAllTokens().size());
  }
Exemplo n.º 2
0
  /**
   * Lower-cases and stems every letters-only token of the stream in place, then resets the
   * stream.
   *
   * <p>Tokens that are {@code null}, or that {@code isLettersOnly} rejects after lower-casing,
   * are left untouched (the lower-cased form is not written back for them).
   *
   * @param stream the stream whose tokens are stemmed; a {@code null} stream is ignored
   * @throws TokenizerException as declared by the overridden method
   */
  @Override
  public void apply(TokenStream stream) throws TokenizerException {
    if (stream == null) {
      return;
    }

    while (stream.hasNext()) {
      String token = stream.next();
      if (token == null) {
        continue;
      }

      // Locale.ROOT keeps the casing independent of the JVM's default locale
      // (e.g. a Turkish default would map 'I' to a dotless i).
      String lowered = token.toLowerCase(java.util.Locale.ROOT);
      if (!isLettersOnly(lowered)) {
        continue;
      }

      Stemmer stemmer = new Stemmer();
      for (char c : lowered.toCharArray()) {
        stemmer.add(c);
      }
      stemmer.stem();

      // next() has already moved the cursor past the token just read, and set() replaces the
      // token at the cursor. Step back so the stem overwrites the token it was computed from
      // rather than its successor, then step forward again to continue the scan.
      stream.previous();
      stream.set(stemmer.toString());
      stream.next();
    }

    stream.reset();
  }
Exemplo n.º 3
0
 /**
  * Passes every token of the stream through {@code replaceDate}, writing the result back in
  * place, and resets the stream afterwards.
  *
  * <p>A token whose replacement is the empty string is removed from the stream; any other
  * replacement overwrites the token. {@code null} tokens are skipped.
  *
  * @param stream the stream to rewrite; a {@code null} stream is ignored
  * @throws TokenizerException as declared by the overridden method
  */
 @Override
 public void apply(TokenStream stream) throws TokenizerException {
   if (stream == null) {
     // Nothing to rewrite, and nothing to reset: calling reset() here would throw.
     return;
   }

   while (stream.hasNext()) {
     String token = stream.next();
     if (token == null) {
       // next() already consumed the null token; stepping back over it without
       // consuming it again would make this loop spin forever.
       continue;
     }

     // Step back so that set()/remove() act on the token that was just read.
     stream.previous();
     String replaced = replaceDate(token);
     if (replaced.isEmpty()) {
       // No explicit advance after a removal: the loop re-checks hasNext() from here.
       stream.remove();
     } else {
       stream.set(replaced);
       stream.next();
     }
   }

   stream.reset();
 }
Exemplo n.º 4
0
  /**
   * Test method for {@link
   * edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#set(java.lang.String[])}.
   *
   * <p>Each scenario below works on its own freshly built stream.
   */
  @Test
  public void testSet() {
    // set() on streams built from null and from the empty string: no token list appears.
    TokenStream nullBacked = new TokenStream((String) null);
    nullBacked.set("invalid");
    assertNull(nullBacked.getAllTokens());

    TokenStream emptyBacked = new TokenStream("");
    emptyBacked.set("invalid");
    assertNull(emptyBacked.getAllTokens());

    // Valid position, but a null or empty replacement: the first token stays "this".
    TokenStream rejectedInput = new TokenStream("this");
    rejectedInput.append("is", "a", "test", "stream");
    rejectedInput.set((String) null);
    assertEquals("this", rejectedInput.next());
    rejectedInput.previous();
    rejectedInput.set("");
    assertEquals("this", rejectedInput.next());

    // Valid replacement, but the cursor is at the end: the last token stays "stream".
    TokenStream cursorAtEnd = new TokenStream("this");
    cursorAtEnd.append("is", "a", "test", "stream");
    cursorAtEnd.seekEnd();
    cursorAtEnd.set("valid");
    assertEquals("stream", cursorAtEnd.previous());

    // Single replacement at the start: size unchanged, next() yields the new token.
    TokenStream singleSwap = new TokenStream("this");
    singleSwap.append("is", "a", "test", "stream");
    singleSwap.set("that");
    assertEquals(5, singleSwap.getAllTokens().size());
    assertEquals("that", singleSwap.next());

    // Four replacements for the last token: 5 - 1 + 4 tokens, next() yields the last new one.
    TokenStream expandLast = new TokenStream("this");
    expandLast.append("is", "a", "test", "stream");
    expandLast.seekEnd();
    expandLast.previous();
    expandLast.set("of", "the", "set", "method");
    assertEquals(8, expandLast.getAllTokens().size());
    assertEquals("method", expandLast.next());

    // Three replacements for the first token: 5 - 1 + 3 tokens, next() yields the last new one.
    TokenStream expandFirst = new TokenStream("this");
    expandFirst.append("is", "a", "test", "stream");
    expandFirst.set("you", "think", "styx");
    assertEquals(7, expandFirst.getAllTokens().size());
    assertEquals("styx", expandFirst.next());

    // Two replacements for "test" in the middle: "stream" must still follow them.
    TokenStream expandMiddle = new TokenStream("this");
    expandMiddle.append("is", "a", "test", "stream");
    expandMiddle.seekEnd();
    expandMiddle.previous();
    expandMiddle.previous();
    expandMiddle.set("really", "interesting");
    assertEquals(6, expandMiddle.getAllTokens().size());
    assertEquals("interesting", expandMiddle.next());
    assertEquals("stream", expandMiddle.next());
    assertFalse(expandMiddle.hasNext());
  }
Exemplo n.º 5
0
  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#hasNext()}.
   *
   * <p>Checks hasNext() on null-backed and empty streams, during forward iteration, after
   * seeking to the end, after stepping back from the end, and after remove and merge
   * operations.
   */
  @Test
  public void testHasNext() {
    // null
    TokenStream stream = new TokenStream((String) null);
    assertFalse(stream.hasNext());

    // empty
    stream = new TokenStream("");
    assertFalse(stream.hasNext());

    // some text and iteration
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "stream");
    assertTrue(stream.hasNext());
    stream.next(); // after this
    assertTrue(stream.hasNext());
    stream.next(); // after is
    assertTrue(stream.hasNext());
    stream.next(); // after a
    assertTrue(stream.hasNext());
    stream.next(); // after test
    assertTrue(stream.hasNext());
    stream.next(); // after stream
    assertFalse(stream.hasNext());

    // with seek
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "stream");
    stream.seekEnd();
    assertFalse(stream.hasNext());

    // forward and reverse: one step back from the end exposes exactly one more token
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "stream");
    while (stream.hasNext()) {
      stream.next();
    }

    stream.previous();
    assertTrue(stream.hasNext());
    stream.next();
    assertFalse(stream.hasNext());

    // with remove: removing the last token leaves nothing ahead of the cursor
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "stream");
    stream.seekEnd();
    stream.previous();
    stream.remove();
    assertFalse(stream.hasNext());

    // with merge with previous
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "stream");
    stream.next();
    stream.mergeWithPrevious();
    assertTrue(stream.hasNext());
    stream.seekEnd();
    stream.previous();
    stream.mergeWithPrevious();
    assertTrue(stream.hasNext());

    // with merge with next
    stream = new TokenStream("this");
    stream.append("is", "a", "test", "stream");
    stream.mergeWithNext();
    assertTrue(stream.hasNext());
    stream.seekEnd();
    stream.previous();
    stream.previous();
    stream.mergeWithNext();
    stream.next();
    assertFalse(stream.hasNext());
  }