Пример #1
0
 public void testClear() {
   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
   set.addAll(Arrays.asList(TEST_STOP_WORDS));
   assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
   set.clear();
   assertEquals("not empty", 0, set.size());
   for (int i = 0; i < TEST_STOP_WORDS.length; i++) assertFalse(set.contains(TEST_STOP_WORDS[i]));
   set.addAll(Arrays.asList(TEST_STOP_WORDS));
   assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
   for (int i = 0; i < TEST_STOP_WORDS.length; i++) assertTrue(set.contains(TEST_STOP_WORDS[i]));
 }
Пример #2
0
  /** Test the static #copy() function with a JDK {@link Set} as a source */
  public void testCopyJDKSet() {
    Set<String> set = new HashSet<>();

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    set.addAll(Arrays.asList(TEST_STOP_WORDS));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

    assertEquals(set.size(), copy.size());
    assertEquals(set.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copy.contains(string));
    }

    List<String> newWords = new ArrayList<>();
    for (String string : stopwords) {
      newWords.add(string + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(set.contains(string));
    }
  }
Пример #3
0
  @SuppressWarnings("deprecated")
  public void testCopyCharArraySetBWCompat() {
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
    setCaseSensitive.add(Integer.valueOf(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    assertEquals(setIngoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copyCaseSens.contains(string));
    }
    // test adding terms to the copy
    List<String> newWords = new ArrayList<>();
    for (String string : stopwords) {
      newWords.add(string + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(setIngoreCase.contains(string));
      assertFalse(setCaseSensitive.contains(string));
    }
  }
Пример #4
0
 static {
   final List<String> stopWords =
       Arrays.asList(
           "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is",
           "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there",
           "these", "they", "this", "to", "was", "will", "with");
   final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, stopWords.size(), false);
   stopSet.addAll(stopWords);
   ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
 }
Пример #5
0
  public void testNonZeroOffset() {
    String[] words = {"Hello", "World", "this", "is", "a", "test"};
    char[] findme = "xthisy".toCharArray();
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(Arrays.asList(words));
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme, 1, 4)));

    // test unmodifiable
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme, 1, 4)));
  }
Пример #6
0
 /**
  * Returns as {@link CharArraySet} from wordFiles, which can be a comma-separated list of
  * filenames
  */
 protected final CharArraySet getWordSet(
     ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException {
   List<String> files = splitFileNames(wordFiles);
   CharArraySet words = null;
   if (files.size() > 0) {
     // default stopwords list has 35 or so words, but maybe don't make it that
     // big to start
     words = new CharArraySet(files.size() * 10, ignoreCase);
     for (String file : files) {
       List<String> wlist = getLines(loader, file.trim());
       words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
     }
   }
   return words;
 }
Пример #7
0
  public void testUnmodifiableSet() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    set.add(Integer.valueOf(1));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
    for (String stopword : TEST_STOP_WORDS) {
      assertTrue(set.contains(stopword));
    }
    assertTrue(set.contains(Integer.valueOf(1)));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[] {'1'}));

    try {
      CharArraySet.unmodifiableSet(null);
      fail("can not make null unmodifiable");
    } catch (NullPointerException e) {
      // expected
    }
  }
Пример #8
0
  public void testModifyOnUnmodifiable() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
    String NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

    try {
      set.add(NOT_IN_SET.toCharArray());
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    try {
      set.add(NOT_IN_SET);
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    try {
      set.add(new StringBuilder(NOT_IN_SET));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    try {
      set.clear();
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    try {
      set.add((Object) NOT_IN_SET);
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // This test was changed in 3.1, as a contains() call on the given Collection using the
    // "correct" iterator's
    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor
    // never call
    // remove() on the iterator
    try {
      set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    try {
      set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    try {
      set.addAll(Arrays.asList(NOT_IN_SET));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }

    for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
      assertTrue(set.contains(TEST_STOP_WORDS[i]));
    }
  }