예제 #1
0
  /**
   * Checks how a {@code Version.LUCENE_3_0} {@link CharArraySet} treats terms containing an
   * unpaired high surrogate, once with ignore-case enabled and once case-sensitive.
   *
   * <p>With ignore-case enabled every upper-case term must be found, and so must every lower-case
   * counterpart except the last one, which is expected to be absent. In case-sensitive mode none
   * of the lower-case counterparts may be found.
   *
   * @deprecated (3.1) remove this test when lucene 3.0 "broken unicode 4" support is no longer
   *     needed.
   */
  @Deprecated
  public void testSingleHighSurrogateBWComapt() {
    final String missingMsg = "Term %s is missing in the set";
    final String falsePositiveMsg = "Term %s is in the set but shouldn't";
    final String[] upperTerms = {"ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB"};
    final String[] lowerTerms = {"abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b"};
    final int lastLower = lowerTerms.length - 1;

    // Pass 1: ignore-case set.
    CharArraySet set = new CharArraySet(Version.LUCENE_3_0, Arrays.asList(TEST_STOP_WORDS), true);
    for (int i = 0; i < upperTerms.length; i++) {
      set.add(upperTerms[i]);
    }
    for (int i = 0; i < upperTerms.length; i++) {
      final String upper = upperTerms[i];
      final String lower = lowerTerms[i];
      assertTrue(String.format(Locale.ROOT, missingMsg, upper), set.contains(upper));
      if (i != lastLower) {
        assertTrue(String.format(Locale.ROOT, missingMsg, lower), set.contains(lower));
      } else {
        // Only the last lower-case term is expected to be missing from the ignore-case set.
        assertFalse(String.format(Locale.ROOT, falsePositiveMsg, lower), set.contains(lower));
      }
    }

    // Pass 2: case-sensitive set; no lower-case counterpart may match.
    set = new CharArraySet(Version.LUCENE_3_0, Arrays.asList(TEST_STOP_WORDS), false);
    for (int i = 0; i < upperTerms.length; i++) {
      set.add(upperTerms[i]);
    }
    for (int i = 0; i < upperTerms.length; i++) {
      final String upper = upperTerms[i];
      final String lower = lowerTerms[i];
      assertTrue(String.format(Locale.ROOT, missingMsg, upper), set.contains(upper));
      assertFalse(String.format(Locale.ROOT, falsePositiveMsg, lower), set.contains(lower));
    }
  }
예제 #2
0
 /**
  * Checks that a {@code Version.LUCENE_3_0} {@link CharArraySet} finds terms containing
  * supplementary characters exactly as they were added, but does not find their lower-case
  * counterparts, whether the set was created with ignore-case enabled or disabled.
  *
  * @deprecated (3.1) remove this test when lucene 3.0 "broken unicode 4" support is no longer
  *     needed.
  */
 @Deprecated
 public void testSupplementaryCharsBWCompat() {
   final String missingMsg = "Term %s is missing in the set";
   final String falsePositiveMsg = "Term %s is in the set but shouldn't";
   // for reference see
   // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
   final String[] upperTerms = {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
   final String[] lowerTerms = {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};

   // The same expectations hold for both modes: first ignore-case, then case-sensitive.
   final boolean[] ignoreCaseModes = {true, false};
   for (boolean ignoreCase : ignoreCaseModes) {
     CharArraySet set =
         new CharArraySet(Version.LUCENE_3_0, Arrays.asList(TEST_STOP_WORDS), ignoreCase);
     for (int i = 0; i < upperTerms.length; i++) {
       set.add(upperTerms[i]);
     }
     for (int i = 0; i < upperTerms.length; i++) {
       final String upper = upperTerms[i];
       final String lower = lowerTerms[i];
       assertTrue(String.format(Locale.ROOT, missingMsg, upper), set.contains(upper));
       assertFalse(String.format(Locale.ROOT, falsePositiveMsg, lower), set.contains(lower));
     }
   }
 }
예제 #3
0
 /**
  * Fills an ignore-case set with the stop words, clears it, and fills it again, checking size and
  * membership after each step.
  */
 public void testClear() {
   final CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

   set.addAll(Arrays.asList(TEST_STOP_WORDS));
   assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());

   // After clear() the set must be empty and must not report any former member.
   set.clear();
   assertEquals("not empty", 0, set.size());
   for (String word : TEST_STOP_WORDS) {
     assertFalse(set.contains(word));
   }

   // The cleared set must be fully reusable.
   set.addAll(Arrays.asList(TEST_STOP_WORDS));
   assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
   for (String word : TEST_STOP_WORDS) {
     assertTrue(set.contains(word));
   }
 }
예제 #4
0
  /**
   * Looks up a word through a {@code char[]} slice that starts at a non-zero offset, on both a
   * modifiable set and its unmodifiable view.
   */
  public void testNonZeroOffset() {
    final String[] vocabulary = {"Hello", "World", "this", "is", "a", "test"};
    // "this" is embedded at offset 1, length 4.
    final char[] haystack = "xthisy".toCharArray();

    CharArraySet modifiable = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    modifiable.addAll(Arrays.asList(vocabulary));
    assertTrue(modifiable.contains(haystack, 1, 4));
    assertTrue(modifiable.contains(new String(haystack, 1, 4)));

    // test unmodifiable
    CharArraySet readOnly = CharArraySet.unmodifiableSet(modifiable);
    assertTrue(readOnly.contains(haystack, 1, 4));
    assertTrue(readOnly.contains(new String(haystack, 1, 4)));
  }
예제 #5
0
  /**
   * Test the static {@code CharArraySet.copy()} function with a JDK {@link Set} as a source.
   *
   * <p>The copy is expected to have the same size as the source, to contain the stop words as
   * given but not their upper-cased forms, and to be independent of the source: terms added to
   * the copy must not show up in the source set.
   */
  public void testCopyJDKSet() {
    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    Set<String> set = new HashSet<>(stopwords);

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

    // The original asserted this twice in a row; once is sufficient.
    assertEquals(set.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copy.contains(string));
    }

    // test adding terms to the copy
    List<String> newWords = new ArrayList<>();
    for (String string : stopwords) {
      newWords.add(string + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(set.contains(string));
    }
  }
예제 #6
0
 /**
  * Test for NPE: each {@code contains} overload must throw {@link NullPointerException} when it
  * is handed a {@code null} argument.
  */
 public void testContainsWithNull() {
   final CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
   final String failure = "null value must raise NPE";

   // contains(char[], int, int)
   try {
     set.contains((char[]) null, 0, 10);
     fail(failure);
   } catch (NullPointerException expected) {
     // expected
   }

   // contains(CharSequence)
   try {
     set.contains((CharSequence) null);
     fail(failure);
   } catch (NullPointerException expected) {
     // expected
   }

   // contains(Object)
   try {
     set.contains((Object) null);
     fail(failure);
   } catch (NullPointerException expected) {
     // expected
   }
 }
예제 #7
0
  /**
   * Tests {@code CharArraySet.copy()} with a {@link CharArraySet} as the source, once for an
   * ignore-case source and once for a case-sensitive source.
   *
   * <p>Each copy is expected to have the size of its own source and to answer lookups with the
   * same case behaviour as that source. Terms added to a copy must not appear in either source.
   */
  @SuppressWarnings("deprecation")
  public void testCopyCharArraySetBWCompat() {
    CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    setIgnoreCase.addAll(stopwords);
    setIgnoreCase.add(Integer.valueOf(1));
    setCaseSensitive.addAll(stopwords);
    setCaseSensitive.add(Integer.valueOf(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    // Each copy is compared with its own source; the original compared the case-sensitive source
    // against the ignore-case copy, leaving copyCaseSens.size() unchecked.
    assertEquals(setIgnoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copyCaseSens.contains(string));
    }
    // test adding terms to the copy
    List<String> newWords = new ArrayList<>();
    for (String string : stopwords) {
      newWords.add(string + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(setIgnoreCase.contains(string));
      assertFalse(setCaseSensitive.contains(string));
    }
  }
예제 #8
0
  /**
   * Wraps a populated set with {@code CharArraySet.unmodifiableSet} and checks that size and
   * membership are unchanged, and that wrapping {@code null} raises {@link NullPointerException}.
   */
  public void testUnmodifiableSet() {
    final CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.addAll(Arrays.asList(TEST_STOP_WORDS));
    source.add(Integer.valueOf(1));
    final int expectedSize = source.size();

    final CharArraySet readOnly = CharArraySet.unmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());
    for (String word : TEST_STOP_WORDS) {
      assertTrue(readOnly.contains(word));
    }
    // The Integer entry must be reachable as an Integer, a String and a char[].
    assertTrue(readOnly.contains(Integer.valueOf(1)));
    assertTrue(readOnly.contains("1"));
    assertTrue(readOnly.contains(new char[] {'1'}));

    try {
      CharArraySet.unmodifiableSet(null);
      fail("can not make null unmodifiable");
    } catch (NullPointerException expected) {
      // expected
    }
  }
예제 #9
0
 /**
  * Adds an {@link Integer} to the set and checks that it is found through the same instance, a
  * different {@code Integer} instance, a {@code String} and a {@code char[]}, first on the
  * modifiable set and then on its unmodifiable view.
  */
 public void testObjectContains() {
   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
   final Integer one = Integer.valueOf(1);
   set.add(one);

   for (int pass = 0; pass < 2; pass++) {
     if (pass == 1) {
       // test unmodifiable
       set = CharArraySet.unmodifiableSet(set);
     }
     assertTrue(set.contains(one));
     // new Integer(...) is used on purpose: Integer.valueOf(1) could return the same instance.
     assertTrue(set.contains(new Integer(1))); // another integer
     assertTrue(set.contains("1"));
     assertTrue(set.contains(new char[] {'1'}));
   }
 }
  /**
   * Rewrites, in place, the casing of the word stored in {@code buffer} at positions {@code
   * offset} to {@code offset + length - 1}.
   *
   * <p>The rules are applied in this order; the first one that matches ends processing:
   *
   * <ol>
   *   <li>A word with {@code length < 1} is left untouched.
   *   <li>If {@code onlyFirstWord} is set and {@code wordCount > 0}, every character of the word
   *       is lower-cased.
   *   <li>If {@code keep} is non-null and contains the word, the word is left as is, except that
   *       its first character is upper-cased when {@code wordCount == 0} and {@code
   *       forceFirstLetter} is set.
   *   <li>A word shorter than {@code minWordLength} is left untouched.
   *   <li>A word that starts with any entry of {@code okPrefix} is left untouched.
   *   <li>Otherwise the first character is upper-cased and all remaining characters are
   *       lower-cased.
   * </ol>
   *
   * @param buffer character buffer holding the word; modified in place
   * @param offset index of the word's first character in {@code buffer}
   * @param length number of characters in the word
   * @param wordCount number of words seen before this one; {@code 0} is treated as the first word
   */
  public void processWord(char[] buffer, int offset, int length, int wordCount) {
    if (length < 1) {
      return;
    }

    final boolean firstWord = wordCount == 0;

    if (onlyFirstWord && !firstWord && wordCount > 0) {
      for (int i = 0; i < length; i++) {
        final int pos = offset + i;
        buffer[pos] = Character.toLowerCase(buffer[pos]);
      }
      return;
    }

    if (keep != null && keep.contains(buffer, offset, length)) {
      if (firstWord && forceFirstLetter) {
        buffer[offset] = Character.toUpperCase(buffer[offset]);
      }
      return;
    }

    if (length < minWordLength) {
      return;
    }

    for (char[] prefix : okPrefix) {
      if (prefix.length > length) {
        // The word is shorter than this prefix, so it cannot start with it.
        continue;
      }
      // Count how many leading characters agree with the prefix.
      int matched = 0;
      while (matched < prefix.length && prefix[matched] == buffer[offset + matched]) {
        matched++;
      }
      if (matched == prefix.length) {
        return;
      }
    }

    // The word has at least one character here: capitalize it, lower-case the rest.
    buffer[offset] = Character.toUpperCase(buffer[offset]);
    for (int i = 1; i < length; i++) {
      final int pos = offset + i;
      buffer[pos] = Character.toLowerCase(buffer[pos]);
    }
  }
예제 #11
0
  /**
   * Checks that each mutating call on a set obtained from {@code CharArraySet.unmodifiableSet}
   * throws {@link UnsupportedOperationException}, and that the set's contents are the same
   * afterwards.
   */
  public void testModifyOnUnmodifiable() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    final int expectedSize = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());

    final String absentTerm = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(absentTerm));

    // add(char[])
    try {
      set.add(absentTerm.toCharArray());
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(String)
    try {
      set.add(absentTerm);
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(StringBuilder)
    try {
      set.add(new StringBuilder(absentTerm));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // clear()
    try {
      set.clear();
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertFalse("Changed unmodifiable set", set.contains(absentTerm));
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(Object)
    try {
      set.add((Object) absentTerm);
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // removeAll(...)
    // This test was changed in 3.1, as a contains() call on the given Collection using the
    // "correct" iterator's current key (now a char[]) on a Set<String> would not hit any element
    // of the CAS and therefore never call remove() on the iterator.
    try {
      set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // retainAll(...)
    try {
      set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(absentTerm), true));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // addAll(...)
    try {
      set.addAll(Arrays.asList(absentTerm));
      fail("Modified unmodifiable set");
    } catch (UnsupportedOperationException expected) {
      assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
    }

    // Every original stop word must still be present after all rejected modifications.
    for (String word : TEST_STOP_WORDS) {
      assertTrue(set.contains(word));
    }
  }
예제 #12
0
 /**
  * Adds every stop word to a set constructed with {@code 0} as its second argument and checks
  * that all of them are stored and can be found again.
  */
 public void testRehash() throws Exception {
   // NOTE(review): the 0 presumably requests the smallest start size so that adds force
   // rehashing - confirm against the CharArraySet constructor.
   final CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
   for (String word : TEST_STOP_WORDS) {
     cas.add(word);
   }
   assertEquals(TEST_STOP_WORDS.length, cas.size());
   for (String word : TEST_STOP_WORDS) {
     assertTrue(cas.contains(word));
   }
 }