/** Checks that {@code clear()} empties the set and that the set can be refilled afterwards. */
public void testClear() {
  final List<String> words = Arrays.asList(TEST_STOP_WORDS);
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

  // Fill, then wipe: nothing may remain visible through size() or contains().
  set.addAll(words);
  assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
  set.clear();
  assertEquals("not empty", 0, set.size());
  for (String word : words) {
    assertFalse(set.contains(word));
  }

  // A cleared set must accept the same words again.
  set.addAll(words);
  assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
  for (String word : words) {
    assertTrue(set.contains(word));
  }
}
/** Test the static #copy() function with a JDK {@link Set} as a source */ public void testCopyJDKSet() { Set<String> set = new HashSet<>(); List<String> stopwords = Arrays.asList(TEST_STOP_WORDS); List<String> stopwordsUpper = new ArrayList<>(); for (String string : stopwords) { stopwordsUpper.add(string.toUpperCase(Locale.ROOT)); } set.addAll(Arrays.asList(TEST_STOP_WORDS)); CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set); assertEquals(set.size(), copy.size()); assertEquals(set.size(), copy.size()); assertTrue(copy.containsAll(stopwords)); for (String string : stopwordsUpper) { assertFalse(copy.contains(string)); } List<String> newWords = new ArrayList<>(); for (String string : stopwords) { newWords.add(string + "_1"); } copy.addAll(newWords); assertTrue(copy.containsAll(stopwords)); assertTrue(copy.containsAll(newWords)); // new added terms are not in the source set for (String string : newWords) { assertFalse(set.contains(string)); } }
@SuppressWarnings("deprecated") public void testCopyCharArraySetBWCompat() { CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); List<String> stopwords = Arrays.asList(TEST_STOP_WORDS); List<String> stopwordsUpper = new ArrayList<>(); for (String string : stopwords) { stopwordsUpper.add(string.toUpperCase(Locale.ROOT)); } setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS)); setIngoreCase.add(Integer.valueOf(1)); setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS)); setCaseSensitive.add(Integer.valueOf(1)); CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase); CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive); assertEquals(setIngoreCase.size(), copy.size()); assertEquals(setCaseSensitive.size(), copy.size()); assertTrue(copy.containsAll(stopwords)); assertTrue(copy.containsAll(stopwordsUpper)); assertTrue(copyCaseSens.containsAll(stopwords)); for (String string : stopwordsUpper) { assertFalse(copyCaseSens.contains(string)); } // test adding terms to the copy List<String> newWords = new ArrayList<>(); for (String string : stopwords) { newWords.add(string + "_1"); } copy.addAll(newWords); assertTrue(copy.containsAll(stopwords)); assertTrue(copy.containsAll(stopwordsUpper)); assertTrue(copy.containsAll(newWords)); // new added terms are not in the source set for (String string : newWords) { assertFalse(setIngoreCase.contains(string)); assertFalse(setCaseSensitive.contains(string)); } }
// Builds the default English stop word set once, at class-initialization time.
static {
  final List<String> defaultWords =
      Arrays.asList(
          "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into",
          "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then",
          "there", "these", "they", "this", "to", "was", "will", "with");

  // Sized to the word count up front; the trailing `false` is the ignoreCase flag.
  final CharArraySet mutableWords =
      new CharArraySet(Version.LUCENE_CURRENT, defaultWords.size(), false);
  mutableWords.addAll(defaultWords);

  // Only the unmodifiable wrapper is published through the constant.
  ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(mutableWords);
}
public void testNonZeroOffset() { String[] words = {"Hello", "World", "this", "is", "a", "test"}; char[] findme = "xthisy".toCharArray(); CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true); set.addAll(Arrays.asList(words)); assertTrue(set.contains(findme, 1, 4)); assertTrue(set.contains(new String(findme, 1, 4))); // test unmodifiable set = CharArraySet.unmodifiableSet(set); assertTrue(set.contains(findme, 1, 4)); assertTrue(set.contains(new String(findme, 1, 4))); }
/** * Returns as {@link CharArraySet} from wordFiles, which can be a comma-separated list of * filenames */ protected final CharArraySet getWordSet( ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException { List<String> files = splitFileNames(wordFiles); CharArraySet words = null; if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { List<String> wlist = getLines(loader, file.trim()); words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); } } return words; }
public void testUnmodifiableSet() { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); set.add(Integer.valueOf(1)); final int size = set.size(); set = CharArraySet.unmodifiableSet(set); assertEquals("Set size changed due to unmodifiableSet call", size, set.size()); for (String stopword : TEST_STOP_WORDS) { assertTrue(set.contains(stopword)); } assertTrue(set.contains(Integer.valueOf(1))); assertTrue(set.contains("1")); assertTrue(set.contains(new char[] {'1'})); try { CharArraySet.unmodifiableSet(null); fail("can not make null unmodifiable"); } catch (NullPointerException e) { // expected } }
public void testModifyOnUnmodifiable() { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); final int size = set.size(); set = CharArraySet.unmodifiableSet(set); assertEquals("Set size changed due to unmodifiableSet call", size, set.size()); String NOT_IN_SET = "SirGallahad"; assertFalse("Test String already exists in set", set.contains(NOT_IN_SET)); try { set.add(NOT_IN_SET.toCharArray()); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try { set.add(NOT_IN_SET); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try { set.add(new StringBuilder(NOT_IN_SET)); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try { set.clear(); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try { set.add((Object) NOT_IN_SET); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } // This test was changed in 3.1, as a contains() call on the given Collection using the // "correct" iterator's // current key (now a char[]) on a Set<String> would 
not hit any element of the CAS and therefor // never call // remove() on the iterator try { set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true)); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertEquals("Size of unmodifiable set has changed", size, set.size()); } try { set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true)); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertEquals("Size of unmodifiable set has changed", size, set.size()); } try { set.addAll(Arrays.asList(NOT_IN_SET)); fail("Modified unmodifiable set"); } catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); } for (int i = 0; i < TEST_STOP_WORDS.length; i++) { assertTrue(set.contains(TEST_STOP_WORDS[i])); } }