public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  String bloomFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  IndexInput bloomIn = null;
  boolean success = false;
  try {
    bloomIn = state.directory.openInput(bloomFileName, state.context);
    CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
    // // Load the hash function used in the BloomFilter
    // hashFunction = HashFunction.forName(bloomIn.readString());
    // Load the delegate postings format
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString());
    this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);
    int numBlooms = bloomIn.readInt();
    for (int i = 0; i < numBlooms; i++) {
      int fieldNum = bloomIn.readInt();
      FuzzySet bloom = FuzzySet.deserialize(bloomIn);
      FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
      bloomsByFieldName.put(fieldInfo.name, bloom);
    }
    IOUtils.close(bloomIn);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
    // Lucene 6.2.0+ requires all index files to use index header, but prior to that we used an
    // ordinary codec header:
    version =
        CodecUtil.checkIndexHeader(
            input,
            CODEC_NAME,
            SUGGEST_CODEC_VERSION,
            SUGGEST_VERSION_CURRENT,
            state.segmentInfo.getId(),
            state.segmentSuffix);
  } else {
    version =
        CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
  }
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
    // unnecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration
      // eventually we should have some kind of circuit breaker that prevents us from going OOM
      // here with some configuration
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
private static final class CustomPerFieldCodec extends Lucene46Codec {
  private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
  private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
  private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep");

  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    if (field.equals("id")) {
      return simpleTextFormat;
    } else if (field.equals("content")) {
      return mockSepFormat;
    } else {
      return defaultFormat;
    }
  }
}
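// A usage sketch (not part of the original test): this is how such a per-field
// codec is typically wired into an IndexWriter in the Lucene 4.x API. The
// analyzer and directory here are illustrative placeholders.
IndexWriterConfig conf =
    new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46));
conf.setCodec(new CustomPerFieldCodec());
IndexWriter writer = new IndexWriter(directory, conf);
// Documents added through this writer store the "id" field with SimpleText,
// "content" with MockSep, and all other fields with the default Lucene41 format.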
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  if (delegatePostingsFormat == null) {
    throw new UnsupportedOperationException(
        "Error - "
            + getClass().getName()
            + " has been constructed without a choice of PostingsFormat");
  }
  return new BloomFilteredFieldsConsumer(
      delegatePostingsFormat.fieldsConsumer(state), state, delegatePostingsFormat);
}
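// Sketch of how a caller might supply that delegate, assuming the Lucene 4.x
// BloomFilteringPostingsFormat constructor that wraps another format. Bloom
// filters usually pay off only on primary-key-like fields, so the sketch
// applies the wrapped format to "id" alone; the field name is illustrative.
PostingsFormat bloomed = new BloomFilteringPostingsFormat(PostingsFormat.forName("Lucene41"));
Codec codec =
    new Lucene46Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return field.equals("id") ? bloomed : super.getPostingsFormatForField(field);
      }
    };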
/**
 * Check codec restrictions.
 *
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  assumeFalse(
      "Class not allowed to use codec: " + codec.getName() + ".",
      shouldAvoidCodec(codec.getName()));
  if (codec instanceof RandomCodec && !avoidCodecs.isEmpty()) {
    for (String name : ((RandomCodec) codec).formatNames) {
      assumeFalse(
          "Class not allowed to use postings format: " + name + ".", shouldAvoidCodec(name));
    }
  }
  PostingsFormat pf = codec.postingsFormat();
  assumeFalse(
      "Class not allowed to use postings format: " + pf.getName() + ".",
      shouldAvoidCodec(pf.getName()));
  assumeFalse(
      "Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".",
      shouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT));
}
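// For context: the avoidCodecs set consulted above is populated from the
// @SuppressCodecs annotation (read in the before() method further down). A
// test class opts out of codecs it cannot handle like this; the class name
// below is illustrative.
@SuppressCodecs({"SimpleText", "Lucene3x"})
public class MyHeavyIndexingTest extends LuceneTestCase {
  // tests that would be too slow, or incompatible, under the suppressed codecs
}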
@Test
public void testCompletionPostingsFormat() throws IOException {
  AnalyzingCompletionLookupProviderV1 providerV1 =
      new AnalyzingCompletionLookupProviderV1(true, false, true, true);
  AnalyzingCompletionLookupProvider currentProvider =
      new AnalyzingCompletionLookupProvider(true, false, true, true);
  List<Completion090PostingsFormat.CompletionLookupProvider> providers =
      Lists.newArrayList(providerV1, currentProvider);
  Completion090PostingsFormat.CompletionLookupProvider randomProvider =
      providers.get(getRandom().nextInt(providers.size()));
  RAMDirectory dir = new RAMDirectory();
  writeData(dir, randomProvider);

  IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
  LookupFactory load = currentProvider.load(input);
  PostingsFormat format = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
  NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer());
  Lookup lookup =
      load.getLookup(
          new CompletionFieldMapper(
              new Names("foo"),
              analyzer,
              analyzer,
              format,
              null,
              true,
              true,
              true,
              Integer.MAX_VALUE,
              indexSettings,
              AbstractFieldMapper.MultiFields.empty(),
              null,
              ContextMapping.EMPTY_MAPPING),
          new CompletionSuggestionContext(null));
  List<LookupResult> result = lookup.lookup("ge", false, 10);
  assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
  assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
  dir.close();
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  return new AssertingFieldsProducer(in.fieldsProducer(state));
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  return new AssertingFieldsConsumer(in.fieldsConsumer(state));
}
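// Minimal sketch of the wrapper pattern the two overrides above belong to: a
// PostingsFormat that forwards everything to an inner delegate while adding
// behavior on the way through. The "Logging" name and class are illustrative,
// not part of the real AssertingPostingsFormat.
public final class LoggingPostingsFormat extends PostingsFormat {
  private final PostingsFormat in = PostingsFormat.forName("Lucene41");

  public LoggingPostingsFormat() {
    super("Logging"); // the SPI name that PostingsFormat.forName would resolve
  }

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    System.out.println("writing postings for segment " + state.segmentInfo.name);
    return in.fieldsConsumer(state);
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    System.out.println("reading postings for segment " + state.segmentInfo.name);
    return in.fieldsProducer(state);
  }
}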
@Override
protected void before() throws Exception {
  // enable this by default, for IDE consistency with ant tests (as it's the default from ant)
  // TODO: really should be in solr base classes, but some extend LTC directly.
  // we do this in beforeClass, because some tests currently disable it
  restoreProperties.put("solr.directoryFactory", System.getProperty("solr.directoryFactory"));
  if (System.getProperty("solr.directoryFactory") == null) {
    System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory");
  }

  // Restore more Solr properties.
  restoreProperties.put("solr.solr.home", System.getProperty("solr.solr.home"));
  restoreProperties.put("solr.data.dir", System.getProperty("solr.data.dir"));

  // if verbose: print some debugging stuff about which codecs are loaded.
  if (VERBOSE) {
    Set<String> codecs = Codec.availableCodecs();
    for (String codec : codecs) {
      System.out.println(
          "Loaded codec: '" + codec + "': " + Codec.forName(codec).getClass().getName());
    }

    Set<String> postingsFormats = PostingsFormat.availablePostingsFormats();
    for (String postingsFormat : postingsFormats) {
      System.out.println(
          "Loaded postingsFormat: '"
              + postingsFormat
              + "': "
              + PostingsFormat.forName(postingsFormat).getClass().getName());
    }
  }

  savedInfoStream = InfoStream.getDefault();
  final Random random = RandomizedContext.current().getRandom();
  final boolean v = random.nextBoolean();
  if (INFOSTREAM) {
    InfoStream.setDefault(new ThreadNameFixingPrintStreamInfoStream(System.out));
  } else if (v) {
    InfoStream.setDefault(new NullInfoStream());
  }

  Class<?> targetClass = RandomizedContext.current().getTargetClass();
  avoidCodecs = new HashSet<String>();
  if (targetClass.isAnnotationPresent(SuppressCodecs.class)) {
    SuppressCodecs a = targetClass.getAnnotation(SuppressCodecs.class);
    avoidCodecs.addAll(Arrays.asList(a.value()));
  }

  // set back to default
  LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = false;
  LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;

  savedCodec = Codec.getDefault();
  int randomVal = random.nextInt(10);
  if ("Lucene3x".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 3
          && !shouldAvoidCodec("Lucene3x"))) {
    // preflex-only setup
    codec = Codec.forName("Lucene3x");
    assert (codec instanceof PreFlexRWCodec)
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true;
  } else if ("Lucene40".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && randomVal == 0
          && !shouldAvoidCodec("Lucene40"))) {
    // 4.0 setup
    codec = Codec.forName("Lucene40");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene40RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat)
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene41".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 1
          && !shouldAvoidCodec("Lucene41"))) {
    codec = Codec.forName("Lucene41");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene41RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene42".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 2
          && !shouldAvoidCodec("Lucene42"))) {
    codec = Codec.forName("Lucene42");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene42RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene45".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 5
          && !shouldAvoidCodec("Lucene45"))) {
    codec = Codec.forName("Lucene45");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene45RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if (("random".equals(TEST_POSTINGSFORMAT) == false)
      || ("random".equals(TEST_DOCVALUESFORMAT) == false)) {
    // the user wired postings or DV: this is messy
    // refactor into RandomCodec....
    final PostingsFormat format;
    if ("random".equals(TEST_POSTINGSFORMAT)) {
      format = PostingsFormat.forName("Lucene41");
    } else {
      format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
    }

    final DocValuesFormat dvFormat;
    if ("random".equals(TEST_DOCVALUESFORMAT)) {
      dvFormat = DocValuesFormat.forName("Lucene45");
    } else {
      dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT);
    }

    codec =
        new Lucene46Codec() {
          @Override
          public PostingsFormat getPostingsFormatForField(String field) {
            return format;
          }

          @Override
          public DocValuesFormat getDocValuesFormatForField(String field) {
            return dvFormat;
          }

          @Override
          public String toString() {
            return super.toString() + ": " + format.toString() + ", " + dvFormat.toString();
          }
        };
  } else if ("SimpleText".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && randomVal == 9
          && LuceneTestCase.rarely(random)
          && !shouldAvoidCodec("SimpleText"))) {
    codec = new SimpleTextCodec();
  } else if ("Appending".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("Appending"))) {
    codec = new AppendingRWCodec();
    // this is really just Lucene40 with some minor changes
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
  } else if ("CheapBastard".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && randomVal == 8
          && !shouldAvoidCodec("CheapBastard")
          && !shouldAvoidCodec("Lucene41"))) {
    // we also avoid this codec if Lucene41 is avoided, since that's the postings format it uses.
    codec = new CheapBastardCodec();
  } else if ("Asserting".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Asserting"))) {
    codec = new AssertingCodec();
  } else if ("Compressing".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Compressing"))) {
    codec = CompressingCodec.randomInstance(random);
  } else if (!"random".equals(TEST_CODEC)) {
    codec = Codec.forName(TEST_CODEC);
  } else if ("random".equals(TEST_POSTINGSFORMAT)) {
    codec = new RandomCodec(random, avoidCodecs);
  } else {
    assert false;
  }
  Codec.setDefault(codec);

  // Initialize locale/ timezone.
  String testLocale = System.getProperty("tests.locale", "random");
  String testTimeZone = System.getProperty("tests.timezone", "random");

  // Always pick a random one for consistency (whether tests.locale was specified or not).
  savedLocale = Locale.getDefault();
  Locale randomLocale = randomLocale(random);
  locale = testLocale.equals("random") ? randomLocale : localeForName(testLocale);
  Locale.setDefault(locale);

  // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
  // So store the original property value and restore it at end.
  restoreProperties.put("user.timezone", System.getProperty("user.timezone"));
  savedTimeZone = TimeZone.getDefault();
  TimeZone randomTimeZone = randomTimeZone(random());
  timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
  TimeZone.setDefault(timeZone);
  similarity =
      random().nextBoolean() ? new DefaultSimilarity() : new RandomSimilarityProvider(random());

  // Check codec restrictions once at class level.
  try {
    checkCodecRestrictions(codec);
  } catch (AssumptionViolatedException e) {
    System.err.println(
        "NOTE: "
            + e.getMessage()
            + " Suppressed codecs: "
            + Arrays.toString(avoidCodecs.toArray()));
    throw e;
  }
}
public void testRandom() throws Exception {
  Directory dir = newDirectory();

  final int NUM_TERMS = atLeast(20);
  final Set<BytesRef> terms = new HashSet<BytesRef>();
  while (terms.size() < NUM_TERMS) {
    final String s = _TestUtil.randomRealisticUnicodeString(random());
    // final String s = _TestUtil.randomSimpleString(random);
    if (s.length() > 0) {
      terms.add(new BytesRef(s));
    }
  }
  final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
  Arrays.sort(termsArray);

  final int NUM_DOCS = atLeast(100);

  IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

  // Sometimes swap in codec that impls ord():
  if (random().nextInt(10) == 7) {
    // Make sure terms index has ords:
    Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds"));
    conf.setCodec(codec);
  }

  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

  final int[][] idToOrds = new int[NUM_DOCS][];
  final Set<Integer> ordsForDocSet = new HashSet<Integer>();

  for (int id = 0; id < NUM_DOCS; id++) {
    Document doc = new Document();

    doc.add(new IntField("id", id, Field.Store.YES));

    final int termCount = _TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
    while (ordsForDocSet.size() < termCount) {
      ordsForDocSet.add(random().nextInt(termsArray.length));
    }
    final int[] ordsForDoc = new int[termCount];
    int upto = 0;
    if (VERBOSE) {
      System.out.println("TEST: doc id=" + id);
    }
    for (int ord : ordsForDocSet) {
      ordsForDoc[upto++] = ord;
      Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
      if (VERBOSE) {
        System.out.println(" f=" + termsArray[ord].utf8ToString());
      }
      doc.add(field);
    }
    ordsForDocSet.clear();
    Arrays.sort(ordsForDoc);
    idToOrds[id] = ordsForDoc;
    w.addDocument(doc);
  }

  final DirectoryReader r = w.getReader();
  w.close();

  if (VERBOSE) {
    System.out.println("TEST: reader=" + r);
  }

  for (AtomicReaderContext ctx : r.leaves()) {
    if (VERBOSE) {
      System.out.println("\nTEST: sub=" + ctx.reader());
    }
    verify(ctx.reader(), idToOrds, termsArray, null);
  }

  // Also test top-level reader: its enum does not support
  // ord, so this forces the OrdWrapper to run:
  if (VERBOSE) {
    System.out.println("TEST: top reader");
  }
  AtomicReader slowR = SlowCompositeReaderWrapper.wrap(r);
  verify(slowR, idToOrds, termsArray, null);

  FieldCache.DEFAULT.purge(slowR);

  r.close();
  dir.close();
}
@Test
public void testDuellCompletions()
    throws IOException, NoSuchFieldException, SecurityException, IllegalArgumentException,
        IllegalAccessException {
  final boolean preserveSeparators = getRandom().nextBoolean();
  final boolean preservePositionIncrements = getRandom().nextBoolean();
  final boolean usePayloads = getRandom().nextBoolean();
  final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;

  XAnalyzingSuggester reference =
      new XAnalyzingSuggester(
          new StandardAnalyzer(),
          null,
          new StandardAnalyzer(),
          options,
          256,
          -1,
          preservePositionIncrements,
          null,
          false,
          1,
          XAnalyzingSuggester.SEP_LABEL,
          XAnalyzingSuggester.PAYLOAD_SEP,
          XAnalyzingSuggester.END_BYTE,
          XAnalyzingSuggester.HOLE_CHARACTER);
  LineFileDocs docs = new LineFileDocs(getRandom());
  int num = scaledRandomIntBetween(150, 300);
  final String[] titles = new String[num];
  final long[] weights = new long[num];
  for (int i = 0; i < titles.length; i++) {
    Document nextDoc = docs.nextDoc();
    IndexableField field = nextDoc.getField("title");
    titles[i] = field.stringValue();
    weights[i] = between(0, 100);
  }
  docs.close();
  final InputIterator primaryIter =
      new InputIterator() {
        int index = 0;
        long currentWeight = -1;

        @Override
        public BytesRef next() throws IOException {
          if (index < titles.length) {
            currentWeight = weights[index];
            return new BytesRef(titles[index++]);
          }
          return null;
        }

        @Override
        public long weight() {
          return currentWeight;
        }

        @Override
        public BytesRef payload() {
          return null;
        }

        @Override
        public boolean hasPayloads() {
          return false;
        }

        @Override
        public Set<BytesRef> contexts() {
          return null;
        }

        @Override
        public boolean hasContexts() {
          return false;
        }
      };
  InputIterator iter;
  if (usePayloads) {
    iter =
        new InputIterator() {
          @Override
          public long weight() {
            return primaryIter.weight();
          }

          @Override
          public BytesRef next() throws IOException {
            return primaryIter.next();
          }

          @Override
          public BytesRef payload() {
            return new BytesRef(Long.toString(weight()));
          }

          @Override
          public boolean hasPayloads() {
            return true;
          }

          @Override
          public Set<BytesRef> contexts() {
            return null;
          }

          @Override
          public boolean hasContexts() {
            return false;
          }
        };
  } else {
    iter = primaryIter;
  }
  reference.build(iter);
  PostingsFormat provider = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
  NamedAnalyzer namedAnalyzer = new NamedAnalyzer("foo", new StandardAnalyzer());
  final CompletionFieldMapper mapper =
      new CompletionFieldMapper(
          new Names("foo"),
          namedAnalyzer,
          namedAnalyzer,
          provider,
          null,
          usePayloads,
          preserveSeparators,
          preservePositionIncrements,
          Integer.MAX_VALUE,
          indexSettings,
          AbstractFieldMapper.MultiFields.empty(),
          null,
          ContextMapping.EMPTY_MAPPING);
  Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);
  Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
  field.setAccessible(true);
  Field refField = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
  refField.setAccessible(true);
  assertThat(refField.get(reference), equalTo(field.get(buildAnalyzingLookup)));

  for (int i = 0; i < titles.length; i++) {
    int res = between(1, 10);
    final StringBuilder builder = new StringBuilder();
    SuggestUtils.analyze(
        namedAnalyzer.tokenStream("foo", titles[i]),
        new SuggestUtils.TokenConsumer() {
          @Override
          public void nextToken() throws IOException {
            if (builder.length() == 0) {
              builder.append(this.charTermAttr.toString());
            }
          }
        });
    String firstTerm = builder.toString();
    String prefix =
        firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
    List<LookupResult> refLookup = reference.lookup(prefix, false, res);
    List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
    assertThat(refLookup.toString(), lookup.size(), equalTo(refLookup.size()));
    for (int j = 0; j < refLookup.size(); j++) {
      assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
      assertThat(
          "prefix: "
              + prefix
              + " "
              + j
              + " -- mismatch cost: "
              + lookup.get(j).key
              + " - "
              + lookup.get(j).value
              + " | "
              + refLookup.get(j).key
              + " - "
              + refLookup.get(j).value,
          lookup.get(j).value,
          equalTo(refLookup.get(j).value));
      assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
      if (usePayloads) {
        assertThat(
            lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
      }
    }
  }
}