@Override public void setUp() throws Exception { super.setUp(); // set the default codec, so adding test cases to this isn't fragile savedCodec = Codec.getDefault(); Codec.setDefault(getCodec()); }
/** Test attributes map */ public void testAttributes() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); Map<String, String> attributes = new HashMap<>(); attributes.put("key1", "value1"); attributes.put("key2", "value2"); SegmentInfo info = new SegmentInfo( dir, getVersions()[0], "_123", 1, false, codec, Collections.emptyMap(), id, attributes, null); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(attributes, info2.getAttributes()); // attributes map should be immutable expectThrows( UnsupportedOperationException.class, () -> { info2.getAttributes().put("bogus", "bogus"); }); dir.close(); }
/** * Sets some otherwise hard-to-test properties: random segment names, ID values, document count, * etc., and round-trips them */ public void testRandom() throws Exception { Codec codec = getCodec(); Version[] versions = getVersions(); for (int i = 0; i < 10; i++) { Directory dir = newDirectory(); Version version = versions[random().nextInt(versions.length)]; String name = "_" + Integer.toString(random().nextInt(Integer.MAX_VALUE), Character.MAX_RADIX); int docCount = TestUtil.nextInt(random(), 1, IndexWriter.MAX_DOCS); boolean isCompoundFile = random().nextBoolean(); Set<String> files = new HashSet<>(); int numFiles = random().nextInt(10); for (int j = 0; j < numFiles; j++) { String file = IndexFileNames.segmentFileName(name, "", Integer.toString(j)); files.add(file); dir.createOutput(file, IOContext.DEFAULT).close(); } Map<String, String> diagnostics = new HashMap<>(); int numDiags = random().nextInt(10); for (int j = 0; j < numDiags; j++) { diagnostics.put( TestUtil.randomUnicodeString(random()), TestUtil.randomUnicodeString(random())); } byte id[] = new byte[StringHelper.ID_LENGTH]; random().nextBytes(id); Map<String, String> attributes = new HashMap<>(); int numAttributes = random().nextInt(10); for (int j = 0; j < numAttributes; j++) { attributes.put( TestUtil.randomUnicodeString(random()), TestUtil.randomUnicodeString(random())); } SegmentInfo info = new SegmentInfo( dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes, null); info.setFiles(files); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, name, id, IOContext.DEFAULT); assertEquals(info, info2); dir.close(); } }
@BeforeClass public static void beforeClass() throws Exception { savedCodec = Codec.getDefault(); // currently only these codecs support random access ordinals int victim = random().nextInt(2); switch (victim) { case 0: Codec.setDefault(TestUtil.alwaysDocValuesFormat(new DirectDocValuesFormat())); break; default: Codec.setDefault(TestUtil.alwaysDocValuesFormat(new Lucene410DocValuesFormat())); break; } }
// we want an exception if it's not found. public void testBogusLookup() { expectThrows( IllegalArgumentException.class, () -> { Codec.forName("dskfdskfsdfksdfdsf"); }); }
private static Codec readCodec(DataInput input, boolean unsupportedAllowed) throws IOException { final String name = input.readString(); try { return Codec.forName(name); } catch (IllegalArgumentException e) { // give better error messages if we can, first check if this is a legacy codec if (unsupportedCodecs.contains(name)) { // We should only get here on pre-5.3 indices, but we can't test this until 7.0 when 5.x // indices become too old: assert unsupportedAllowed; IOException newExc = new IndexFormatTooOldException(input, "Codec '" + name + "' is too old"); newExc.initCause(e); throw newExc; } // or maybe it's an old default codec that moved if (name.startsWith("Lucene")) { throw new IllegalArgumentException( "Could not load codec '" + name + "'. Did you forget to add lucene-backward-codecs.jar?", e); } throw e; } }
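// For context, the name that readCodec resolves is whatever the write side recorded. A minimal
// sketch of that counterpart, assuming a DataOutput-based writer (the method name writeCodec is
// hypothetical, mirroring readCodec above):
private static void writeCodec(DataOutput output, Codec codec) throws IOException {
  // Codec.getName() is the SPI name that Codec.forName(...) later resolves on read
  output.writeString(codec.getName());
}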
static { assert Codec.forName(Lucene.LATEST_CODEC) .getClass() .isAssignableFrom(PerFieldMappingPostingFormatCodec.class) : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC; }
private SegmentCommitInfo merge( Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) throws Exception { IOContext context = newIOContext(random()); SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); final Codec codec = Codec.getDefault(); TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, null); SegmentMerger merger = new SegmentMerger( Arrays.<AtomicReader>asList(r1, r2), si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true); MergeState mergeState = merger.merge(); r1.close(); r2.close(); final SegmentInfo info = new SegmentInfo( si1.info.dir, Version.LATEST, merged, si1.info.getDocCount() + si2.info.getDocCount(), false, codec, null); info.setFiles(new HashSet<>(trackingDir.getCreatedFiles())); if (useCompoundFile) { Collection<String> filesToDelete = IndexWriter.createCompoundFile( InfoStream.getDefault(), dir, MergeState.CheckAbort.NONE, info, newIOContext(random())); info.setUseCompoundFile(true); for (final String fileToDelete : filesToDelete) { si1.info.dir.deleteFile(fileToDelete); } } return new SegmentCommitInfo(info, 0, -1L, -1L, -1L); }
/** * Tests a segment infos read that hits an exception on close; make sure we get our exception back, * with no file handle leaks, etc. */ public void testExceptionOnCloseInput() throws Exception { Failure fail = new Failure() { @Override public void eval(MockDirectoryWrapper dir) throws IOException { for (StackTraceElement e : Thread.currentThread().getStackTrace()) { if (doFail && "close".equals(e.getMethodName())) { throw new FakeIOException(); } } } }; MockDirectoryWrapper dir = newMockDirectory(); dir.failOn(fail); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo( dir, getVersions()[0], "_123", 1, false, codec, Collections.<String, String>emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); fail.setDoFail(); expectThrows( FakeIOException.class, () -> { codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); }); fail.clearDoFail(); dir.close(); }
@Test public void testDeadlock() throws Exception { LuceneTestCase.assumeFalse( "This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/LUCENE-6036)", new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage())); // pick random codec names for stress test in separate process: final Random rnd = RandomizedContext.current().getRandom(); Set<String> avail; final String codecName = new ArrayList<>(avail = Codec.availableCodecs()).get(rnd.nextInt(avail.size())); final String pfName = new ArrayList<>(avail = PostingsFormat.availablePostingsFormats()) .get(rnd.nextInt(avail.size())); final String dvfName = new ArrayList<>(avail = DocValuesFormat.availableDocValuesFormats()) .get(rnd.nextInt(avail.size())); // spawn separate JVM: final Process p = new ProcessBuilder( Paths.get(System.getProperty("java.home"), "bin", "java").toString(), "-cp", System.getProperty("java.class.path"), getClass().getName(), codecName, pfName, dvfName) .inheritIO() .start(); final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("processKiller")); final ScheduledFuture<?> f = scheduler.schedule( new Runnable() { @Override public void run() { p.destroy(); } }, 30, TimeUnit.SECONDS); try { final int exitCode = p.waitFor(); if (f.cancel(false)) { assertEquals("Process died abnormally", 0, exitCode); } else { fail("Process did not exit after 30 secs -> classloader deadlock?"); } } finally { scheduler.shutdown(); while (!scheduler.awaitTermination(1, TimeUnit.MINUTES)) ; } }
/** * Check codec restrictions. * * @throws AssumptionViolatedException if the class does not work with a given codec. */ private void checkCodecRestrictions(Codec codec) { assumeFalse( "Class not allowed to use codec: " + codec.getName() + ".", shouldAvoidCodec(codec.getName())); if (codec instanceof RandomCodec && !avoidCodecs.isEmpty()) { for (String name : ((RandomCodec) codec).formatNames) { assumeFalse( "Class not allowed to use postings format: " + name + ".", shouldAvoidCodec(name)); } } PostingsFormat pf = codec.postingsFormat(); assumeFalse( "Class not allowed to use postings format: " + pf.getName() + ".", shouldAvoidCodec(pf.getName())); assumeFalse( "Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".", shouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT)); }
/** Test sort */ public void testSort() throws IOException { assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort()); final int iters = atLeast(5); for (int i = 0; i < iters; ++i) { Sort sort; if (i == 0) { sort = null; } else { final int numSortFields = TestUtil.nextInt(random(), 1, 3); SortField[] sortFields = new SortField[numSortFields]; for (int j = 0; j < numSortFields; ++j) { sortFields[j] = randomIndexSortField(); } sort = new Sort(sortFields); } Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo( dir, getVersions()[0], "_123", 1, false, codec, Collections.<String, String>emptyMap(), id, new HashMap<>(), sort); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(sort, info2.getIndexSort()); dir.close(); } }
/** Test files map */ public void testFiles() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo( dir, getVersions()[0], "_123", 1, false, codec, Collections.<String, String>emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(info.files(), info2.files()); dir.close(); }
/** Tests SI writer adds itself to files... */ public void testAddsSelfToFiles() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo( dir, getVersions()[0], "_123", 1, false, codec, Collections.<String, String>emptyMap(), id, new HashMap<>(), null); Set<String> originalFiles = Collections.singleton("_123.a"); info.setFiles(originalFiles); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); Set<String> modifiedFiles = info.files(); assertTrue(modifiedFiles.containsAll(originalFiles)); assertTrue( "did you forget to add yourself to files()", modifiedFiles.size() > originalFiles.size()); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(info.files(), info2.files()); // files set should be immutable expectThrows( UnsupportedOperationException.class, () -> { info2.files().add("bogus"); }); dir.close(); }
/** create a RandomIndexWriter with the provided config */ public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException { // TODO: this should be solved in a different way; Random should not be shared (!). this.r = new Random(r.nextLong()); w = mockIndexWriter(dir, c, r); flushAt = _TestUtil.nextInt(r, 10, 1000); codec = w.getConfig().getCodec(); if (LuceneTestCase.VERBOSE) { System.out.println("RIW dir=" + dir + " config=" + w.getConfig()); System.out.println("codec default=" + codec.getName()); } // Make sure we sometimes test indices that don't get // any forced merges: doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean(); }
@Listeners({ReproduceInfoPrinter.class}) @ThreadLeakFilters( defaultFilters = true, filters = {ElasticSearchThreadFilter.class}) @ThreadLeakScope(Scope.NONE) @TimeoutSuite(millis = TimeUnits.HOUR) @SuppressCodecs("Lucene3x") public abstract class ElasticSearchLuceneTestCase extends LuceneTestCase { private static final Codec DEFAULT_CODEC = Codec.getDefault(); /** Forcefully reset the default codec */ public static void forceDefaultCodec() { Codec.setDefault(DEFAULT_CODEC); } }
/** After suite cleanup (always invoked). */ @Override protected void after() throws Exception { for (Map.Entry<String, String> e : restoreProperties.entrySet()) { if (e.getValue() == null) { System.clearProperty(e.getKey()); } else { System.setProperty(e.getKey(), e.getValue()); } } restoreProperties.clear(); Codec.setDefault(savedCodec); InfoStream.setDefault(savedInfoStream); if (savedLocale != null) Locale.setDefault(savedLocale); if (savedTimeZone != null) TimeZone.setDefault(savedTimeZone); }
public Lookup buildAnalyzingLookup( final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights) throws IOException { RAMDirectory dir = new RAMDirectory(); FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) { @Override public PostingsFormat postingsFormat() { final PostingsFormat in = super.postingsFormat(); return mapper.postingsFormat(in); } }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.indexAnalyzer()); indexWriterConfig.setCodec(filterCodec); IndexWriter writer = new IndexWriter(dir, indexWriterConfig); for (int i = 0; i < weights.length; i++) { Document doc = new Document(); BytesRef payload = mapper.buildPayload( new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i]))); doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload)); if (randomBoolean()) { writer.commit(); } writer.addDocument(doc); } writer.commit(); writer.forceMerge(1, true); writer.commit(); DirectoryReader reader = DirectoryReader.open(writer, true); assertThat(reader.leaves().size(), equalTo(1)); assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length)); LeafReaderContext atomicReaderContext = reader.leaves().get(0); Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name()); Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms) .getLookup(mapper, new CompletionSuggestionContext(null)); reader.close(); writer.close(); dir.close(); return lookup; }
public void testAcceptDocValuesFormat() throws IOException { String mapping = XContentFactory.jsonBuilder() .startObject() .startObject("type") .startObject("properties") .startObject("field") .field("type", "string") .field("doc_values_format", Codec.getDefault().docValuesFormat().getName()) .endObject() .endObject() .endObject() .endObject() .string(); int i = 0; for (Version v : VersionUtils.allVersions()) { if (v.onOrAfter(Version.V_2_0_0) == false) { // no need to test, we don't support upgrading from these versions continue; } IndexService indexService = createIndex( "test-" + i++, Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v).build()); DocumentMapperParser parser = indexService.mapperService().documentMapperParser(); try { parser.parse("type", new CompressedXContent(mapping)); if (v.onOrAfter(Version.V_2_0_0_beta1)) { fail("Elasticsearch 2.0 should not support custom doc values formats"); } } catch (MapperParsingException e) { if (v.before(Version.V_2_0_0_beta1)) { // Elasticsearch 1.x should ignore custom doc values formats throw e; } Assert.assertThat( e.getMessage(), containsString("unsupported parameters: [doc_values_format")); } } }
/** * Encodes/decodes an inverted index segment. * * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In * order for the segment to be read, the name must resolve to your implementation via {@link * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} (SPI) * to resolve codec names. * * <p>If you implement your own codec, make sure that it has a no-arg constructor so SPI can load * it. * * @see ServiceLoader */ public abstract class Codec implements NamedSPILoader.NamedSPI { private static final NamedSPILoader<Codec> loader = new NamedSPILoader<Codec>(Codec.class); private final String name; /** * Creates a new codec. * * <p>The provided name will be written into the index segment: in order for the segment to be * read, this class should be registered with Java's SPI mechanism (registered in META-INF/ of your * jar file, etc). * * @param name must be all ASCII alphanumeric, and less than 128 characters in length. */ protected Codec(String name) { NamedSPILoader.checkServiceName(name); this.name = name; } /** Returns this codec's name */ @Override public final String getName() { return name; } /** Encodes/decodes postings */ public abstract PostingsFormat postingsFormat(); /** Encodes/decodes docvalues */ public abstract DocValuesFormat docValuesFormat(); /** Encodes/decodes stored fields */ public abstract StoredFieldsFormat storedFieldsFormat(); /** Encodes/decodes term vectors */ public abstract TermVectorsFormat termVectorsFormat(); /** Encodes/decodes field infos file */ public abstract FieldInfosFormat fieldInfosFormat(); /** Encodes/decodes segment info file */ public abstract SegmentInfoFormat segmentInfoFormat(); /** Encodes/decodes document normalization values */ public abstract NormsFormat normsFormat(); /** Encodes/decodes live docs */ public abstract LiveDocsFormat liveDocsFormat(); /** looks up a codec by name */ public static Codec forName(String name) { if (loader == null) { throw new IllegalStateException( "You called Codec.forName() before all Codecs could be initialized. " + "This likely happens if you call it from a Codec's ctor."); } return loader.lookup(name); } /** returns a list of all available codec names */ public static Set<String> availableCodecs() { if (loader == null) { throw new IllegalStateException( "You called Codec.availableCodecs() before all Codecs could be initialized. " + "This likely happens if you call it from a Codec's ctor."); } return loader.availableServices(); } /** * Reloads the codec list from the given {@link ClassLoader}. Changes to the codecs are visible * after the method ends, all iterators ({@link #availableCodecs()},...) stay consistent. * * <p><b>NOTE:</b> Only new codecs are added, existing ones are never removed or replaced. * * <p><em>This method is expensive and should only be called for discovery of new codecs on the * given classpath/classloader!</em> */ public static void reloadCodecs(ClassLoader classloader) { loader.reload(classloader); } private static Codec defaultCodec = Codec.forName("Lucene41"); /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */ // TODO: should we use this, or maybe a system property is better? public static Codec getDefault() { return defaultCodec; } /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */ public static void setDefault(Codec codec) { defaultCodec = codec; } /** * returns the codec's name. Subclasses can override to provide more detail (such as parameters). */ @Override public String toString() { return name; } }
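// The SPI contract described in the javadocs above can be shown with a minimal sketch. MyCodec and
// its package are assumptions for illustration, not classes shipped with Lucene; FilterCodec is
// Lucene's convenience base class that forwards all formats to a delegate. The no-arg constructor
// is required so the ServiceLoader can instantiate the class.
public final class MyCodec extends FilterCodec {
  public MyCodec() {
    // the name passed here is written into every segment and later resolved via Codec.forName()
    super("MyCodec", new Lucene41Codec());
  }
}
// To register it, list the class in META-INF/services/org.apache.lucene.codecs.Codec of your jar:
//   com.example.MyCodec
// After that it can be looked up and installed as the default:
//   Codec.setDefault(Codec.forName("MyCodec"));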
/** * Encodes/decodes an inverted index segment. * * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In * order for the segment to be read, the name must resolve to your implementation via {@link * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} to * resolve codec names. * * <p> * * @see ServiceLoader */ public abstract class Codec implements NamedSPILoader.NamedSPI { private static final NamedSPILoader<Codec> loader = new NamedSPILoader<Codec>(Codec.class); private final String name; public Codec(String name) { NamedSPILoader.checkServiceName(name); this.name = name; } /** Returns this codec's name */ @Override public final String getName() { return name; } /** Encodes/decodes postings */ public abstract PostingsFormat postingsFormat(); /** Encodes/decodes docvalues */ public abstract DocValuesFormat docValuesFormat(); /** Encodes/decodes stored fields */ public abstract StoredFieldsFormat storedFieldsFormat(); /** Encodes/decodes term vectors */ public abstract TermVectorsFormat termVectorsFormat(); /** Encodes/decodes field infos file */ public abstract FieldInfosFormat fieldInfosFormat(); /** Encodes/decodes segment info file */ public abstract SegmentInfoFormat segmentInfoFormat(); /** Encodes/decodes document normalization values */ public abstract NormsFormat normsFormat(); /** Encodes/decodes live docs */ public abstract LiveDocsFormat liveDocsFormat(); /** looks up a codec by name */ public static Codec forName(String name) { return loader.lookup(name); } /** returns a list of all available codec names */ public static Set<String> availableCodecs() { return loader.availableServices(); } private static Codec defaultCodec = Codec.forName("Lucene40"); /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */ // TODO: should we use this, or maybe a system property is better? public static Codec getDefault() { return defaultCodec; } /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */ public static void setDefault(Codec codec) { defaultCodec = codec; } @Override public String toString() { return name; } }
public void testWriteReadMerge() throws IOException { // get another codec, other than the default: so we are merging segments across different codecs final Codec otherCodec; if ("SimpleText".equals(Codec.getDefault().getName())) { otherCodec = new Lucene46Codec(); } else { otherCodec = new SimpleTextCodec(); } Directory dir = newDirectory(); IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone()); final int docCount = atLeast(200); final byte[][][] data = new byte[docCount][][]; for (int i = 0; i < docCount; ++i) { final int fieldCount = rarely() ? RandomInts.randomIntBetween(random(), 1, 500) : RandomInts.randomIntBetween(random(), 1, 5); data[i] = new byte[fieldCount][]; for (int j = 0; j < fieldCount; ++j) { final int length = rarely() ? random().nextInt(1000) : random().nextInt(10); final int max = rarely() ? 256 : 2; data[i][j] = randomByteArray(length, max); } } final FieldType type = new FieldType(StringField.TYPE_STORED); type.setIndexed(false); type.freeze(); IntField id = new IntField("id", 0, Store.YES); for (int i = 0; i < data.length; ++i) { Document doc = new Document(); doc.add(id); id.setIntValue(i); for (int j = 0; j < data[i].length; ++j) { Field f = new Field("bytes" + j, data[i][j], type); doc.add(f); } iw.w.addDocument(doc); if (random().nextBoolean() && (i % (data.length / 10) == 0)) { iw.w.close(); // test merging against a non-compressing codec if (iwConf.getCodec() == otherCodec) { iwConf.setCodec(Codec.getDefault()); } else { iwConf.setCodec(otherCodec); } iw = new RandomIndexWriter(random(), dir, iwConf.clone()); } } for (int i = 0; i < 10; ++i) { final int min = random().nextInt(data.length); final int max = min + random().nextInt(20); iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false)); } iw.forceMerge(2); // force merges with deletions iw.commit(); final DirectoryReader ir = DirectoryReader.open(dir); assertTrue(ir.numDocs() > 0); int numDocs = 0; for (int i = 0; i < ir.maxDoc(); ++i) { final Document doc = ir.document(i); if (doc == null) { continue; } ++numDocs; final int docId = doc.getField("id").numericValue().intValue(); assertEquals(data[docId].length + 1, doc.getFields().size()); for (int j = 0; j < data[docId].length; ++j) { final byte[] arr = data[docId][j]; final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j); final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length); assertArrayEquals(arr, arr2); } } assertTrue(ir.numDocs() <= numDocs); ir.close(); iw.deleteAll(); iw.commit(); iw.forceMerge(1); iw.close(); dir.close(); }
@Override protected void before() throws Exception { // enable this by default, for IDE consistency with ant tests (as it's the default from ant) // TODO: really should be in solr base classes, but some extend LTC directly. // we do this in beforeClass, because some tests currently disable it restoreProperties.put("solr.directoryFactory", System.getProperty("solr.directoryFactory")); if (System.getProperty("solr.directoryFactory") == null) { System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory"); } // Restore more Solr properties. restoreProperties.put("solr.solr.home", System.getProperty("solr.solr.home")); restoreProperties.put("solr.data.dir", System.getProperty("solr.data.dir")); // if verbose: print some debugging stuff about which codecs are loaded. if (VERBOSE) { Set<String> codecs = Codec.availableCodecs(); for (String codec : codecs) { System.out.println( "Loaded codec: '" + codec + "': " + Codec.forName(codec).getClass().getName()); } Set<String> postingsFormats = PostingsFormat.availablePostingsFormats(); for (String postingsFormat : postingsFormats) { System.out.println( "Loaded postingsFormat: '" + postingsFormat + "': " + PostingsFormat.forName(postingsFormat).getClass().getName()); } } savedInfoStream = InfoStream.getDefault(); final Random random = RandomizedContext.current().getRandom(); final boolean v = random.nextBoolean(); if (INFOSTREAM) { InfoStream.setDefault(new ThreadNameFixingPrintStreamInfoStream(System.out)); } else if (v) { InfoStream.setDefault(new NullInfoStream()); } Class<?> targetClass = RandomizedContext.current().getTargetClass(); avoidCodecs = new HashSet<String>(); if (targetClass.isAnnotationPresent(SuppressCodecs.class)) { SuppressCodecs a = targetClass.getAnnotation(SuppressCodecs.class); avoidCodecs.addAll(Arrays.asList(a.value())); } // set back to default LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = false; LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; savedCodec = Codec.getDefault(); int randomVal = random.nextInt(10); if ("Lucene3x".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && "random".equals(TEST_DOCVALUESFORMAT) && randomVal == 3 && !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup codec = Codec.forName("Lucene3x"); assert (codec instanceof PreFlexRWCodec) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true; } else if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && randomVal == 0 && !shouldAvoidCodec("Lucene40"))) { // 4.0 setup codec = Codec.forName("Lucene40"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; assert codec instanceof Lucene40RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; } else if ("Lucene41".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && "random".equals(TEST_DOCVALUESFORMAT) && randomVal == 1 && !shouldAvoidCodec("Lucene41"))) { codec = Codec.forName("Lucene41"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; assert codec instanceof Lucene41RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; } else if ("Lucene42".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) &&
"random".equals(TEST_DOCVALUESFORMAT) && randomVal == 2 && !shouldAvoidCodec("Lucene42"))) { codec = Codec.forName("Lucene42"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; assert codec instanceof Lucene42RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; } else if ("Lucene45".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && "random".equals(TEST_DOCVALUESFORMAT) && randomVal == 5 && !shouldAvoidCodec("Lucene45"))) { codec = Codec.forName("Lucene45"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; assert codec instanceof Lucene45RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; } else if (("random".equals(TEST_POSTINGSFORMAT) == false) || ("random".equals(TEST_DOCVALUESFORMAT) == false)) { // the user wired postings or DV: this is messy // refactor into RandomCodec.... final PostingsFormat format; if ("random".equals(TEST_POSTINGSFORMAT)) { format = PostingsFormat.forName("Lucene41"); } else { format = PostingsFormat.forName(TEST_POSTINGSFORMAT); } final DocValuesFormat dvFormat; if ("random".equals(TEST_DOCVALUESFORMAT)) { dvFormat = DocValuesFormat.forName("Lucene45"); } else { dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT); } codec = new Lucene46Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; } @Override public DocValuesFormat getDocValuesFormatForField(String field) { return dvFormat; } @Override public String toString() { return super.toString() + ": " + format.toString() + ", " + dvFormat.toString(); } }; } else if ("SimpleText".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 9 && LuceneTestCase.rarely(random) && !shouldAvoidCodec("SimpleText"))) { codec = new SimpleTextCodec(); } else if ("Appending".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("Appending"))) { codec = new AppendingRWCodec(); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // this is really just Lucene40 with some minor changes } else if ("CheapBastard".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("CheapBastard") && !shouldAvoidCodec("Lucene41"))) { // we also avoid this codec if Lucene41 is avoided, since thats the postings format it uses. codec = new CheapBastardCodec(); } else if ("Asserting".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Asserting"))) { codec = new AssertingCodec(); } else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Compressing"))) { codec = CompressingCodec.randomInstance(random); } else if (!"random".equals(TEST_CODEC)) { codec = Codec.forName(TEST_CODEC); } else if ("random".equals(TEST_POSTINGSFORMAT)) { codec = new RandomCodec(random, avoidCodecs); } else { assert false; } Codec.setDefault(codec); // Initialize locale/ timezone. String testLocale = System.getProperty("tests.locale", "random"); String testTimeZone = System.getProperty("tests.timezone", "random"); // Always pick a random one for consistency (whether tests.locale was specified or not). savedLocale = Locale.getDefault(); Locale randomLocale = randomLocale(random); locale = testLocale.equals("random") ? randomLocale : localeForName(testLocale); Locale.setDefault(locale); // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale. 
// So store the original property value and restore it at the end. restoreProperties.put("user.timezone", System.getProperty("user.timezone")); savedTimeZone = TimeZone.getDefault(); TimeZone randomTimeZone = randomTimeZone(random()); timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone); TimeZone.setDefault(timeZone); similarity = random().nextBoolean() ? new DefaultSimilarity() : new RandomSimilarityProvider(random()); // Check codec restrictions once at class level. try { checkCodecRestrictions(codec); } catch (AssumptionViolatedException e) { System.err.println( "NOTE: " + e.getMessage() + " Suppressed codecs: " + Arrays.toString(avoidCodecs.toArray())); throw e; } }
@Test public void testGetThatFieldProbabilityRatioIsReflectedInBoost() throws Exception { ArgumentCaptor<Float> normalizeCaptor = ArgumentCaptor.forClass(Float.class); DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection(); Directory directory = newDirectory(); Analyzer analyzer = new Analyzer() { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new WhitespaceTokenizer(); TokenStream filter = new WordDelimiterFilter( source, WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE, null); filter = new LowerCaseFilter(filter); return new TokenStreamComponents(source, filter); } }; IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setCodec(Codec.forName(TestUtil.LUCENE_CODEC)); IndexWriter indexWriter = new IndexWriter(directory, conf); // Both fields f1 and f2 have 10 terms in total. // f1: the search terms (abc def) make 100% of all terms in f1 // f2: the search terms (abc def) make 50% of all terms in f2 // --> we expect that the sum of the boost factors for terms in bq(+f1:abc, +f1:def) // equals 2 * sum of the boost factors for terms in bq(+f2:abc, +f2:def) PRMSFieldBoostTest.addNumDocs("f1", "abc def", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f1", "abc", indexWriter, 4); PRMSFieldBoostTest.addNumDocs("f1", "def", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f2", "abc def", indexWriter, 1); PRMSFieldBoostTest.addNumDocs("f2", "abc", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f2", "def", indexWriter, 1); PRMSFieldBoostTest.addNumDocs("f2", "ghi", indexWriter, 5); indexWriter.close(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); indexSearcher.setSimilarity(similarity); Map<String, Float> fields = new HashMap<>(); fields.put("f1", 1f); fields.put("f2", 1f); SearchFieldsAndBoosting searchFieldsAndBoosting = new SearchFieldsAndBoosting(FieldBoostModel.PRMS, fields, fields, 0.8f); LuceneQueryBuilder queryBuilder = new LuceneQueryBuilder(dfc, analyzer, searchFieldsAndBoosting, 0.01f, null); WhiteSpaceQuerqyParser parser = new WhiteSpaceQuerqyParser(); Query query = queryBuilder.createQuery(parser.parse("AbcDef")); dfc.finishedUserQuery(); assertTrue(query instanceof DisjunctionMaxQuery); DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; List<Query> disjuncts = dmq.getDisjuncts(); assertEquals(2, disjuncts.size()); Query disjunct1 = disjuncts.get(0); if (disjunct1 instanceof BoostQuery) { disjunct1 = ((BoostQuery) disjunct1).getQuery(); } assertTrue(disjunct1 instanceof BooleanQuery); BooleanQuery bq1 = (BooleanQuery) disjunct1; Query disjunct2 = disjuncts.get(1); if (disjunct2 instanceof BoostQuery) { disjunct2 = ((BoostQuery) disjunct2).getQuery(); } assertTrue(disjunct2 instanceof BooleanQuery); BooleanQuery bq2 = (BooleanQuery) disjunct2; final Weight weight1 = bq1.createWeight(indexSearcher, true); weight1.normalize(0.1f, 4f); final Weight weight2 = bq2.createWeight(indexSearcher, true); weight2.normalize(0.1f, 4f); Mockito.verify(simWeight, times(4)).normalize(eq(0.1f), normalizeCaptor.capture()); final List<Float> capturedBoosts = normalizeCaptor.getAllValues(); // capturedBoosts = boosts of [bq1.term1, bq1.term2, bq2.term1, bq2.term2 ] assertEquals(capturedBoosts.get(0), capturedBoosts.get(1), 0.00001); assertEquals(capturedBoosts.get(2), capturedBoosts.get(3), 0.00001); assertEquals(2f, capturedBoosts.get(0) / capturedBoosts.get(3), 0.00001); indexReader.close(); directory.close(); analyzer.close(); }
@Override public void tearDown() throws Exception { Codec.setDefault(savedCodec); // restore super.tearDown(); }
public void test() throws Exception { IndexWriterConfig defaultConfig = new IndexWriterConfig(null); Codec defaultCodec = defaultConfig.getCodec(); if ((new IndexWriterConfig(null)).getCodec() instanceof CompressingCodec) { Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)"); Matcher matcher = regex.matcher(defaultCodec.toString()); assertTrue( "Unexpected CompressingCodec toString() output: " + defaultCodec.toString(), matcher.find()); int maxDocsPerChunk = Integer.parseInt(matcher.group(1)); int blockSize = Integer.parseInt(matcher.group(2)); int product = maxDocsPerChunk * blockSize; assumeTrue( defaultCodec.getName() + " maxDocsPerChunk (" + maxDocsPerChunk + ") * blockSize (" + blockSize + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g", product >= 16); } BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1")); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w = new IndexWriter( dir, new IndexWriterConfig(new MockAnalyzer(random())) .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setRAMBufferSizeMB(256.0) .setMergeScheduler(new ConcurrentMergeScheduler()) .setMergePolicy(newLogMergePolicy(false, 10)) .setOpenMode(IndexWriterConfig.OpenMode.CREATE) .setCodec(TestUtil.getDefaultCodec())); MergePolicy mp = w.getConfig().getMergePolicy(); if (mp instanceof LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024); } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(true); MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.add(field); final int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) { // trick blockPF's little optimization tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.addDocument(doc); } w.forceMerge(1); w.close(); DirectoryReader oneThousand = DirectoryReader.open(dir); DirectoryReader subReaders[] = new DirectoryReader[1000]; Arrays.fill(subReaders, oneThousand); BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2")); if (dir2 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null)); TestUtil.addIndexesSlowly(w2, subReaders); w2.forceMerge(1); w2.close(); oneThousand.close(); DirectoryReader oneMillion = DirectoryReader.open(dir2); subReaders = new DirectoryReader[2000]; Arrays.fill(subReaders, oneMillion); BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3")); if (dir3 instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER); } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null)); TestUtil.addIndexesSlowly(w3, subReaders); w3.forceMerge(1); w3.close(); oneMillion.close(); dir.close(); dir2.close(); dir3.close(); }
@Test public void testRollingUpdates() throws Exception { Random random = new Random(random().nextLong()); final BaseDirectoryWrapper dir = newDirectory(); // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to // delete files" if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setEnableVirusScanner(false); } final LineFileDocs docs = new LineFileDocs(random, true); // provider.register(new MemoryCodec()); if (random().nextBoolean()) { Codec.setDefault( TestUtil.alwaysPostingsFormat( new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat()))); } MockAnalyzer analyzer = new MockAnalyzer(random()); analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer)); final int SIZE = atLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; final int numUpdates = (int) (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble()))); if (VERBOSE) { System.out.println("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { final Document doc = docs.nextDoc(); final String myID = Integer.toString(id); if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { System.out.println(" docIter=" + docIter + " id=" + id); } ((Field) doc.getField("docid")).setStringValue(myID); Term idTerm = new Term("docid", myID); final boolean doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.search(new TermQuery(idTerm), 1); assertEquals(1, hits.totalHits); doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc); if (VERBOSE) { if (doUpdate) { System.out.println(" tryDeleteDocument failed"); } else { System.out.println(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { System.out.println(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { if (random().nextBoolean()) { w.updateDocument(idTerm, doc); } else { // It's OK to not be atomic for this test (no separate thread reopening readers): w.deleteDocuments(new TermQuery(idTerm)); w.addDocument(doc); } } else { w.addDocument(doc); } if (docIter >= SIZE && random().nextInt(50) == 17) { if (r != null) { r.close(); } final boolean applyDeletions = random().nextBoolean(); if (VERBOSE) { System.out.println("TEST: reopen applyDeletions=" + applyDeletions); } r = w.getReader(applyDeletions); if (applyDeletions) { s = newSearcher(r); } else { s = null; } assertTrue( "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE); updateCount = 0; } } if (r != null) { r.close(); } w.commit(); assertEquals(SIZE, w.numDocs()); w.close(); TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.close(); // LUCENE-4455: SegmentInfos infos = SegmentInfos.readLatestCommit(dir); long totalBytes = 0; for (SegmentCommitInfo sipc : infos) { totalBytes += sipc.sizeInBytes(); } long totalBytes2 = 0; for (String fileName : dir.listAll()) { if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) { totalBytes2 += dir.fileLength(fileName); } } assertEquals(totalBytes2, totalBytes); dir.close(); }
public void testAvailableServices() { Set<String> codecs = Codec.availableCodecs(); assertTrue(codecs.contains("Lucene410")); }
public void testLookup() { Codec codec = Codec.forName("Lucene410"); assertEquals("Lucene410", codec.getName()); }