@Override
public void setUp() throws Exception {
  super.setUp();
  // set the default codec, so adding test cases to this isn't fragile
  savedCodec = Codec.getDefault();
  Codec.setDefault(getCodec());
}
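// A minimal sketch of the matching tearDown (not shown in the original snippet):
// it assumes the same `savedCodec` field and restores the JVM-wide default codec
// so later tests are unaffected by this suite's codec choice.
@Override
public void tearDown() throws Exception {
  Codec.setDefault(savedCodec);
  super.tearDown();
}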
private SegmentCommitInfo merge(
    Directory dir,
    SegmentCommitInfo si1,
    SegmentCommitInfo si2,
    String merged,
    boolean useCompoundFile)
    throws Exception {
  IOContext context = newIOContext(random());
  SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
  SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);

  final Codec codec = Codec.getDefault();
  // Track files created during the merge so the merged SegmentInfo can list them.
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si =
      new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, null);

  SegmentMerger merger =
      new SegmentMerger(
          Arrays.<AtomicReader>asList(r1, r2),
          si,
          InfoStream.getDefault(),
          trackingDir,
          IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
          MergeState.CheckAbort.NONE,
          new FieldInfos.FieldNumbers(),
          context,
          true);
  MergeState mergeState = merger.merge();
  r1.close();
  r2.close();

  final SegmentInfo info =
      new SegmentInfo(
          si1.info.dir,
          Version.LATEST,
          merged,
          si1.info.getDocCount() + si2.info.getDocCount(),
          false,
          codec,
          null);
  info.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    // Pack the merged segment into a CFS file and delete the now-redundant loose files.
    Collection<String> filesToDelete =
        IndexWriter.createCompoundFile(
            InfoStream.getDefault(), dir, MergeState.CheckAbort.NONE, info, newIOContext(random()));
    info.setUseCompoundFile(true);
    for (final String fileToDelete : filesToDelete) {
      si1.info.dir.deleteFile(fileToDelete);
    }
  }

  return new SegmentCommitInfo(info, 0, -1L, -1L, -1L);
}
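// Hedged usage sketch (assumed fixture): given two SegmentCommitInfos si1/si2
// produced by earlier flushes in this test, merge them into a segment named
// "merge" and check the combined document count. Names here are illustrative.
SegmentCommitInfo siMerge = merge(dir, si1, si2, "merge", useCompoundFile);
assertEquals(si1.info.getDocCount() + si2.info.getDocCount(), siMerge.info.getDocCount());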
@BeforeClass
public static void beforeClass() throws Exception {
  savedCodec = Codec.getDefault();
  // currently only these codecs support random access ordinals
  int victim = random().nextInt(2);
  switch (victim) {
    case 0:
      Codec.setDefault(TestUtil.alwaysDocValuesFormat(new DirectDocValuesFormat()));
      break; // without this break, case 0 would fall through and be overwritten
    default:
      Codec.setDefault(TestUtil.alwaysDocValuesFormat(new Lucene410DocValuesFormat()));
  }
}
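// A minimal sketch of the matching @AfterClass hook, assuming `savedCodec` is
// the static field captured above; it undoes the class-wide codec override.
@AfterClass
public static void afterClass() throws Exception {
  Codec.setDefault(savedCodec);
}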
@Listeners({ReproduceInfoPrinter.class})
@ThreadLeakFilters(
    defaultFilters = true,
    filters = {ElasticSearchThreadFilter.class})
@ThreadLeakScope(Scope.NONE)
@TimeoutSuite(millis = TimeUnits.HOUR)
@SuppressCodecs("Lucene3x")
public abstract class ElasticSearchLuceneTestCase extends LuceneTestCase {

  private static final Codec DEFAULT_CODEC = Codec.getDefault();

  /** Forcefully reset the default codec */
  public static void forceDefaultCodec() {
    Codec.setDefault(DEFAULT_CODEC);
  }
}
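// Hedged usage sketch: a hypothetical test (not part of the original code) that
// temporarily swaps in another codec (SimpleTextCodec, purely for illustration)
// and relies on forceDefaultCodec() to restore the codec captured at class load.
public class MyCodecSwapTest extends ElasticSearchLuceneTestCase {
  public void testWithSimpleTextCodec() {
    Codec.setDefault(new SimpleTextCodec());
    try {
      // ... exercise code that reads Codec.getDefault() ...
    } finally {
      forceDefaultCodec();
    }
  }
}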
public Lookup buildAnalyzingLookup(
    final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights)
    throws IOException {
  RAMDirectory dir = new RAMDirectory();
  // Wrap the default codec so the completion field uses the mapper's postings format.
  FilterCodec filterCodec =
      new FilterCodec("filtered", Codec.getDefault()) {
        @Override
        public PostingsFormat postingsFormat() {
          final PostingsFormat in = super.postingsFormat();
          return mapper.postingsFormat(in);
        }
      };
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.indexAnalyzer());
  indexWriterConfig.setCodec(filterCodec);
  IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
  for (int i = 0; i < weights.length; i++) {
    Document doc = new Document();
    BytesRef payload =
        mapper.buildPayload(
            new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i])));
    doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload));
    if (randomBoolean()) {
      writer.commit();
    }
    writer.addDocument(doc);
  }
  writer.commit();
  writer.forceMerge(1, true);
  writer.commit();
  DirectoryReader reader = DirectoryReader.open(writer, true);
  assertThat(reader.leaves().size(), equalTo(1));
  assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
  LeafReaderContext atomicReaderContext = reader.leaves().get(0);
  Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
  Lookup lookup =
      ((Completion090PostingsFormat.CompletionTerms) luceneTerms)
          .getLookup(mapper, new CompletionSuggestionContext(null));
  reader.close();
  writer.close();
  dir.close();
  return lookup;
}
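// Hedged usage sketch: build a suggester from parallel term/surface/weight
// arrays and query it. The `mapper` instance is assumed to come from the
// surrounding test fixture; result contents depend on the configured analyzer.
String[] input = {"foo bar", "foo baz"};
long[] inputWeights = {10L, 20L};
Lookup suggester = buildAnalyzingLookup(mapper, input, input, inputWeights);
List<Lookup.LookupResult> results = suggester.lookup("foo", false, 10);
for (Lookup.LookupResult result : results) {
  System.out.println(result.key + " -> " + result.value);
}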
public void testAcceptDocValuesFormat() throws IOException {
  String mapping =
      XContentFactory.jsonBuilder()
          .startObject()
          .startObject("type")
          .startObject("properties")
          .startObject("field")
          .field("type", "string")
          .field("doc_values_format", Codec.getDefault().docValuesFormat().getName())
          .endObject()
          .endObject()
          .endObject()
          .endObject()
          .string();
  int i = 0;
  for (Version v : VersionUtils.allVersions()) {
    if (v.onOrAfter(Version.V_2_0_0) == false) {
      // no need to test, we don't support upgrading from these versions
      continue;
    }
    IndexService indexService =
        createIndex(
            "test-" + i++,
            Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v).build());
    DocumentMapperParser parser = indexService.mapperService().documentMapperParser();
    try {
      parser.parse("type", new CompressedXContent(mapping));
      if (v.onOrAfter(Version.V_2_0_0_beta1)) {
        fail("Elasticsearch 2.0 should not support custom doc values formats");
      }
    } catch (MapperParsingException e) {
      if (v.before(Version.V_2_0_0_beta1)) {
        // Elasticsearch 1.x should ignore custom doc values formats
        throw e;
      }
      Assert.assertThat(
          e.getMessage(), containsString("unsupported parameters: [doc_values_format"));
    }
  }
}
public void testWriteReadMerge() throws IOException {
  // get another codec, other than the default, so we are merging segments across different codecs
  final Codec otherCodec;
  if ("SimpleText".equals(Codec.getDefault().getName())) {
    otherCodec = new Lucene46Codec();
  } else {
    otherCodec = new SimpleTextCodec();
  }
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());

  final int docCount = atLeast(200);
  final byte[][][] data = new byte[docCount][][];
  for (int i = 0; i < docCount; ++i) {
    final int fieldCount =
        rarely()
            ? RandomInts.randomIntBetween(random(), 1, 500)
            : RandomInts.randomIntBetween(random(), 1, 5);
    data[i] = new byte[fieldCount][];
    for (int j = 0; j < fieldCount; ++j) {
      final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
      final int max = rarely() ? 256 : 2;
      data[i][j] = randomByteArray(length, max);
    }
  }

  final FieldType type = new FieldType(StringField.TYPE_STORED);
  type.setIndexed(false);
  type.freeze();
  IntField id = new IntField("id", 0, Store.YES);
  for (int i = 0; i < data.length; ++i) {
    Document doc = new Document();
    doc.add(id);
    id.setIntValue(i);
    for (int j = 0; j < data[i].length; ++j) {
      Field f = new Field("bytes" + j, data[i][j], type);
      doc.add(f);
    }
    iw.w.addDocument(doc);
    if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
      iw.w.close();
      // test merging against a non-compressing codec
      if (iwConf.getCodec() == otherCodec) {
        iwConf.setCodec(Codec.getDefault());
      } else {
        iwConf.setCodec(otherCodec);
      }
      iw = new RandomIndexWriter(random(), dir, iwConf.clone());
    }
  }

  for (int i = 0; i < 10; ++i) {
    final int min = random().nextInt(data.length);
    final int max = min + random().nextInt(20);
    iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
  }

  iw.forceMerge(2); // force merges with deletions
  iw.commit();

  final DirectoryReader ir = DirectoryReader.open(dir);
  assertTrue(ir.numDocs() > 0);
  int numDocs = 0;
  for (int i = 0; i < ir.maxDoc(); ++i) {
    final Document doc = ir.document(i);
    if (doc == null) {
      continue;
    }
    ++numDocs;
    final int docId = doc.getField("id").numericValue().intValue();
    assertEquals(data[docId].length + 1, doc.getFields().size());
    for (int j = 0; j < data[docId].length; ++j) {
      final byte[] arr = data[docId][j];
      final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
      final byte[] arr2 =
          Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
      assertArrayEquals(arr, arr2);
    }
  }
  assertTrue(ir.numDocs() <= numDocs);
  ir.close();

  iw.deleteAll();
  iw.commit();
  iw.forceMerge(1);

  iw.close();
  dir.close();
}
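// A plausible sketch of the randomByteArray helper used above (the real test's
// helper may differ in detail): it fills `length` bytes with values in [0, max),
// matching the call sites where max is 256 (arbitrary bytes) or 2 (highly
// compressible data).
private static byte[] randomByteArray(int length, int max) {
  final byte[] bytes = new byte[length];
  for (int i = 0; i < length; ++i) {
    bytes[i] = (byte) random().nextInt(max);
  }
  return bytes;
}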
@Override
protected void before() throws Exception {
  // enable this by default, for IDE consistency with ant tests (as it's the default from ant)
  // TODO: really should be in solr base classes, but some extend LTC directly.
  // we do this in beforeClass, because some tests currently disable it
  restoreProperties.put("solr.directoryFactory", System.getProperty("solr.directoryFactory"));
  if (System.getProperty("solr.directoryFactory") == null) {
    System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory");
  }

  // Restore more Solr properties.
  restoreProperties.put("solr.solr.home", System.getProperty("solr.solr.home"));
  restoreProperties.put("solr.data.dir", System.getProperty("solr.data.dir"));

  // if verbose: print some debugging stuff about which codecs are loaded.
  if (VERBOSE) {
    Set<String> codecs = Codec.availableCodecs();
    for (String codec : codecs) {
      System.out.println(
          "Loaded codec: '" + codec + "': " + Codec.forName(codec).getClass().getName());
    }

    Set<String> postingsFormats = PostingsFormat.availablePostingsFormats();
    for (String postingsFormat : postingsFormats) {
      System.out.println(
          "Loaded postingsFormat: '"
              + postingsFormat
              + "': "
              + PostingsFormat.forName(postingsFormat).getClass().getName());
    }
  }

  savedInfoStream = InfoStream.getDefault();
  final Random random = RandomizedContext.current().getRandom();
  final boolean v = random.nextBoolean();
  if (INFOSTREAM) {
    InfoStream.setDefault(new ThreadNameFixingPrintStreamInfoStream(System.out));
  } else if (v) {
    InfoStream.setDefault(new NullInfoStream());
  }

  Class<?> targetClass = RandomizedContext.current().getTargetClass();
  avoidCodecs = new HashSet<String>();
  if (targetClass.isAnnotationPresent(SuppressCodecs.class)) {
    SuppressCodecs a = targetClass.getAnnotation(SuppressCodecs.class);
    avoidCodecs.addAll(Arrays.asList(a.value()));
  }

  // set back to default
  LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = false;
  LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;

  savedCodec = Codec.getDefault();
  int randomVal = random.nextInt(10);
  if ("Lucene3x".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 3
          && !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup
    codec = Codec.forName("Lucene3x");
    assert (codec instanceof PreFlexRWCodec)
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true;
  } else if ("Lucene40".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && randomVal == 0
          && !shouldAvoidCodec("Lucene40"))) { // 4.0 setup
    codec = Codec.forName("Lucene40");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene40RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat)
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene41".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 1
          && !shouldAvoidCodec("Lucene41"))) {
    codec = Codec.forName("Lucene41");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene41RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene42".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 2
          && !shouldAvoidCodec("Lucene42"))) {
    codec = Codec.forName("Lucene42");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene42RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene45".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 5
          && !shouldAvoidCodec("Lucene45"))) {
    codec = Codec.forName("Lucene45");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene45RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if (("random".equals(TEST_POSTINGSFORMAT) == false)
      || ("random".equals(TEST_DOCVALUESFORMAT) == false)) {
    // the user wired postings or DV: this is messy
    // refactor into RandomCodec....
    final PostingsFormat format;
    if ("random".equals(TEST_POSTINGSFORMAT)) {
      format = PostingsFormat.forName("Lucene41");
    } else {
      format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
    }

    final DocValuesFormat dvFormat;
    if ("random".equals(TEST_DOCVALUESFORMAT)) {
      dvFormat = DocValuesFormat.forName("Lucene45");
    } else {
      dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT);
    }

    codec =
        new Lucene46Codec() {
          @Override
          public PostingsFormat getPostingsFormatForField(String field) {
            return format;
          }

          @Override
          public DocValuesFormat getDocValuesFormatForField(String field) {
            return dvFormat;
          }

          @Override
          public String toString() {
            return super.toString() + ": " + format.toString() + ", " + dvFormat.toString();
          }
        };
  } else if ("SimpleText".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && randomVal == 9
          && LuceneTestCase.rarely(random)
          && !shouldAvoidCodec("SimpleText"))) {
    codec = new SimpleTextCodec();
  } else if ("Appending".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("Appending"))) {
    codec = new AppendingRWCodec();
    // this is really just Lucene40 with some minor changes
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
  } else if ("CheapBastard".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && randomVal == 8
          && !shouldAvoidCodec("CheapBastard")
          && !shouldAvoidCodec("Lucene41"))) {
    // we also avoid this codec if Lucene41 is avoided, since that's the postings format it uses.
    codec = new CheapBastardCodec();
  } else if ("Asserting".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Asserting"))) {
    codec = new AssertingCodec();
  } else if ("Compressing".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Compressing"))) {
    codec = CompressingCodec.randomInstance(random);
  } else if (!"random".equals(TEST_CODEC)) {
    codec = Codec.forName(TEST_CODEC);
  } else if ("random".equals(TEST_POSTINGSFORMAT)) {
    codec = new RandomCodec(random, avoidCodecs);
  } else {
    assert false;
  }
  Codec.setDefault(codec);

  // Initialize locale/timezone.
  String testLocale = System.getProperty("tests.locale", "random");
  String testTimeZone = System.getProperty("tests.timezone", "random");

  // Always pick a random one for consistency (whether tests.locale was specified or not).
  savedLocale = Locale.getDefault();
  Locale randomLocale = randomLocale(random);
  locale = testLocale.equals("random") ? randomLocale : localeForName(testLocale);
  Locale.setDefault(locale);

  // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
  // So store the original property value and restore it at end.
  restoreProperties.put("user.timezone", System.getProperty("user.timezone"));
  savedTimeZone = TimeZone.getDefault();
  TimeZone randomTimeZone = randomTimeZone(random());
  timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
  TimeZone.setDefault(timeZone);
  similarity =
      random().nextBoolean() ? new DefaultSimilarity() : new RandomSimilarityProvider(random());

  // Check codec restrictions once at class level.
  try {
    checkCodecRestrictions(codec);
  } catch (AssumptionViolatedException e) {
    System.err.println(
        "NOTE: "
            + e.getMessage()
            + " Suppressed codecs: "
            + Arrays.toString(avoidCodecs.toArray()));
    throw e;
  }
}
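// A plausible sketch of the shouldAvoidCodec helper referenced throughout
// before() (an assumption; the real implementation may differ): a codec is
// avoided when the test class listed it in its @SuppressCodecs annotation.
private boolean shouldAvoidCodec(String codec) {
  return !avoidCodecs.isEmpty() && avoidCodecs.contains(codec);
}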
public void testMerge() throws IOException {
  final Codec codec = Codec.getDefault();
  final SegmentInfo si =
      new SegmentInfo(
          mergedDir,
          Version.LATEST,
          mergedSegment,
          -1,
          false,
          codec,
          Collections.emptyMap(),
          StringHelper.randomId(),
          new HashMap<>());

  SegmentMerger merger =
      new SegmentMerger(
          Arrays.<CodecReader>asList(reader1, reader2),
          si,
          InfoStream.getDefault(),
          mergedDir,
          new FieldInfos.FieldNumbers(),
          newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
  MergeState mergeState = merger.merge();
  int docsMerged = mergeState.segmentInfo.maxDoc();
  assertTrue(docsMerged == 2);

  // Should be able to open a new SegmentReader against the new directory
  SegmentReader mergedReader =
      new SegmentReader(
          new SegmentCommitInfo(mergeState.segmentInfo, 0, -1L, -1L, -1L),
          newIOContext(random()));
  assertTrue(mergedReader != null);
  assertTrue(mergedReader.numDocs() == 2);

  Document newDoc1 = mergedReader.document(0);
  assertTrue(newDoc1 != null);
  // There are 2 unstored fields on the document
  assertTrue(
      DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - DocHelper.unstored.size());
  Document newDoc2 = mergedReader.document(1);
  assertTrue(newDoc2 != null);
  assertTrue(
      DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());

  PostingsEnum termDocs =
      TestUtil.docs(
          random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), null, 0);
  assertTrue(termDocs != null);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  int tvCount = 0;
  for (FieldInfo fieldInfo : mergedReader.getFieldInfos()) {
    if (fieldInfo.hasVectors()) {
      tvCount++;
    }
  }

  // System.out.println("stored size: " + stored.size());
  assertEquals("We do not have 3 fields that were indexed with term vector", 3, tvCount);

  Terms vector = mergedReader.getTermVectors(0).terms(DocHelper.TEXT_FIELD_2_KEY);
  assertNotNull(vector);
  assertEquals(3, vector.size());
  TermsEnum termsEnum = vector.iterator();
  int i = 0;
  while (termsEnum.next() != null) {
    String term = termsEnum.term().utf8ToString();
    int freq = (int) termsEnum.totalTermFreq();
    // System.out.println("Term: " + term + " Freq: " + freq);
    assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
    assertTrue(DocHelper.FIELD_2_FREQS[i] == freq);
    i++;
  }

  TestSegmentReader.checkNorms(mergedReader);
  mergedReader.close();
}