// Class-initialization sanity check. It is an assert, so it is only evaluated when assertions are
// enabled (-ea). It looks up the codec registered under Lucene.LATEST_CODEC and requires that its
// runtime class is PerFieldMappingPostingFormatCodec itself or one of its superclasses, i.e. that
// PerFieldMappingPostingFormatCodec extends whatever the current latest codec is.
static {
  assert Codec.forName(Lucene.LATEST_CODEC)
          .getClass()
          .isAssignableFrom(PerFieldMappingPostingFormatCodec.class)
      : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: "
          + Lucene.LATEST_CODEC;
}
// we want an exception if its not found. public void testBogusLookup() { try { Codec.forName("dskfdskfsdfksdfdsf"); fail(); } catch (IllegalArgumentException expected) { } }
private static Codec readCodec(DataInput input, boolean unsupportedAllowed) throws IOException { final String name = input.readString(); try { return Codec.forName(name); } catch (IllegalArgumentException e) { // give better error messages if we can, first check if this is a legacy codec if (unsupportedCodecs.contains(name)) { // We should only get here on pre-5.3 indices, but we can't test this until 7.0 when 5.x // indices become too old: assert unsupportedAllowed; IOException newExc = new IndexFormatTooOldException(input, "Codec '" + name + "' is too old"); newExc.initCause(e); throw newExc; } // or maybe it's an old default codec that moved if (name.startsWith("Lucene")) { throw new IllegalArgumentException( "Could not load codec '" + name + "'. Did you forget to add lucene-backward-codecs.jar?", e); } throw e; } }
/** The codec returned for the name "Lucene410" must report that same name. */
public void testLookup() {
  final String expectedName = "Lucene410";
  assertEquals(expectedName, Codec.forName(expectedName).getName());
}
/** * Encodes/decodes an inverted index segment. * * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In * order for the segment to be read, the name must resolve to your implementation via {@link * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} to * resolve codec names. * * <p> * * @see ServiceLoader */ public abstract class Codec implements NamedSPILoader.NamedSPI { private static final NamedSPILoader<Codec> loader = new NamedSPILoader<Codec>(Codec.class); private final String name; public Codec(String name) { NamedSPILoader.checkServiceName(name); this.name = name; } /** Returns this codec's name */ @Override public final String getName() { return name; } /** Encodes/decodes postings */ public abstract PostingsFormat postingsFormat(); /** Encodes/decodes docvalues */ public abstract DocValuesFormat docValuesFormat(); /** Encodes/decodes stored fields */ public abstract StoredFieldsFormat storedFieldsFormat(); /** Encodes/decodes term vectors */ public abstract TermVectorsFormat termVectorsFormat(); /** Encodes/decodes field infos file */ public abstract FieldInfosFormat fieldInfosFormat(); /** Encodes/decodes segment info file */ public abstract SegmentInfoFormat segmentInfoFormat(); /** Encodes/decodes document normalization values */ public abstract NormsFormat normsFormat(); /** Encodes/decodes live docs */ public abstract LiveDocsFormat liveDocsFormat(); /** looks up a codec by name */ public static Codec forName(String name) { return loader.lookup(name); } /** returns a list of all available codec names */ public static Set<String> availableCodecs() { return loader.availableServices(); } private static Codec defaultCodec = Codec.forName("Lucene40"); /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */ // TODO: should we use this, or maybe a system property is better? 
public static Codec getDefault() { return defaultCodec; } /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */ public static void setDefault(Codec codec) { defaultCodec = codec; } @Override public String toString() { return name; } }
@Test public void testGetThatFieldProbabilityRatioIsReflectedInBoost() throws Exception { ArgumentCaptor<Float> normalizeCaptor = ArgumentCaptor.forClass(Float.class); DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection(); Directory directory = newDirectory(); Analyzer analyzer = new Analyzer() { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new WhitespaceTokenizer(); TokenStream filter = new WordDelimiterFilter( source, WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE, null); filter = new LowerCaseFilter(filter); return new TokenStreamComponents(source, filter); } }; IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setCodec(Codec.forName(TestUtil.LUCENE_CODEC)); IndexWriter indexWriter = new IndexWriter(directory, conf); // Both fields f1 and f2 have 10 terms in total. // f1: the search terms (abc def) make 100% of all terms in f1 // f2: the search terms (abc def) make 50% of all terms in f2 // --> we expect that the sum of the boost factors for terms in bq(+f1:abc, +f1:def) // equals 2 * sum of the boost factors for terms in bq(+f2:abc, +f2:def) PRMSFieldBoostTest.addNumDocs("f1", "abc def", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f1", "abc", indexWriter, 4); PRMSFieldBoostTest.addNumDocs("f1", "def", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f2", "abc def", indexWriter, 1); PRMSFieldBoostTest.addNumDocs("f2", "abc", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f2", "def", indexWriter, 1); PRMSFieldBoostTest.addNumDocs("f2", "ghi", indexWriter, 5); indexWriter.close(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); indexSearcher.setSimilarity(similarity); Map<String, Float> fields = new HashMap<>(); fields.put("f1", 1f); fields.put("f2", 1f); SearchFieldsAndBoosting searchFieldsAndBoosting = new SearchFieldsAndBoosting(FieldBoostModel.PRMS, fields, fields, 0.8f); 
LuceneQueryBuilder queryBuilder = new LuceneQueryBuilder(dfc, analyzer, searchFieldsAndBoosting, 0.01f, null); WhiteSpaceQuerqyParser parser = new WhiteSpaceQuerqyParser(); Query query = queryBuilder.createQuery(parser.parse("AbcDef")); dfc.finishedUserQuery(); assertTrue(query instanceof DisjunctionMaxQuery); DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; List<Query> disjuncts = dmq.getDisjuncts(); assertEquals(2, disjuncts.size()); Query disjunct1 = disjuncts.get(0); if (disjunct1 instanceof BoostQuery) { disjunct1 = ((BoostQuery) disjunct1).getQuery(); } assertTrue(disjunct1 instanceof BooleanQuery); BooleanQuery bq1 = (BooleanQuery) disjunct1; Query disjunct2 = disjuncts.get(1); if (disjunct2 instanceof BoostQuery) { disjunct2 = ((BoostQuery) disjunct2).getQuery(); } assertTrue(disjunct2 instanceof BooleanQuery); BooleanQuery bq2 = (BooleanQuery) disjunct2; final Weight weight1 = bq1.createWeight(indexSearcher, true); weight1.normalize(0.1f, 4f); final Weight weight2 = bq2.createWeight(indexSearcher, true); weight2.normalize(0.1f, 4f); Mockito.verify(simWeight, times(4)).normalize(eq(0.1f), normalizeCaptor.capture()); final List<Float> capturedBoosts = normalizeCaptor.getAllValues(); // capturedBoosts = boosts of [bq1.term1, bq1.term2, bq2.term1, bq2.term2 ] assertEquals(capturedBoosts.get(0), capturedBoosts.get(1), 0.00001); assertEquals(capturedBoosts.get(2), capturedBoosts.get(3), 0.00001); assertEquals(2f, capturedBoosts.get(0) / capturedBoosts.get(3), 0.00001); indexReader.close(); directory.close(); analyzer.close(); }
/**
 * Class-level test setup. In order, this method:
 *
 * <ol>
 *   <li>records the current values of several system properties in {@code restoreProperties}
 *       and installs a default {@code solr.directoryFactory} when none is set;
 *   <li>when {@code VERBOSE} is set, prints every available codec and postings format;
 *   <li>saves the default {@link InfoStream} and possibly replaces it;
 *   <li>collects the codec names listed in a {@code SuppressCodecs} annotation on the target
 *       class into {@code avoidCodecs};
 *   <li>saves the default {@link Codec}, chooses the codec for this run from {@code TEST_CODEC},
 *       {@code TEST_POSTINGSFORMAT}, {@code TEST_DOCVALUESFORMAT} and a random value, and
 *       installs it as the default;
 *   <li>saves and then sets the default {@link Locale} and {@link TimeZone};
 *   <li>chooses a similarity;
 *   <li>checks codec restrictions via {@code checkCodecRestrictions}.
 * </ol>
 *
 * <p>The order of the statements matters: the branch taken in the codec selection chain depends
 * on both the order of the {@code else if} arms and on the sequence of values drawn from {@code
 * random}.
 *
 * @throws Exception if any setup step fails; an {@link AssumptionViolatedException} from the
 *     codec restriction check is logged to {@code System.err} and rethrown
 */
@Override
protected void before() throws Exception {
  // enable this by default, for IDE consistency with ant tests (as its the default from ant)
  // TODO: really should be in solr base classes, but some extend LTC directly.
  // we do this in beforeClass, because some tests currently disable it
  restoreProperties.put("solr.directoryFactory", System.getProperty("solr.directoryFactory"));
  if (System.getProperty("solr.directoryFactory") == null) {
    System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory");
  }

  // Restore more Solr properties.
  restoreProperties.put("solr.solr.home", System.getProperty("solr.solr.home"));
  restoreProperties.put("solr.data.dir", System.getProperty("solr.data.dir"));

  // if verbose: print some debugging stuff about which codecs are loaded.
  if (VERBOSE) {
    Set<String> codecs = Codec.availableCodecs();
    for (String codec : codecs) {
      System.out.println(
          "Loaded codec: '" + codec + "': " + Codec.forName(codec).getClass().getName());
    }

    Set<String> postingsFormats = PostingsFormat.availablePostingsFormats();
    for (String postingsFormat : postingsFormats) {
      System.out.println(
          "Loaded postingsFormat: '"
              + postingsFormat
              + "': "
              + PostingsFormat.forName(postingsFormat).getClass().getName());
    }
  }

  savedInfoStream = InfoStream.getDefault();
  final Random random = RandomizedContext.current().getRandom();
  // Drawn before the branch below, so one boolean is consumed from random whether or not
  // INFOSTREAM is set.
  final boolean v = random.nextBoolean();
  if (INFOSTREAM) {
    InfoStream.setDefault(new ThreadNameFixingPrintStreamInfoStream(System.out));
  } else if (v) {
    InfoStream.setDefault(new NullInfoStream());
  }

  // Gather the codec names the target class asked to suppress (empty set when not annotated).
  Class<?> targetClass = RandomizedContext.current().getTargetClass();
  avoidCodecs = new HashSet<String>();
  if (targetClass.isAnnotationPresent(SuppressCodecs.class)) {
    SuppressCodecs a = targetClass.getAnnotation(SuppressCodecs.class);
    avoidCodecs.addAll(Arrays.asList(a.value()));
  }

  // set back to default
  LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = false;
  LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;

  savedCodec = Codec.getDefault();
  // One value in [0, 10) decides which codec a fully "random" configuration gets; the arms
  // below are tested in order, so the first matching arm wins.
  int randomVal = random.nextInt(10);
  if ("Lucene3x".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 3
          && !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup
    codec = Codec.forName("Lucene3x");
    assert (codec instanceof PreFlexRWCodec)
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true;
  } else if ("Lucene40".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && randomVal == 0
          && !shouldAvoidCodec("Lucene40"))) { // 4.0 setup
    // NOTE(review): unlike the neighbouring arms, this condition does not test
    // TEST_DOCVALUESFORMAT - confirm whether that is intentional.
    codec = Codec.forName("Lucene40");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene40RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat)
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene41".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 1
          && !shouldAvoidCodec("Lucene41"))) {
    codec = Codec.forName("Lucene41");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene41RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene42".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 2
          && !shouldAvoidCodec("Lucene42"))) {
    codec = Codec.forName("Lucene42");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene42RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if ("Lucene45".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && "random".equals(TEST_POSTINGSFORMAT)
          && "random".equals(TEST_DOCVALUESFORMAT)
          && randomVal == 5
          && !shouldAvoidCodec("Lucene45"))) {
    codec = Codec.forName("Lucene45");
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    assert codec instanceof Lucene45RWCodec
        : "fix your classpath to have tests-framework.jar before lucene-core.jar";
  } else if (("random".equals(TEST_POSTINGSFORMAT) == false)
      || ("random".equals(TEST_DOCVALUESFORMAT) == false)) {
    // the user wired postings or DV: this is messy
    // refactor into RandomCodec....

    // Whichever of the two formats was left as "random" falls back to a fixed named format.
    final PostingsFormat format;
    if ("random".equals(TEST_POSTINGSFORMAT)) {
      format = PostingsFormat.forName("Lucene41");
    } else {
      format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
    }

    final DocValuesFormat dvFormat;
    if ("random".equals(TEST_DOCVALUESFORMAT)) {
      dvFormat = DocValuesFormat.forName("Lucene45");
    } else {
      dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT);
    }

    // Every field gets the same postings and doc values format.
    codec =
        new Lucene46Codec() {
          @Override
          public PostingsFormat getPostingsFormatForField(String field) {
            return format;
          }

          @Override
          public DocValuesFormat getDocValuesFormatForField(String field) {
            return dvFormat;
          }

          @Override
          public String toString() {
            return super.toString() + ": " + format.toString() + ", " + dvFormat.toString();
          }
        };
  } else if ("SimpleText".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && randomVal == 9
          && LuceneTestCase.rarely(random)
          && !shouldAvoidCodec("SimpleText"))) {
    codec = new SimpleTextCodec();
  } else if ("Appending".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("Appending"))) {
    codec = new AppendingRWCodec();
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE =
        true; // this is really just Lucene40 with some minor changes
  } else if ("CheapBastard".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC)
          && randomVal == 8
          && !shouldAvoidCodec("CheapBastard")
          && !shouldAvoidCodec("Lucene41"))) {
    // we also avoid this codec if Lucene41 is avoided, since thats the postings format it uses.
    // NOTE(review): this arm shares randomVal == 8 with the "Appending" arm above, so in random
    // mode it is only reached when "Appending" is avoided - confirm this is intended.
    codec = new CheapBastardCodec();
  } else if ("Asserting".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Asserting"))) {
    codec = new AssertingCodec();
  } else if ("Compressing".equals(TEST_CODEC)
      || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Compressing"))) {
    // NOTE(review): randomVal == 5 is also used by the "Lucene45" arm above, which is tested
    // first - confirm this is intended.
    codec = CompressingCodec.randomInstance(random);
  } else if (!"random".equals(TEST_CODEC)) {
    // An explicitly named codec that none of the arms above handled: plain lookup by name.
    codec = Codec.forName(TEST_CODEC);
  } else if ("random".equals(TEST_POSTINGSFORMAT)) {
    codec = new RandomCodec(random, avoidCodecs);
  } else {
    // Only checked when assertions are enabled; otherwise codec keeps its previous value here.
    assert false;
  }

  Codec.setDefault(codec);

  // Initialize locale/ timezone.
  String testLocale = System.getProperty("tests.locale", "random");
  String testTimeZone = System.getProperty("tests.timezone", "random");

  // Always pick a random one for consistency (whether tests.locale was specified or not).
  savedLocale = Locale.getDefault();
  Locale randomLocale = randomLocale(random);
  locale = testLocale.equals("random") ? randomLocale : localeForName(testLocale);
  Locale.setDefault(locale);

  // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
  // So store the original property value and restore it at end.
  restoreProperties.put("user.timezone", System.getProperty("user.timezone"));
  savedTimeZone = TimeZone.getDefault();
  // NOTE(review): from here on random() is used instead of the local `random` variable -
  // confirm both refer to the same source.
  TimeZone randomTimeZone = randomTimeZone(random());
  timeZone =
      testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
  TimeZone.setDefault(timeZone);
  similarity =
      random().nextBoolean() ? new DefaultSimilarity() : new RandomSimilarityProvider(random());

  // Check codec restrictions once at class level.
  try {
    checkCodecRestrictions(codec);
  } catch (AssumptionViolatedException e) {
    System.err.println(
        "NOTE: "
            + e.getMessage()
            + " Suppressed codecs: "
            + Arrays.toString(avoidCodecs.toArray()));
    throw e;
  }
}
/** * Encodes/decodes an inverted index segment. * * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In * order for the segment to be read, the name must resolve to your implementation via {@link * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} (SPI) * to resolve codec names. * * <p>If you implement your own codec, make sure that it has a no-arg constructor so SPI can load * it. * * @see ServiceLoader */ public abstract class Codec implements NamedSPILoader.NamedSPI { private static final NamedSPILoader<Codec> loader = new NamedSPILoader<Codec>(Codec.class); private final String name; /** * Creates a new codec. * * <p>The provided name will be written into the index segment: in order to for the segment to be * read this class should be registered with Java's SPI mechanism (registered in META-INF/ of your * jar file, etc). * * @param name must be all ascii alphanumeric, and less than 128 characters in length. 
*/ protected Codec(String name) { NamedSPILoader.checkServiceName(name); this.name = name; } /** Returns this codec's name */ @Override public final String getName() { return name; } /** Encodes/decodes postings */ public abstract PostingsFormat postingsFormat(); /** Encodes/decodes docvalues */ public abstract DocValuesFormat docValuesFormat(); /** Encodes/decodes stored fields */ public abstract StoredFieldsFormat storedFieldsFormat(); /** Encodes/decodes term vectors */ public abstract TermVectorsFormat termVectorsFormat(); /** Encodes/decodes field infos file */ public abstract FieldInfosFormat fieldInfosFormat(); /** Encodes/decodes segment info file */ public abstract SegmentInfoFormat segmentInfoFormat(); /** Encodes/decodes document normalization values */ public abstract NormsFormat normsFormat(); /** Encodes/decodes live docs */ public abstract LiveDocsFormat liveDocsFormat(); /** looks up a codec by name */ public static Codec forName(String name) { if (loader == null) { throw new IllegalStateException( "You called Codec.forName() before all Codecs could be initialized. " + "This likely happens if you call it from a Codec's ctor."); } return loader.lookup(name); } /** returns a list of all available codec names */ public static Set<String> availableCodecs() { if (loader == null) { throw new IllegalStateException( "You called Codec.availableCodecs() before all Codecs could be initialized. " + "This likely happens if you call it from a Codec's ctor."); } return loader.availableServices(); } /** * Reloads the codec list from the given {@link ClassLoader}. Changes to the codecs are visible * after the method ends, all iterators ({@link #availableCodecs()},...) stay consistent. * * <p><b>NOTE:</b> Only new codecs are added, existing ones are never removed or replaced. 
* * <p><em>This method is expensive and should only be called for discovery of new codecs on the * given classpath/classloader!</em> */ public static void reloadCodecs(ClassLoader classloader) { loader.reload(classloader); } private static Codec defaultCodec = Codec.forName("Lucene41"); /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */ // TODO: should we use this, or maybe a system property is better? public static Codec getDefault() { return defaultCodec; } /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */ public static void setDefault(Codec codec) { defaultCodec = codec; } /** * returns the codec's name. Subclasses can override to provide more detail (such as parameters). */ @Override public String toString() { return name; } }
public void testDocsStuckInRAMForever() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setRAMBufferSizeMB(.2); Codec codec = Codec.forName("Lucene49"); iwc.setCodec(codec); iwc.setMergePolicy(NoMergePolicy.INSTANCE); final IndexWriter w = new IndexWriter(dir, iwc); final CountDownLatch startingGun = new CountDownLatch(1); Thread[] threads = new Thread[2]; for (int i = 0; i < threads.length; i++) { final int threadID = i; threads[i] = new Thread() { @Override public void run() { try { startingGun.await(); for (int j = 0; j < 1000; j++) { Document doc = new Document(); doc.add(newStringField("field", "threadID" + threadID, Field.Store.NO)); w.addDocument(doc); } } catch (Exception e) { throw new RuntimeException(e); } } }; threads[i].start(); } startingGun.countDown(); for (Thread t : threads) { t.join(); } Set<String> segSeen = new HashSet<>(); int thread0Count = 0; int thread1Count = 0; // At this point the writer should have 2 thread states w/ docs; now we index with only 1 thread // until we see all 1000 thread0 & thread1 // docs flushed. 
If the writer incorrectly holds onto previously indexed docs forever then this // will run forever: while (thread0Count < 1000 || thread1Count < 1000) { Document doc = new Document(); doc.add(newStringField("field", "threadIDmain", Field.Store.NO)); w.addDocument(doc); for (String fileName : dir.listAll()) { if (fileName.endsWith(".si")) { String segName = IndexFileNames.parseSegmentName(fileName); if (segSeen.contains(segName) == false) { segSeen.add(segName); SegmentInfo si = new Lucene46SegmentInfoFormat() .getSegmentInfoReader() .read(dir, segName, IOContext.DEFAULT); si.setCodec(codec); SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, -1, -1, -1); SegmentReader sr = new SegmentReader(sci, 1, IOContext.DEFAULT); try { thread0Count += sr.docFreq(new Term("field", "threadID0")); thread1Count += sr.docFreq(new Term("field", "threadID1")); } finally { sr.close(); } } } } } w.close(); dir.close(); }