// Class-load-time sanity check: PerFieldMappingPostingFormatCodec must extend (be assignable
// from the class of) the codec registered under Lucene.LATEST_CODEC, otherwise indices written
// with it could not be read back by the latest codec machinery. Runs only with -ea (assertions).
static {
   assert Codec.forName(Lucene.LATEST_CODEC)
           .getClass()
           .isAssignableFrom(PerFieldMappingPostingFormatCodec.class)
       : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: "
           + Lucene.LATEST_CODEC;
 }
 /** Looking up a codec name that is not registered must fail with IllegalArgumentException. */
 public void testBogusLookup() {
   try {
     Codec.forName("dskfdskfsdfksdfdsf");
     // Give a descriptive failure instead of a bare fail() so a regression is self-explaining.
     fail("Codec.forName should have thrown IllegalArgumentException for an unknown codec name");
   } catch (IllegalArgumentException expected) {
     // expected: unknown codec names must not resolve silently
   }
 }
Example #3
0
 private static Codec readCodec(DataInput input, boolean unsupportedAllowed) throws IOException {
   final String name = input.readString();
   try {
     return Codec.forName(name);
   } catch (IllegalArgumentException e) {
     // give better error messages if we can, first check if this is a legacy codec
     if (unsupportedCodecs.contains(name)) {
       // We should only get here on pre-5.3 indices, but we can't test this until 7.0 when 5.x
       // indices become too old:
       assert unsupportedAllowed;
       IOException newExc =
           new IndexFormatTooOldException(input, "Codec '" + name + "' is too old");
       newExc.initCause(e);
       throw newExc;
     }
     // or maybe it's an old default codec that moved
     if (name.startsWith("Lucene")) {
       throw new IllegalArgumentException(
           "Could not load codec '"
               + name
               + "'.  Did you forget to add lucene-backward-codecs.jar?",
           e);
     }
     throw e;
   }
 }
 /** A registered codec name must resolve via SPI and report the same name back. */
 public void testLookup() {
   final String codecName = "Lucene410";
   assertEquals(codecName, Codec.forName(codecName).getName());
 }
Example #5
0
/**
 * Encodes/decodes an inverted index segment.
 *
 * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In
 * order for the segment to be read, the name must resolve to your implementation via {@link
 * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} to
 * resolve codec names.
 *
 * <p>If you implement your own codec, make sure that it has a no-arg constructor so SPI can load
 * it.
 *
 * @see ServiceLoader
 */
public abstract class Codec implements NamedSPILoader.NamedSPI {

  // NOTE: can be observed as null from a Codec subclass's static initializer or constructor if
  // that code runs while this class is still being initialized; forName()/availableCodecs()
  // guard against that so the caller gets an informative error instead of an NPE.
  private static final NamedSPILoader<Codec> loader = new NamedSPILoader<Codec>(Codec.class);

  private final String name;

  /**
   * Creates a new codec.
   *
   * <p>The provided name is written into the index segment, so it must resolve back to this
   * implementation via SPI when the segment is read.
   *
   * @param name must pass {@link NamedSPILoader#checkServiceName} (ascii alphanumeric)
   */
  public Codec(String name) {
    NamedSPILoader.checkServiceName(name);
    this.name = name;
  }

  /** Returns this codec's name */
  @Override
  public final String getName() {
    return name;
  }

  /** Encodes/decodes postings */
  public abstract PostingsFormat postingsFormat();

  /** Encodes/decodes docvalues */
  public abstract DocValuesFormat docValuesFormat();

  /** Encodes/decodes stored fields */
  public abstract StoredFieldsFormat storedFieldsFormat();

  /** Encodes/decodes term vectors */
  public abstract TermVectorsFormat termVectorsFormat();

  /** Encodes/decodes field infos file */
  public abstract FieldInfosFormat fieldInfosFormat();

  /** Encodes/decodes segment info file */
  public abstract SegmentInfoFormat segmentInfoFormat();

  /** Encodes/decodes document normalization values */
  public abstract NormsFormat normsFormat();

  /** Encodes/decodes live docs */
  public abstract LiveDocsFormat liveDocsFormat();

  /** looks up a codec by name */
  public static Codec forName(String name) {
    if (loader == null) {
      throw new IllegalStateException(
          "You called Codec.forName() before all Codecs could be initialized. "
              + "This likely happens if you call it from a Codec's ctor.");
    }
    return loader.lookup(name);
  }

  /** returns a list of all available codec names */
  public static Set<String> availableCodecs() {
    if (loader == null) {
      throw new IllegalStateException(
          "You called Codec.availableCodecs() before all Codecs could be initialized. "
              + "This likely happens if you call it from a Codec's ctor.");
    }
    return loader.availableServices();
  }

  // Resolved once at class-init time; loader is guaranteed non-null here because it is
  // initialized above, before this field.
  private static Codec defaultCodec = Codec.forName("Lucene40");

  /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */
  // TODO: should we use this, or maybe a system property is better?
  public static Codec getDefault() {
    return defaultCodec;
  }

  /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */
  public static void setDefault(Codec codec) {
    defaultCodec = codec;
  }

  @Override
  public String toString() {
    return name;
  }
}
Example #6
0
  // Verifies that PRMS field boosting reflects the probability ratio of the query terms per
  // field: terms making up 100% of f1 should be boosted 2x relative to terms making up 50% of f2.
  // Relies on the test fixture's `similarity` and mocked `simWeight` fields — assumed to be
  // declared elsewhere in this class (TODO confirm against full source).
  @Test
  public void testGetThatFieldProbabilityRatioIsReflectedInBoost() throws Exception {

    // Captures the boost values passed to SimWeight.normalize() by the mocked similarity.
    ArgumentCaptor<Float> normalizeCaptor = ArgumentCaptor.forClass(Float.class);

    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Directory directory = newDirectory();

    // Analyzer that splits on whitespace and on case changes (so "AbcDef" -> "abc", "def"),
    // then lowercases.
    Analyzer analyzer =
        new Analyzer() {
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new WhitespaceTokenizer();
            TokenStream filter =
                new WordDelimiterFilter(
                    source,
                    WordDelimiterFilter.GENERATE_WORD_PARTS
                        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE,
                    null);
            filter = new LowerCaseFilter(filter);
            return new TokenStreamComponents(source, filter);
          }
        };

    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setCodec(Codec.forName(TestUtil.LUCENE_CODEC));
    IndexWriter indexWriter = new IndexWriter(directory, conf);

    // Both fields f1 and f2 have 10 terms in total.
    // f1: the search terms (abc def) make 100% of all terms in f1
    // f2: the search terms (abc def) make 50% of all terms in f2
    // --> we expect that the sum of the boost factors for terms in bq(+f1:abc, +f1:def)
    // equals 2 * sum of the boost factors for terms in bq(+f2:abc, +f2:def)

    PRMSFieldBoostTest.addNumDocs("f1", "abc def", indexWriter, 2);
    PRMSFieldBoostTest.addNumDocs("f1", "abc", indexWriter, 4);
    PRMSFieldBoostTest.addNumDocs("f1", "def", indexWriter, 2);
    PRMSFieldBoostTest.addNumDocs("f2", "abc def", indexWriter, 1);
    PRMSFieldBoostTest.addNumDocs("f2", "abc", indexWriter, 2);
    PRMSFieldBoostTest.addNumDocs("f2", "def", indexWriter, 1);
    PRMSFieldBoostTest.addNumDocs("f2", "ghi", indexWriter, 5);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(similarity);

    // Both fields get the same static weight so any boost difference comes from PRMS alone.
    Map<String, Float> fields = new HashMap<>();
    fields.put("f1", 1f);
    fields.put("f2", 1f);
    SearchFieldsAndBoosting searchFieldsAndBoosting =
        new SearchFieldsAndBoosting(FieldBoostModel.PRMS, fields, fields, 0.8f);

    LuceneQueryBuilder queryBuilder =
        new LuceneQueryBuilder(dfc, analyzer, searchFieldsAndBoosting, 0.01f, null);

    WhiteSpaceQuerqyParser parser = new WhiteSpaceQuerqyParser();

    // "AbcDef" is split by the analyzer into two terms per field.
    Query query = queryBuilder.createQuery(parser.parse("AbcDef"));
    dfc.finishedUserQuery();

    assertTrue(query instanceof DisjunctionMaxQuery);

    // Expect one disjunct per field; each may be wrapped in a BoostQuery.
    DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
    List<Query> disjuncts = dmq.getDisjuncts();
    assertEquals(2, disjuncts.size());

    Query disjunct1 = disjuncts.get(0);
    if (disjunct1 instanceof BoostQuery) {
      disjunct1 = ((BoostQuery) disjunct1).getQuery();
    }
    assertTrue(disjunct1 instanceof BooleanQuery);

    BooleanQuery bq1 = (BooleanQuery) disjunct1;

    Query disjunct2 = disjuncts.get(1);
    if (disjunct2 instanceof BoostQuery) {
      disjunct2 = ((BoostQuery) disjunct2).getQuery();
    }
    assertTrue(disjunct2 instanceof BooleanQuery);

    BooleanQuery bq2 = (BooleanQuery) disjunct2;

    // Creating and normalizing the weights triggers SimWeight.normalize() once per term,
    // which the mock records (4 terms total: 2 per field).
    final Weight weight1 = bq1.createWeight(indexSearcher, true);
    weight1.normalize(0.1f, 4f);

    final Weight weight2 = bq2.createWeight(indexSearcher, true);
    weight2.normalize(0.1f, 4f);

    Mockito.verify(simWeight, times(4)).normalize(eq(0.1f), normalizeCaptor.capture());
    final List<Float> capturedBoosts = normalizeCaptor.getAllValues();

    // capturedBoosts = boosts of [bq1.term1, bq1.term2, bq2.term1, bq2.term2 ]
    // Within a field both terms have equal probability, so equal boosts; across fields the
    // ratio must be 2 (f1 terms are twice as probable as f2 terms).
    assertEquals(capturedBoosts.get(0), capturedBoosts.get(1), 0.00001);
    assertEquals(capturedBoosts.get(2), capturedBoosts.get(3), 0.00001);
    assertEquals(2f, capturedBoosts.get(0) / capturedBoosts.get(3), 0.00001);

    indexReader.close();
    directory.close();
    analyzer.close();
  }
  // Test-rule setup: saves global state (system properties, default codec, InfoStream, locale,
  // timezone), then picks the codec/postings/docvalues/locale/timezone for this test run based
  // on the tests.* system properties, falling back to randomized choices. The matching restore
  // presumably happens in an after() counterpart — not visible here.
  @Override
  protected void before() throws Exception {
    // enable this by default, for IDE consistency with ant tests (as its the default from ant)
    // TODO: really should be in solr base classes, but some extend LTC directly.
    // we do this in beforeClass, because some tests currently disable it
    restoreProperties.put("solr.directoryFactory", System.getProperty("solr.directoryFactory"));
    if (System.getProperty("solr.directoryFactory") == null) {
      System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory");
    }

    // Restore more Solr properties.
    restoreProperties.put("solr.solr.home", System.getProperty("solr.solr.home"));
    restoreProperties.put("solr.data.dir", System.getProperty("solr.data.dir"));

    // if verbose: print some debugging stuff about which codecs are loaded.
    if (VERBOSE) {
      Set<String> codecs = Codec.availableCodecs();
      for (String codec : codecs) {
        System.out.println(
            "Loaded codec: '" + codec + "': " + Codec.forName(codec).getClass().getName());
      }

      Set<String> postingsFormats = PostingsFormat.availablePostingsFormats();
      for (String postingsFormat : postingsFormats) {
        System.out.println(
            "Loaded postingsFormat: '"
                + postingsFormat
                + "': "
                + PostingsFormat.forName(postingsFormat).getClass().getName());
      }
    }

    // Save the current InfoStream so it can be restored after the test.
    savedInfoStream = InfoStream.getDefault();
    final Random random = RandomizedContext.current().getRandom();
    final boolean v = random.nextBoolean();
    if (INFOSTREAM) {
      InfoStream.setDefault(new ThreadNameFixingPrintStreamInfoStream(System.out));
    } else if (v) {
      // Half of the runs exercise the null InfoStream path even when INFOSTREAM is off.
      InfoStream.setDefault(new NullInfoStream());
    }

    // Collect codecs suppressed via @SuppressCodecs on the test class.
    Class<?> targetClass = RandomizedContext.current().getTargetClass();
    avoidCodecs = new HashSet<String>();
    if (targetClass.isAnnotationPresent(SuppressCodecs.class)) {
      SuppressCodecs a = targetClass.getAnnotation(SuppressCodecs.class);
      avoidCodecs.addAll(Arrays.asList(a.value()));
    }

    // set back to default
    LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = false;
    LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;

    // Codec selection: explicit tests.codec wins; otherwise randomVal (0-9) picks a branch.
    // Branch order matters — earlier branches shadow later ones for the same randomVal.
    savedCodec = Codec.getDefault();
    int randomVal = random.nextInt(10);
    if ("Lucene3x".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && "random".equals(TEST_POSTINGSFORMAT)
            && "random".equals(TEST_DOCVALUESFORMAT)
            && randomVal == 3
            && !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup
      codec = Codec.forName("Lucene3x");
      assert (codec instanceof PreFlexRWCodec)
          : "fix your classpath to have tests-framework.jar before lucene-core.jar";
      LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true;
    } else if ("Lucene40".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && "random".equals(TEST_POSTINGSFORMAT)
            && randomVal == 0
            && !shouldAvoidCodec("Lucene40"))) { // 4.0 setup
      codec = Codec.forName("Lucene40");
      LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
      assert codec instanceof Lucene40RWCodec
          : "fix your classpath to have tests-framework.jar before lucene-core.jar";
      assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat)
          : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    } else if ("Lucene41".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && "random".equals(TEST_POSTINGSFORMAT)
            && "random".equals(TEST_DOCVALUESFORMAT)
            && randomVal == 1
            && !shouldAvoidCodec("Lucene41"))) {
      codec = Codec.forName("Lucene41");
      LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
      assert codec instanceof Lucene41RWCodec
          : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    } else if ("Lucene42".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && "random".equals(TEST_POSTINGSFORMAT)
            && "random".equals(TEST_DOCVALUESFORMAT)
            && randomVal == 2
            && !shouldAvoidCodec("Lucene42"))) {
      codec = Codec.forName("Lucene42");
      LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
      assert codec instanceof Lucene42RWCodec
          : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    } else if ("Lucene45".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && "random".equals(TEST_POSTINGSFORMAT)
            && "random".equals(TEST_DOCVALUESFORMAT)
            && randomVal == 5
            && !shouldAvoidCodec("Lucene45"))) {
      codec = Codec.forName("Lucene45");
      LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
      assert codec instanceof Lucene45RWCodec
          : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    } else if (("random".equals(TEST_POSTINGSFORMAT) == false)
        || ("random".equals(TEST_DOCVALUESFORMAT) == false)) {
      // the user wired postings or DV: this is messy
      // refactor into RandomCodec....

      final PostingsFormat format;
      if ("random".equals(TEST_POSTINGSFORMAT)) {
        format = PostingsFormat.forName("Lucene41");
      } else {
        format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
      }

      final DocValuesFormat dvFormat;
      if ("random".equals(TEST_DOCVALUESFORMAT)) {
        dvFormat = DocValuesFormat.forName("Lucene45");
      } else {
        dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT);
      }

      // Anonymous Lucene46Codec that pins the chosen postings/DV format for every field.
      codec =
          new Lucene46Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
              return format;
            }

            @Override
            public DocValuesFormat getDocValuesFormatForField(String field) {
              return dvFormat;
            }

            @Override
            public String toString() {
              return super.toString() + ": " + format.toString() + ", " + dvFormat.toString();
            }
          };
    } else if ("SimpleText".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && randomVal == 9
            && LuceneTestCase.rarely(random)
            && !shouldAvoidCodec("SimpleText"))) {
      codec = new SimpleTextCodec();
    } else if ("Appending".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("Appending"))) {
      codec = new AppendingRWCodec();
      LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE =
          true; // this is really just Lucene40 with some minor changes
    } else if ("CheapBastard".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC)
            && randomVal == 8
            && !shouldAvoidCodec("CheapBastard")
            && !shouldAvoidCodec("Lucene41"))) {
      // we also avoid this codec if Lucene41 is avoided, since thats the postings format it uses.
      // NOTE(review): with TEST_CODEC=="random", randomVal==8 is consumed by the Appending branch
      // above, so this random path looks unreachable unless Appending is suppressed — verify.
      codec = new CheapBastardCodec();
    } else if ("Asserting".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Asserting"))) {
      codec = new AssertingCodec();
    } else if ("Compressing".equals(TEST_CODEC)
        || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Compressing"))) {
      // NOTE(review): randomVal==5 also triggers the Lucene45 branch (when postings/DV are
      // random) or the wired-format branch above, so this random path may be unreachable —
      // confirm intended branch precedence.
      codec = CompressingCodec.randomInstance(random);
    } else if (!"random".equals(TEST_CODEC)) {
      codec = Codec.forName(TEST_CODEC);
    } else if ("random".equals(TEST_POSTINGSFORMAT)) {
      codec = new RandomCodec(random, avoidCodecs);
    } else {
      assert false;
    }
    Codec.setDefault(codec);

    // Initialize locale/ timezone.
    String testLocale = System.getProperty("tests.locale", "random");
    String testTimeZone = System.getProperty("tests.timezone", "random");

    // Always pick a random one for consistency (whether tests.locale was specified or not).
    savedLocale = Locale.getDefault();
    Locale randomLocale = randomLocale(random);
    locale = testLocale.equals("random") ? randomLocale : localeForName(testLocale);
    Locale.setDefault(locale);

    // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
    // So store the original property value and restore it at end.
    restoreProperties.put("user.timezone", System.getProperty("user.timezone"));
    savedTimeZone = TimeZone.getDefault();
    TimeZone randomTimeZone = randomTimeZone(random());
    timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
    TimeZone.setDefault(timeZone);
    similarity =
        random().nextBoolean() ? new DefaultSimilarity() : new RandomSimilarityProvider(random());

    // Check codec restrictions once at class level.
    try {
      checkCodecRestrictions(codec);
    } catch (AssumptionViolatedException e) {
      // Surface which codecs were suppressed before re-throwing the assumption failure.
      System.err.println(
          "NOTE: "
              + e.getMessage()
              + " Suppressed codecs: "
              + Arrays.toString(avoidCodecs.toArray()));
      throw e;
    }
  }
Example #8
0
/**
 * Encodes/decodes an inverted index segment.
 *
 * <p>Note, when extending this class, the name ({@link #getName}) is written into the index. In
 * order for the segment to be read, the name must resolve to your implementation via {@link
 * #forName(String)}. This method uses Java's {@link ServiceLoader Service Provider Interface} (SPI)
 * to resolve codec names.
 *
 * <p>If you implement your own codec, make sure that it has a no-arg constructor so SPI can load
 * it.
 *
 * @see ServiceLoader
 */
public abstract class Codec implements NamedSPILoader.NamedSPI {

  // NOTE: can be observed as null from a Codec subclass's static initializer or constructor if
  // that code runs while this class is still being initialized; the guards in the static
  // accessors below turn that into an informative error instead of an NPE.
  private static final NamedSPILoader<Codec> loader = new NamedSPILoader<Codec>(Codec.class);

  private final String name;

  /**
   * Creates a new codec.
   *
   * <p>The provided name will be written into the index segment: in order to for the segment to be
   * read this class should be registered with Java's SPI mechanism (registered in META-INF/ of your
   * jar file, etc).
   *
   * @param name must be all ascii alphanumeric, and less than 128 characters in length.
   */
  protected Codec(String name) {
    NamedSPILoader.checkServiceName(name);
    this.name = name;
  }

  /** Returns this codec's name */
  @Override
  public final String getName() {
    return name;
  }

  /** Encodes/decodes postings */
  public abstract PostingsFormat postingsFormat();

  /** Encodes/decodes docvalues */
  public abstract DocValuesFormat docValuesFormat();

  /** Encodes/decodes stored fields */
  public abstract StoredFieldsFormat storedFieldsFormat();

  /** Encodes/decodes term vectors */
  public abstract TermVectorsFormat termVectorsFormat();

  /** Encodes/decodes field infos file */
  public abstract FieldInfosFormat fieldInfosFormat();

  /** Encodes/decodes segment info file */
  public abstract SegmentInfoFormat segmentInfoFormat();

  /** Encodes/decodes document normalization values */
  public abstract NormsFormat normsFormat();

  /** Encodes/decodes live docs */
  public abstract LiveDocsFormat liveDocsFormat();

  /** looks up a codec by name */
  public static Codec forName(String name) {
    if (loader == null) {
      throw new IllegalStateException(
          "You called Codec.forName() before all Codecs could be initialized. "
              + "This likely happens if you call it from a Codec's ctor.");
    }
    return loader.lookup(name);
  }

  /** returns a list of all available codec names */
  public static Set<String> availableCodecs() {
    if (loader == null) {
      throw new IllegalStateException(
          "You called Codec.availableCodecs() before all Codecs could be initialized. "
              + "This likely happens if you call it from a Codec's ctor.");
    }
    return loader.availableServices();
  }

  /**
   * Reloads the codec list from the given {@link ClassLoader}. Changes to the codecs are visible
   * after the method ends, all iterators ({@link #availableCodecs()},...) stay consistent.
   *
   * <p><b>NOTE:</b> Only new codecs are added, existing ones are never removed or replaced.
   *
   * <p><em>This method is expensive and should only be called for discovery of new codecs on the
   * given classpath/classloader!</em>
   */
  public static void reloadCodecs(ClassLoader classloader) {
    // Same class-initialization guard as forName()/availableCodecs(): fail with an informative
    // message rather than an NPE if called before the loader is initialized.
    if (loader == null) {
      throw new IllegalStateException(
          "You called Codec.reloadCodecs() before all Codecs could be initialized. "
              + "This likely happens if you call it from a Codec's ctor.");
    }
    loader.reload(classloader);
  }

  // Resolved once at class-init time; loader is guaranteed non-null here because it is
  // initialized above, before this field.
  private static Codec defaultCodec = Codec.forName("Lucene41");

  /** expert: returns the default codec used for newly created {@link IndexWriterConfig}s. */
  // TODO: should we use this, or maybe a system property is better?
  public static Codec getDefault() {
    return defaultCodec;
  }

  /** expert: sets the default codec used for newly created {@link IndexWriterConfig}s. */
  public static void setDefault(Codec codec) {
    defaultCodec = codec;
  }

  /**
   * returns the codec's name. Subclasses can override to provide more detail (such as parameters).
   */
  @Override
  public String toString() {
    return name;
  }
}
  // Regression test: documents buffered by one indexing thread's thread-state must eventually be
  // flushed even when only a different thread keeps indexing afterwards. If the writer held on to
  // a thread-state's buffered docs forever, the main loop below would never terminate.
  public void testDocsStuckInRAMForever() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    // Tiny RAM buffer so flushes are triggered quickly.
    iwc.setRAMBufferSizeMB(.2);
    Codec codec = Codec.forName("Lucene49");
    iwc.setCodec(codec);
    // No merging, so every flushed segment stays visible in the directory listing.
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    final IndexWriter w = new IndexWriter(dir, iwc);
    final CountDownLatch startingGun = new CountDownLatch(1);
    // Two concurrent indexing threads, each tagging its docs with its own threadID value.
    Thread[] threads = new Thread[2];
    for (int i = 0; i < threads.length; i++) {
      final int threadID = i;
      threads[i] =
          new Thread() {
            @Override
            public void run() {
              try {
                startingGun.await();
                for (int j = 0; j < 1000; j++) {
                  Document doc = new Document();
                  doc.add(newStringField("field", "threadID" + threadID, Field.Store.NO));
                  w.addDocument(doc);
                }
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };
      threads[i].start();
    }

    startingGun.countDown();
    for (Thread t : threads) {
      t.join();
    }

    Set<String> segSeen = new HashSet<>();
    int thread0Count = 0;
    int thread1Count = 0;

    // At this point the writer should have 2 thread states w/ docs; now we index with only 1 thread
    // until we see all 1000 thread0 & thread1
    // docs flushed.  If the writer incorrectly holds onto previously indexed docs forever then this
    // will run forever:
    while (thread0Count < 1000 || thread1Count < 1000) {
      Document doc = new Document();
      doc.add(newStringField("field", "threadIDmain", Field.Store.NO));
      w.addDocument(doc);

      // Scan for newly flushed segments (.si files) and count the thread-tagged docs they hold.
      for (String fileName : dir.listAll()) {
        if (fileName.endsWith(".si")) {
          String segName = IndexFileNames.parseSegmentName(fileName);
          if (segSeen.contains(segName) == false) {
            segSeen.add(segName);
            SegmentInfo si =
                new Lucene46SegmentInfoFormat()
                    .getSegmentInfoReader()
                    .read(dir, segName, IOContext.DEFAULT);
            si.setCodec(codec);
            SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, -1, -1, -1);
            SegmentReader sr = new SegmentReader(sci, 1, IOContext.DEFAULT);
            try {
              // docFreq counts how many docs in this segment carry each thread's tag.
              thread0Count += sr.docFreq(new Term("field", "threadID0"));
              thread1Count += sr.docFreq(new Term("field", "threadID1"));
            } finally {
              sr.close();
            }
          }
        }
      }
    }

    w.close();
    dir.close();
  }