Example #1
 static {
   // Id
   IdFieldType = new FieldType();
   IdFieldType.setStored(true);
   IdFieldType.setTokenized(false);
   IdFieldType.setOmitNorms(true);
   IdFieldType.setIndexOptions(IndexOptions.DOCS);
   IdFieldType.freeze();
   // content
   ContentFieldType = new FieldType();
   ContentFieldType.setStored(false);
   ContentFieldType.setTokenized(true);
   ContentFieldType.setOmitNorms(false);
   ContentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
   ContentFieldType.freeze();
   // title
   TitleFieldType = new FieldType();
   TitleFieldType.setStored(true);
   TitleFieldType.setTokenized(true);
   TitleFieldType.setOmitNorms(false);
   TitleFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
   TitleFieldType.freeze();
   // onlyForStore
   OnlyStoreFieldType = new FieldType();
   OnlyStoreFieldType.setStored(true);
   OnlyStoreFieldType.setTokenized(false);
   OnlyStoreFieldType.setOmitNorms(false);
   OnlyStoreFieldType.setIndexOptions(IndexOptions.NONE);
   OnlyStoreFieldType.freeze();
 }
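A minimal usage sketch of these frozen types; the field names, values, and the writer variable are illustrative assumptions, not from the source:

 // Hedged sketch: builds a Document using the frozen FieldType constants above.
 Document doc = new Document();
 doc.add(new Field("id", "doc-42", IdFieldType));                     // stored, untokenized key
 doc.add(new Field("title", "Some title", TitleFieldType));           // stored and full-text indexed
 doc.add(new Field("content", "Body text ...", ContentFieldType));    // indexed only, not stored
 doc.add(new Field("raw", "stored-only value", OnlyStoreFieldType));  // stored, not indexed
 writer.addDocument(doc); // assumes an open IndexWriter named 'writer'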
Example #2
  /*
   * If you write your own analyzer, please register it here
   */
  static {
    FileAnalyzerFactory[] analyzers = {
      DEFAULT_ANALYZER_FACTORY,
      new IgnorantAnalyzerFactory(),
      new BZip2AnalyzerFactory(),
      new XMLAnalyzerFactory(),
      new TroffAnalyzerFactory(),
      new ELFAnalyzerFactory(),
      new JavaClassAnalyzerFactory(),
      new ImageAnalyzerFactory(),
      JarAnalyzerFactory.DEFAULT_INSTANCE,
      ZipAnalyzerFactory.DEFAULT_INSTANCE,
      new TarAnalyzerFactory(),
      new CAnalyzerFactory(),
      new CSharpAnalyzerFactory(),
      new VBAnalyzerFactory(),
      new CxxAnalyzerFactory(),
      new ErlangAnalyzerFactory(),
      new ShAnalyzerFactory(),
      PlainAnalyzerFactory.DEFAULT_INSTANCE,
      new UuencodeAnalyzerFactory(),
      new GZIPAnalyzerFactory(),
      new JavaAnalyzerFactory(),
      new JavaScriptAnalyzerFactory(),
      new PythonAnalyzerFactory(),
      new RustAnalyzerFactory(),
      new PerlAnalyzerFactory(),
      new PhpAnalyzerFactory(),
      new LispAnalyzerFactory(),
      new TclAnalyzerFactory(),
      new ScalaAnalyzerFactory(),
      new ClojureAnalyzerFactory(),
      new SQLAnalyzerFactory(),
      new PLSQLAnalyzerFactory(),
      new FortranAnalyzerFactory(),
      new HaskellAnalyzerFactory(),
      new GolangAnalyzerFactory(),
      new LuaAnalyzerFactory(),
      new PascalAnalyzerFactory()
    };

    for (FileAnalyzerFactory analyzer : analyzers) {
      registerAnalyzer(analyzer);
    }

    for (FileAnalyzerFactory analyzer : analyzers) {
      if (analyzer.getName() != null && !analyzer.getName().isEmpty()) {
        fileTypeDescriptions.put(analyzer.getAnalyzer().getFileTypeName(), analyzer.getName());
      }
    }

    string_ft_stored_nanalyzed_norms.setOmitNorms(false);
    string_ft_nstored_nanalyzed_norms.setOmitNorms(false);
  }
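Per the registration comment above, a custom analyzer is hooked in by appending its factory to the analyzers array; a sketch, where FooAnalyzerFactory is a hypothetical FileAnalyzerFactory subclass:

     FileAnalyzerFactory[] analyzers = {
       DEFAULT_ANALYZER_FACTORY,
       // ... the built-in factories listed above ...
       new FooAnalyzerFactory() // hypothetical custom factory; picked up by the registration loop
     };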
  static {
    TYPE_NOT_STORED.setOmitNorms(true);
    TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS);
    TYPE_NOT_STORED.setTokenized(false);
    TYPE_NOT_STORED.freeze();

    TYPE_STORED.setOmitNorms(true);
    TYPE_STORED.setIndexOptions(IndexOptions.DOCS);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setTokenized(false);
    TYPE_STORED.freeze();
  }
  /**
   * Translates the pre-4.0 enums for specifying how a field should be indexed into the 4.0 {@link
   * FieldType} approach.
   *
   * @deprecated This is here only to ease transition from the pre-4.0 APIs.
   */
  @Deprecated
  public static final FieldType translateFieldType(
      Store store, Index index, TermVector termVector) {
    final FieldType ft = new FieldType();

    ft.setStored(store == Store.YES);

    switch (index) {
      case ANALYZED:
        ft.setIndexed(true);
        ft.setTokenized(true);
        break;
      case ANALYZED_NO_NORMS:
        ft.setIndexed(true);
        ft.setTokenized(true);
        ft.setOmitNorms(true);
        break;
      case NOT_ANALYZED:
        ft.setIndexed(true);
        ft.setTokenized(false);
        break;
      case NOT_ANALYZED_NO_NORMS:
        ft.setIndexed(true);
        ft.setTokenized(false);
        ft.setOmitNorms(true);
        break;
      case NO:
        break;
    }

    switch (termVector) {
      case NO:
        break;
      case YES:
        ft.setStoreTermVectors(true);
        break;
      case WITH_POSITIONS:
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        break;
      case WITH_OFFSETS:
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        break;
      case WITH_POSITIONS_OFFSETS:
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.setStoreTermVectorOffsets(true);
        break;
    }
    ft.freeze();
    return ft;
  }
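For example, the pre-4.0 combination Store.YES / Index.NOT_ANALYZED_NO_NORMS / TermVector.NO yields the same FieldType as building it by hand; a sketch based on the switch logic above:

  // Legacy call:
  FieldType ft = translateFieldType(Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);

  // Equivalent 4.0-style construction:
  FieldType manual = new FieldType();
  manual.setStored(true);      // Store.YES
  manual.setIndexed(true);     // NOT_ANALYZED -> indexed, but not tokenized
  manual.setTokenized(false);
  manual.setOmitNorms(true);   // ..._NO_NORMS
  manual.freeze();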
  /**
   * Subclasses can override this method to change the field type of the text field, e.g. to
   * change the index options.
   */
  protected FieldType getTextFieldType() {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_ONLY);
    ft.setOmitNorms(true);

    return ft;
  }
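A subclass that wants frequencies and positions back (e.g. for phrase queries) could override it like this; a minimal sketch:

  @Override
  protected FieldType getTextFieldType() {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    // keep norms and index full positions instead of the docs-only default above
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    return ft;
  }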
  /**
   * Used for adding a document when a field needs to be created from a type and a string.
   *
   * <p>By default, the indexed value is the same as the stored value (taken from toInternal()).
   * Having a different representation for external, internal, and indexed would present quite a few
   * problems given the current Lucene architecture. An analyzer for adding docs would need to
   * translate internal-&gt;indexed while an analyzer for querying would need to translate
   * external-&gt;indexed.
   *
   * <p>The only other alternative to having internal==indexed would be to have internal==external.
   * In this case, toInternal should convert to the indexed representation, toExternal() should do
   * nothing, and createField() should *not* call toInternal, but use the external value and set
   * tokenized=true to get Lucene to convert to the internal(indexed) form. :TODO: clean up and
   * clarify this explanation.
   *
   * @see #toInternal
   */
  public StorableField createField(SchemaField field, Object value, float boost) {
    if (!field.indexed() && !field.stored()) {
      if (log.isTraceEnabled()) log.trace("Ignoring unindexed/unstored field: " + field);
      return null;
    }

    String val;
    try {
      val = toInternal(value.toString());
    } catch (RuntimeException e) {
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR,
          "Error while creating field '" + field + "' from value '" + value + "'",
          e);
    }
    if (val == null) return null;

    org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
    newType.setTokenized(field.isTokenized());
    newType.setStored(field.stored());
    newType.setOmitNorms(field.omitNorms());
    newType.setIndexOptions(field.indexed() ? getIndexOptions(field, val) : IndexOptions.NONE);
    newType.setStoreTermVectors(field.storeTermVector());
    newType.setStoreTermVectorOffsets(field.storeTermOffsets());
    newType.setStoreTermVectorPositions(field.storeTermPositions());
    newType.setStoreTermVectorPayloads(field.storeTermPayloads());

    return createField(field.getName(), val, newType, boost);
  }
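The internal == indexed contract from the javadoc means a FieldType subclass only has to normalize once, in toInternal; a hypothetical sketch (the subclass name and its normalization are illustrative only):

  // Hypothetical subclass: the internal (== indexed) representation is the lower-cased value.
  class LowerCaseStrField extends FieldType {
    @Override
    public String toInternal(String val) {
      return val.toLowerCase(Locale.ROOT); // used for both indexing and querying
    }
    // toExternal() is left as the default: the stored (internal) value is returned unchanged
  }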
  // Verifies that no *.nrm file exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    writer.commit();

    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoNrm(ram);
    ram.close();
  }
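assertNoNrm is a helper defined elsewhere in the test class; a minimal sketch of what it checks, assuming the legacy *.nrm / *.len norms filename conventions:

  private void assertNoNrm(Directory dir) throws Throwable {
    // norms files carried the .nrm (and later .len) extension in older codecs
    for (String file : dir.listAll()) {
      assertFalse(file.endsWith(".nrm") || file.endsWith(".len"));
    }
  }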
  @Override
  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));

    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setOmitNorms(true);
    Field field = newField("field", "", customType);
    doc.add(field);

    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++) {
      field.setStringValue(df.format(i));
      writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }
 /*
  * Tests per-field codec support by adding fields with random codecs.
  */
 @Test
 public void testStressPerFieldCodec() throws IOException {
   Directory dir = newDirectory(random());
   final int docsPerRound = 97;
   int numRounds = atLeast(1);
   for (int i = 0; i < numRounds; i++) {
     int num = TestUtil.nextInt(random(), 30, 60);
     IndexWriterConfig config =
         newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setOpenMode(OpenMode.CREATE_OR_APPEND);
     IndexWriter writer = newWriter(dir, config);
     for (int j = 0; j < docsPerRound; j++) {
       final Document doc = new Document();
       for (int k = 0; k < num; k++) {
         FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
         customType.setTokenized(random().nextBoolean());
         customType.setOmitNorms(random().nextBoolean());
         Field field =
             newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType);
         doc.add(field);
       }
       writer.addDocument(doc);
     }
     if (random().nextBoolean()) {
       writer.forceMerge(1);
     }
     writer.commit();
     assertEquals((i + 1) * docsPerRound, writer.maxDoc());
     writer.close();
   }
   dir.close();
 }
 static {
   FIELD_TYPE.setIndexed(true);
   FIELD_TYPE.setTokenized(false);
   FIELD_TYPE.setStored(true);
   FIELD_TYPE.setOmitNorms(true);
   FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
   FIELD_TYPE.freeze();
 }
 static {
   TYPE.setIndexed(true);
   TYPE.setOmitNorms(true);
   TYPE.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS);
   TYPE.setTokenized(true);
   TYPE.setStoreTermVectors(true);
   TYPE.freeze();
 }
    static {
      FIELD_TYPE.setIndexed(true);
      FIELD_TYPE.setTokenized(false);
      FIELD_TYPE.setStored(true);
      FIELD_TYPE.setOmitNorms(true);
      FIELD_TYPE.setIndexOptions(
          FieldInfo.IndexOptions
              .DOCS_AND_FREQS_AND_POSITIONS); // we store payload (otherwise, we really need just
                                              // docs)
      FIELD_TYPE.freeze();

      NESTED_FIELD_TYPE.setIndexed(true);
      NESTED_FIELD_TYPE.setTokenized(false);
      NESTED_FIELD_TYPE.setStored(false);
      NESTED_FIELD_TYPE.setOmitNorms(true);
      // we can set this to another index option when we move away from storing payloads
      // NESTED_FIELD_TYPE.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
      NESTED_FIELD_TYPE.freeze();
    }
Example #13
 static {
   UID_FIELD_TYPE.setIndexed(true);
   UID_FIELD_TYPE.setTokenized(false);
   UID_FIELD_TYPE.setStored(true);
   UID_FIELD_TYPE.setOmitNorms(true);
   UID_FIELD_TYPE.setIndexOptions(
       FieldInfo.IndexOptions
           .DOCS_AND_FREQS_AND_POSITIONS); // we store payload (otherwise, we really need just
                                           // docs)
   UID_FIELD_TYPE.freeze();
 }
  // Tests whether merging of docs that have different
  // omitNorms settings for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy(2)));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // Now add another document which has norms for f2 but not for f1, and verify that the
    // SegmentMerger keeps things consistent
    d = new Document();

    // Reversed
    d.add(newField("f1", "This field has norms", customType));

    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }
  /**
   * Tests various combinations of omitNorms=true/false and the field not existing at all,
   * ensuring that only omitNorms is 'viral'. Internally checks that MultiNorms.norms() is
   * consistent with the fully merged equivalent (returns the same bytes).
   */
  public void testOmitNormsCombos() throws IOException {
    // indexed with norms
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    Field norms = new Field("foo", "a", customType);
    // indexed without norms
    FieldType customType1 = new FieldType(TextField.TYPE_STORED);
    customType1.setOmitNorms(true);
    Field noNorms = new Field("foo", "a", customType1);
    // not indexed, but stored
    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    Field noIndex = new Field("foo", "a", customType2);
    // not indexed but stored, omitNorms is set
    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    customType3.setOmitNorms(true);
    Field noNormsNoIndex = new Field("foo", "a", customType3);
    // neither indexed nor stored (doesn't exist at all; we index a different field instead)
    Field emptyNorms = new Field("bar", "a", customType);

    assertNotNull(getNorms("foo", norms, norms));
    assertNull(getNorms("foo", norms, noNorms));
    assertNotNull(getNorms("foo", norms, noIndex));
    assertNotNull(getNorms("foo", norms, noNormsNoIndex));
    assertNotNull(getNorms("foo", norms, emptyNorms));
    assertNull(getNorms("foo", noNorms, noNorms));
    assertNull(getNorms("foo", noNorms, noIndex));
    assertNull(getNorms("foo", noNorms, noNormsNoIndex));
    assertNull(getNorms("foo", noNorms, emptyNorms));
    assertNull(getNorms("foo", noIndex, noIndex));
    assertNull(getNorms("foo", noIndex, noNormsNoIndex));
    assertNull(getNorms("foo", noIndex, emptyNorms));
    assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex));
    assertNull(getNorms("foo", noNormsNoIndex, emptyNorms));
    assertNull(getNorms("foo", emptyNorms, emptyNorms));
  }
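getNorms is a helper not shown here; a condensed sketch of the idea, assuming it indexes each field in its own document, compares the multi-segment norms against the force-merged segment, and returns null when norms were omitted:

  // Sketch only: assertions and the merged-vs-multi comparison are abbreviated.
  NumericDocValues getNorms(String field, Field f1, Field f2) throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter riw =
        new RandomIndexWriter(
            random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document d = new Document();
    d.add(f1);
    riw.addDocument(d);
    d = new Document();
    d.add(f2);
    riw.addDocument(d);

    IndexReader multi = riw.getReader();
    NumericDocValues norms = MultiDocValues.getNormValues(multi, field); // null if omitted

    riw.forceMerge(1); // the merged segment must agree with the multi-segment view
    IndexReader merged = riw.getReader();
    // ... compare norms against the merged segment's norms here ...

    multi.close();
    merged.close();
    riw.close();
    dir.close();
    return norms; // callers above only check null / non-null
  }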
 @Override
 public StringFieldMapper build(BuilderContext context) {
   if (positionOffsetGap > 0) {
     indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionOffsetGap);
     searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionOffsetGap);
     searchQuotedAnalyzer = new NamedAnalyzer(searchQuotedAnalyzer, positionOffsetGap);
   }
    // If the field is not analyzed, then by default we should omit norms and use docs-only
    // index options, as that is probably what the user really wants; if they are set
    // explicitly, we use those values. We also change the values on the default field type
    // so that toXContent emits what differs from the defaults.
   FieldType defaultFieldType = new FieldType(Defaults.FIELD_TYPE);
   if (fieldType.indexOptions() != IndexOptions.NONE && !fieldType.tokenized()) {
     defaultFieldType.setOmitNorms(true);
     defaultFieldType.setIndexOptions(IndexOptions.DOCS);
     if (!omitNormsSet && boost == Defaults.BOOST) {
       fieldType.setOmitNorms(true);
     }
     if (!indexOptionsSet) {
       fieldType.setIndexOptions(IndexOptions.DOCS);
     }
   }
   defaultFieldType.freeze();
   StringFieldMapper fieldMapper =
       new StringFieldMapper(
           buildNames(context),
           boost,
           fieldType,
           defaultFieldType,
           docValues,
           nullValue,
           indexAnalyzer,
           searchAnalyzer,
           searchQuotedAnalyzer,
           positionOffsetGap,
           ignoreAbove,
           similarity,
           normsLoading,
           fieldDataSettings,
           context.indexSettings(),
           multiFieldsBuilder.build(this, context),
           copyTo);
   fieldMapper.includeInAll(includeInAll);
   return fieldMapper;
 }
 private IndexReader createIndex(int docCount, int facetFields, boolean ram)
     throws CorruptIndexException, LockObtainFailedException, IOException {
   Directory directory;
   if (ram) {
     directory = new RAMDirectory();
   } else {
     File dir = new File("./target/tmp/facet_tmp");
     if (dir.exists()) {
       directory = FSDirectory.open(dir);
       if (DirectoryReader.indexExists(directory)) {
         DirectoryReader reader = DirectoryReader.open(directory);
         if (reader.numDocs() == docCount) {
           return reader;
         }
         reader.close();
         directory.close();
       }
     }
     rmr(dir);
     directory = FSDirectory.open(dir);
   }
   IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
   IndexWriter writer = new IndexWriter(directory, conf);
   FieldType fieldType = new FieldType();
   fieldType.setStored(true);
   fieldType.setIndexed(true);
   fieldType.setOmitNorms(true);
   long start = System.nanoTime();
   for (int i = 0; i < docCount; i++) {
     long now = System.nanoTime();
     if (start + TimeUnit.SECONDS.toNanos(5) < now) {
       System.out.println("Indexing doc " + i + " of " + docCount);
       start = System.nanoTime();
     }
     Document document = new Document();
     document.add(new Field("f1", "value", fieldType));
     document.add(new Field("f2", "v" + i, fieldType));
     for (int f = 0; f < facetFields; f++) {
       document.add(new Field("facet" + f, "value", fieldType));
     }
     writer.addDocument(document);
   }
   writer.close();
   return DirectoryReader.open(directory);
 }
Example #18
  /**
   * Constructs the Indexer.
   *
   * @param directory the main BlackLab index directory
   * @param create if true, creates a new index; otherwise, appends to existing index
   * @param docIndexerClass how to index the files, or null to autodetect
   * @param indexTemplateFile JSON file to use as template for index structure / metadata (if
   *     creating new index)
   * @throws DocumentFormatException if no DocIndexer was specified and autodetection failed
   * @throws IOException
   */
  public Indexer(
      File directory,
      boolean create,
      Class<? extends DocIndexer> docIndexerClass,
      File indexTemplateFile)
      throws DocumentFormatException, IOException {
    this.docIndexerClass = docIndexerClass;

    searcher = Searcher.openForWriting(directory, create, indexTemplateFile);
    if (!create) searcher.getIndexStructure().setModified();

    if (this.docIndexerClass == null) {
      // No DocIndexer supplied; try to detect it from the index
      // metadata.
      String formatId = searcher.getIndexStructure().getDocumentFormat();
      if (formatId != null && formatId.length() > 0)
        setDocIndexer(DocumentFormats.getIndexerClass(formatId));
      else {
        throw new DocumentFormatException("Cannot detect document format for index!");
      }
    }

    metadataFieldTypeTokenized = new FieldType();
    metadataFieldTypeTokenized.setStored(true);
    metadataFieldTypeTokenized.setIndexed(true);
    metadataFieldTypeTokenized.setTokenized(true);
    metadataFieldTypeTokenized.setOmitNorms(true); // @@@ <-- depending on setting?
    metadataFieldTypeTokenized.setStoreTermVectors(true);
    metadataFieldTypeTokenized.setStoreTermVectorPositions(true);
    metadataFieldTypeTokenized.setStoreTermVectorOffsets(true);
    metadataFieldTypeTokenized.freeze();

    metadataFieldTypeUntokenized = new FieldType(metadataFieldTypeTokenized);
    metadataFieldTypeUntokenized.setTokenized(false);
    metadataFieldTypeUntokenized.freeze();
  }
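A hedged construction sketch matching the documented parameters; the directory and template paths are illustrative, and checked exceptions are omitted:

  Indexer indexer = new Indexer(
      new File("/data/blacklab-index"),  // main BlackLab index directory
      true,                              // create a new index
      null,                              // autodetect the DocIndexer from index metadata
      new File("index-template.json"));  // template for index structure / metadata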
    public void indexDoc() throws IOException {
      Document d = new Document();

      FieldType customType1 = new FieldType(TextField.TYPE_STORED);
      customType1.setTokenized(false);
      customType1.setOmitNorms(true);

      ArrayList<Field> fields = new ArrayList<>();
      String idString = getIdString();
      Field idField = newField("id", idString, customType1);
      fields.add(idField);

      int nFields = nextInt(maxFields);
      for (int i = 0; i < nFields; i++) {

        FieldType customType = new FieldType();
        switch (nextInt(4)) {
          case 0:
            break;
          case 1:
            customType.setStoreTermVectors(true);
            break;
          case 2:
            customType.setStoreTermVectors(true);
            customType.setStoreTermVectorPositions(true);
            break;
          case 3:
            customType.setStoreTermVectors(true);
            customType.setStoreTermVectorOffsets(true);
            break;
        }

        switch (nextInt(4)) {
          case 0:
            customType.setStored(true);
            customType.setOmitNorms(true);
            customType.setIndexed(true);
            fields.add(newField("f" + nextInt(100), getString(1), customType));
            break;
          case 1:
            customType.setIndexed(true);
            customType.setTokenized(true);
            fields.add(newField("f" + nextInt(100), getString(0), customType));
            break;
          case 2:
            customType.setStored(true);
            customType.setStoreTermVectors(false);
            customType.setStoreTermVectorOffsets(false);
            customType.setStoreTermVectorPositions(false);
            fields.add(newField("f" + nextInt(100), getString(0), customType));
            break;
          case 3:
            customType.setStored(true);
            customType.setIndexed(true);
            customType.setTokenized(true);
            fields.add(newField("f" + nextInt(100), getString(bigFieldSize), customType));
            break;
        }
      }

      if (sameFieldOrder) {
        Collections.sort(fields, fieldNameComparator);
      } else {
        // random placement of id field also
        Collections.swap(fields, nextInt(fields.size()), 0);
      }

      for (int i = 0; i < fields.size(); i++) {
        d.add(fields.get(i));
      }
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
      }
      w.updateDocument(new Term("id", idString), d);
      // System.out.println(Thread.currentThread().getName() + ": indexing "+d);
      docs.put(idString, d);
    }
  public void test() throws Exception {
    IndexWriterConfig defaultConfig = new IndexWriterConfig(null);
    Codec defaultCodec = defaultConfig.getCodec();
    if ((new IndexWriterConfig(null)).getCodec() instanceof CompressingCodec) {
      Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)");
      Matcher matcher = regex.matcher(defaultCodec.toString());
      assertTrue(
          "Unexpected CompressingCodec toString() output: " + defaultCodec.toString(),
          matcher.find());
      int maxDocsPerChunk = Integer.parseInt(matcher.group(1));
      int blockSize = Integer.parseInt(matcher.group(2));
      int product = maxDocsPerChunk * blockSize;
      assumeTrue(
          defaultCodec.getName()
              + " maxDocsPerChunk ("
              + maxDocsPerChunk
              + ") * blockSize ("
              + blockSize
              + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g",
          product >= 16);
    }

    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
                .setCodec(TestUtil.getDefaultCodec()));

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    ft.setOmitNorms(true);
    MyTokenStream tokenStream = new MyTokenStream();
    Field field = new Field("field", tokenStream, ft);
    doc.add(field);

    final int numDocs = 1000;
    for (int i = 0; i < numDocs; i++) {
      if (i % 2 == 1) { // trick blockPF's little optimization
        tokenStream.n = 65536;
      } else {
        tokenStream.n = 65537;
      }
      w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();

    DirectoryReader oneThousand = DirectoryReader.open(dir);
    DirectoryReader subReaders[] = new DirectoryReader[1000];
    Arrays.fill(subReaders, oneThousand);
    BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2"));
    if (dir2 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null));
    TestUtil.addIndexesSlowly(w2, subReaders);
    w2.forceMerge(1);
    w2.close();
    oneThousand.close();

    DirectoryReader oneMillion = DirectoryReader.open(dir2);
    subReaders = new DirectoryReader[2000];
    Arrays.fill(subReaders, oneMillion);
    BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3"));
    if (dir3 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null));
    TestUtil.addIndexesSlowly(w3, subReaders);
    w3.forceMerge(1);
    w3.close();
    oneMillion.close();

    dir.close();
    dir2.close();
    dir3.close();
  }
Example #21
 static {
   customType3 = new FieldType(TextField.TYPE_STORED);
   customType3.setOmitNorms(true);
   textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, customType3);
 }
Example #22
 static {
   customType5 = new FieldType(TextField.TYPE_STORED);
   customType5.setOmitNorms(true);
   customType5.setTokenized(false);
   noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, customType5);
 }
  // @Absurd @Ignore takes ~20GB-30GB of space and 10 minutes.
  // with some codecs it needs more heap space as well.
  @Ignore("Very slow. Enable manually by removing @Ignore.")
  public void test() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes1"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    ft.setOmitNorms(true);
    MyTokenStream tokenStream = new MyTokenStream();
    Field field = new Field("field", tokenStream, ft);
    doc.add(field);

    final int numDocs = 1000;
    for (int i = 0; i < numDocs; i++) {
      if (i % 2 == 1) { // trick blockPF's little optimization
        tokenStream.n = 65536;
      } else {
        tokenStream.n = 65537;
      }
      w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();

    DirectoryReader oneThousand = DirectoryReader.open(dir);
    IndexReader subReaders[] = new IndexReader[1000];
    Arrays.fill(subReaders, oneThousand);
    MultiReader mr = new MultiReader(subReaders);
    BaseDirectoryWrapper dir2 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes2"));
    if (dir2 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    w2.addIndexes(mr);
    w2.forceMerge(1);
    w2.close();
    oneThousand.close();

    DirectoryReader oneMillion = DirectoryReader.open(dir2);
    subReaders = new IndexReader[2000];
    Arrays.fill(subReaders, oneMillion);
    mr = new MultiReader(subReaders);
    BaseDirectoryWrapper dir3 = newFSDirectory(_TestUtil.getTempDir("2BPostingsBytes3"));
    if (dir3 instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    w3.addIndexes(mr);
    w3.forceMerge(1);
    w3.close();
    oneMillion.close();

    dir.close();
    dir2.close();
    dir3.close();
  }
 static {
   FIELD_TYPE.setOmitNorms(true);
   FIELD_TYPE.freeze();
 }
 static {
   FIELD_TYPE.setIndexOptions(IndexOptions.NONE);
   FIELD_TYPE.setStored(false);
   FIELD_TYPE.setOmitNorms(true);
   FIELD_TYPE.freeze();
 }