@Test
  public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter =
        new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(
        new Field(
            "content",
            "the big bad dog",
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

    assertThat(topDocs.totalHits, equalTo(1));

    XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
    String fragment =
        highlighter.getBestFragment(
            highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader,
            topDocs.scoreDocs[0].doc,
            "content",
            30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
  }
  /**
   * {@link PersistentSnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to enable
   * flexible snapshotting.
   *
   * @param primary the {@link IndexDeletionPolicy} that is used on non-snapshotted commits.
   *     Snapshotted commits, by definition, are not deleted until explicitly released via {@link
   *     #release(String)}.
   * @param dir the {@link Directory} which will be used to persist the snapshots information.
   * @param mode specifies whether a new index should be created (immediately deleting all
   *     existing snapshot information) or an existing index opened (initializing this class
   *     with the persisted snapshot information).
   * @param matchVersion specifies the {@link Version} that should be used when opening the
   *     IndexWriter.
   */
  public PersistentSnapshotDeletionPolicy(
      IndexDeletionPolicy primary, Directory dir, OpenMode mode, Version matchVersion)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    super(primary, null);

    // Initialize the index writer over the snapshot directory.
    writer = new IndexWriter(dir, new IndexWriterConfig(matchVersion, null).setOpenMode(mode));
    if (mode != OpenMode.APPEND) {
      // IndexWriter no longer creates a first commit on an empty Directory. So
      // if we were asked to CREATE*, call commit() just to be sure. If the
      // index contains information and mode is CREATE_OR_APPEND, it's a no-op.
      writer.commit();
    }

    try {
      // Initialize the snapshots information. This only needs to run when
      // mode != CREATE; if it runs anyway there is no harm, since we open
      // the reader once and immediately close it.
      for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
        registerSnapshotInfo(e.getKey(), e.getValue(), null);
      }
    } catch (RuntimeException e) {
      writer.close(); // don't leave any open file handles
      throw e;
    } catch (IOException e) {
      writer.close(); // don't leave any open file handles
      throw e;
    }
  }
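  /*
   * A minimal usage sketch for the constructor above. This is an
   * assumption-laden illustration, not code from this listing: it presumes
   * the Lucene 3.x-era API in which snapshots are keyed by a String id, and
   * the names indexDir, snapshotDir, analyzer, and doc are placeholders.
   */
  void snapshotUsageSketch(
      Directory indexDir, Directory snapshotDir, Analyzer analyzer, Document doc)
      throws IOException {
    PersistentSnapshotDeletionPolicy policy =
        new PersistentSnapshotDeletionPolicy(
            new KeepOnlyLastCommitDeletionPolicy(),
            snapshotDir, // must be separate from indexDir; the policy opens its own writer here
            OpenMode.CREATE_OR_APPEND,
            Version.LUCENE_36);

    IndexWriter writer =
        new IndexWriter(
            indexDir,
            new IndexWriterConfig(Version.LUCENE_36, analyzer).setIndexDeletionPolicy(policy));
    writer.addDocument(doc);
    writer.commit();

    // Pin the current commit under an id; the id-to-commit mapping survives
    // JVM restarts because it is persisted in snapshotDir.
    IndexCommit commit = policy.snapshot("backup-1");
    try {
      // ... copy commit.getFileNames() to backup storage ...
    } finally {
      policy.release("backup-1"); // allow the commit to be deleted again
    }
    writer.close();
  }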
  public void testBackToTheFuture() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));

    Document doc = new Document();
    doc.add(newStringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(newStringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    DirectoryReader r1 = DirectoryReader.open(iw, true);

    iw.deleteDocuments(new Term("foo", "baz"));
    DirectoryReader r2 = DirectoryReader.open(iw, true);

    FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo");

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo");
    assertEquals(2, v.getValueCount());
    v.setDocument(1);
    assertEquals(1, v.nextOrd());

    iw.close();
    r1.close();
    r2.close();
    dir.close();
  }
  public static void addDocuments(
      String searchEngineId, long companyId, Collection<Document> documents)
      throws SearchException {

    if (isIndexReadOnly() || (documents == null) || documents.isEmpty()) {
      return;
    }

    SearchEngine searchEngine = getSearchEngine(searchEngineId);

    IndexWriter indexWriter = searchEngine.getIndexWriter();

    for (Document document : documents) {
      if (_log.isDebugEnabled()) {
        _log.debug("Add document " + document.toString());
      }

      _searchPermissionChecker.addPermissionFields(companyId, document);
    }

    SearchContext searchContext = new SearchContext();

    searchContext.setCompanyId(companyId);
    searchContext.setSearchEngineId(searchEngineId);

    indexWriter.addDocuments(searchContext, documents);
  }
  // test when delete terms only apply to disk segments
  public void testNonRAMDelete() throws IOException {

    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(2);
    modifier.setMaxBufferedDeleteTerms(2);

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    assertEquals(0, modifier.getNumBufferedDocuments());
    assertTrue(0 < modifier.getSegmentCount());

    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    modifier.deleteDocuments(new Term("value", String.valueOf(value)));

    modifier.commit();

    reader = IndexReader.open(dir, true);
    assertEquals(0, reader.numDocs());
    reader.close();
    modifier.close();
    dir.close();
  }
  // case 3: tail segments, invariants hold, copy, invariants hold
  public void testNoMergeAfterCopy() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();

    setUpDirs(dir, aux);

    IndexWriter writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND)
                .setMaxBufferedDocs(10)
                .setMergePolicy(newLogMergePolicy(4)));

    writer.addIndexes(
        aux, new MockDirectoryWrapper(random(), new RAMDirectory(aux, newIOContext(random()))));
    assertEquals(1060, writer.maxDoc());
    assertEquals(1000, writer.getDocCount(0));
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 1060);
    dir.close();
    aux.close();
  }
  // LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);
    doc.add(newField("zzz", "a b c", customType));
    doc.add(newField("aaa", "a b c", customType));
    doc.add(newField("zzz", "1 2 3", customType));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    Document doc2 = r.document(0);
    Iterator<IndexableField> it = doc2.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }
  // LUCENE-1130: make sure immediate disk full on creating
  // an IndexWriter (hit during DW.ThreadState.init()) is
  // OK:
  public void testImmediateDiskFull() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2)
                .setMergeScheduler(new ConcurrentMergeScheduler()));
    dir.setMaxSizeInBytes(Math.max(1, dir.getRecomputedActualSizeInBytes()));
    final Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
    try {
      writer.addDocument(doc);
      fail("did not hit disk full");
    } catch (IOException ioe) {
      // expected
    }
    // Without fix for LUCENE-1130: this call will hang:
    try {
      writer.addDocument(doc);
      fail("did not hit disk full");
    } catch (IOException ioe) {
      // expected
    }
    try {
      writer.close(false);
      fail("did not hit disk full");
    } catch (IOException ioe) {
      // expected
    }

    // Make sure once disk space is avail again, we can
    // cleanly close:
    dir.setMaxSizeInBytes(0);
    writer.close(false);
    dir.close();
  }
  /**
   * Tests that index merging (specifically addIndexes(Directory...)) doesn't change the index order
   * of documents.
   */
  public void testLucene() throws IOException {
    int num = 100;

    Directory indexA = newDirectory();
    Directory indexB = newDirectory();

    fillIndex(random(), indexA, 0, num);
    boolean fail = verifyIndex(indexA, 0);
    if (fail) {
      fail("Index a is invalid");
    }

    fillIndex(random(), indexB, num, num);
    fail = verifyIndex(indexB, num);
    if (fail) {
      fail("Index b is invalid");
    }

    Directory merged = newDirectory();

    IndexWriter writer =
        new IndexWriter(
            merged,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(2)));
    writer.addIndexes(indexA, indexB);
    writer.forceMerge(1);
    writer.close();

    fail = verifyIndex(merged, 0);

    assertFalse("The merged index is invalid", fail);
    indexA.close();
    indexB.close();
    merged.close();
  }
  public void testTotalBytesSize() throws Exception {
    Directory d = newDirectory();
    if (d instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) d).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setMaxBufferedDocs(5);
    iwc.setMergeScheduler(new TrackingCMS());
    if (TestUtil.getPostingsFormat("id").equals("SimpleText")) {
      // SimpleText is too slow for this many docs; force a real postings format.
      iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
    }
    IndexWriter w = new IndexWriter(d, iwc);
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", "" + i, Field.Store.NO));
      w.addDocument(doc);

      if (random().nextBoolean()) {
        w.deleteDocuments(new Term("id", "" + random().nextInt(i + 1)));
      }
    }
    assertTrue(((TrackingCMS) w.getConfig().getMergeScheduler()).totMergedBytes != 0);
    w.close();
    d.close();
  }
  // LUCENE-1219
  public void testBinaryFieldOffsetLength() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    byte[] b = new byte[50];
    for (int i = 0; i < 50; i++) b[i] = (byte) (i + 77);

    Document doc = new Document();
    Field f = new StoredField("binary", b, 10, 17);
    byte[] bx = f.binaryValue().bytes;
    assertTrue(bx != null);
    assertEquals(50, bx.length);
    assertEquals(10, f.binaryValue().offset);
    assertEquals(17, f.binaryValue().length);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader ir = DirectoryReader.open(dir);
    Document doc2 = ir.document(0);
    IndexableField f2 = doc2.getField("binary");
    b = f2.binaryValue().bytes;
    assertTrue(b != null);
    assertEquals(17, b.length);
    assertEquals(87, b[0]);
    ir.close();
    dir.close();
  }
  public static void indexSerial(Random random, Map<String, Document> docs, Directory dir)
      throws IOException {
    IndexWriter w =
        new IndexWriter(
            dir,
            LuceneTestCase.newIndexWriterConfig(
                    random, TEST_VERSION_CURRENT, new MockAnalyzer(random))
                .setMergePolicy(newLogMergePolicy()));

    // index all docs in a single thread
    Iterator<Document> iter = docs.values().iterator();
    while (iter.hasNext()) {
      Document d = iter.next();
      ArrayList<Field> fields = new ArrayList<>();
      fields.addAll(d.getFields());
      // put fields in same order each time
      Collections.sort(fields, fieldNameComparator);

      Document d1 = new Document();
      for (int i = 0; i < fields.size(); i++) {
        d1.add(fields.get(i));
      }
      w.addDocument(d1);
      // System.out.println("indexing "+d1);
    }

    w.close();
  }
  private static Directory index(Analyzer analyzer, String processingPath) {
    RAMDirectory directory = null;
    IndexWriter indexWriter = null;
    try {
      directory = new RAMDirectory();
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      indexWriter = new IndexWriter(directory, iwc);
      File file = new File(processingPath);
      index_h("", file, indexWriter);
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      if (indexWriter != null) {
        try {
          indexWriter.close();
        } catch (CorruptIndexException e1) {
          e1.printStackTrace();
        } catch (IOException e1) {
          e1.printStackTrace();
        }
      }
    }

    return directory;
  }
  public void testUpdateSameDoc() throws Exception {
    final Directory dir = newDirectory();

    final LineFileDocs docs = new LineFileDocs(random());
    for (int r = 0; r < 3; r++) {
      final IndexWriter w =
          new IndexWriter(
              dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
      final int numUpdates = atLeast(20);
      int numThreads = TestUtil.nextInt(random(), 2, 6);
      IndexingThread[] threads = new IndexingThread[numThreads];
      for (int i = 0; i < numThreads; i++) {
        threads[i] = new IndexingThread(docs, w, numUpdates);
        threads[i].start();
      }

      for (int i = 0; i < numThreads; i++) {
        threads[i].join();
      }

      w.close();
    }

    IndexReader open = DirectoryReader.open(dir);
    assertEquals(1, open.numDocs());
    open.close();
    docs.close();
    dir.close();
  }
  // LUCENE-1262
  public void testExceptions() throws Throwable {
    Path indexDir = createTempDir("testfieldswriterexceptions");

    Directory fsDir = newFSDirectory(indexDir);
    FaultyFSDirectory dir = new FaultyFSDirectory(fsDir);
    IndexWriterConfig iwc =
        newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int i = 0; i < 2; i++) writer.addDocument(testDoc);
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    dir.startFailing();

    boolean exc = false;

    for (int i = 0; i < 2; i++) {
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
    }
    assertTrue(exc);
    reader.close();
    dir.close();
  }
  private Directory makeIndex() throws Exception {
    Directory dir = newDirectory();
    try {
      IndexWriter writer =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)));
      LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
      lmp.setUseCompoundFile(false);

      for (int d = 1; d <= NUM_DOCS; d++) {
        Document doc = new Document();
        for (int f = 1; f <= NUM_FIELDS; f++) {
          doc.add(
              newField(
                  "f" + f,
                  data[f % data.length] + '#' + data[random.nextInt(data.length)],
                  Field.Store.YES,
                  Field.Index.ANALYZED));
        }
        writer.addDocument(doc);
      }
      writer.close();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return dir;
  }
 public void testDocValuesUnstored() throws IOException {
   Directory dir = newDirectory();
   IndexWriterConfig iwconfig =
       newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
   iwconfig.setMergePolicy(newLogMergePolicy());
   IndexWriter writer = new IndexWriter(dir, iwconfig);
   for (int i = 0; i < 50; i++) {
     Document doc = new Document();
     doc.add(new NumericDocValuesField("dv", i));
     doc.add(new TextField("docId", "" + i, Field.Store.YES));
     writer.addDocument(doc);
   }
   DirectoryReader r = writer.getReader();
   SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
   FieldInfos fi = slow.getFieldInfos();
   FieldInfo dvInfo = fi.fieldInfo("dv");
   assertTrue(dvInfo.hasDocValues());
   NumericDocValues dv = slow.getNumericDocValues("dv");
   for (int i = 0; i < 50; i++) {
     assertEquals(i, dv.get(i));
     StoredDocument d = slow.document(i);
     // cannot use d.get("dv") due to another bug!
     assertNull(d.getField("dv"));
     assertEquals(Integer.toString(i), d.get("docId"));
   }
   slow.close();
   writer.close();
   dir.close();
 }
  public void testDocsWithField() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dir, conf);
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("dv", "some text", Field.Store.NO));
    doc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(doc);

    DirectoryReader r = writer.getReader();
    writer.close();

    AtomicReader subR = r.leaves().get(0).reader();
    assertEquals(2, subR.numDocs());

    Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
    assertTrue(bits.get(0));
    assertTrue(bits.get(1));
    r.close();
    dir.close();
  }
  /**
   * Remove a stale file (uidIter.term().utf8ToString()) from the index database (and the xref file)
   *
   * @throws java.io.IOException if an error occurs
   */
  private void removeFile() throws IOException {
    String path = Util.uid2url(uidIter.term().utf8ToString());

    for (IndexChangedListener listener : listeners) {
      listener.fileRemove(path);
    }
    writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
    writer.prepareCommit();
    writer.commit();

    File xrefFile;
    if (RuntimeEnvironment.getInstance().isCompressXref()) {
      xrefFile = new File(xrefDir, path + ".gz");
    } else {
      xrefFile = new File(xrefDir, path);
    }
    File parent = xrefFile.getParentFile();

    if (!xrefFile.delete() && xrefFile.exists()) {
      log.log(Level.INFO, "Failed to remove obsolete xref-file: {0}", xrefFile.getAbsolutePath());
    }

    // Remove the parent directory if it's empty
    if (parent.delete()) {
      log.log(Level.FINE, "Removed empty xref dir:{0}", parent.getAbsolutePath());
    }
    setDirty();
    for (IndexChangedListener listener : listeners) {
      listener.fileRemoved(path);
    }
  }
  public void testSimpleSkip() throws IOException {
    Directory dir = new CountingRAMDirectory(new RAMDirectory());
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
                .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
                .setMergePolicy(newLogMergePolicy()));
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
      Document d1 = new Document();
      d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
      writer.addDocument(d1);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.close();

    AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

    for (int i = 0; i < 2; i++) {
      counter = 0;
      DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
      checkSkipTo(tp, 14, 185); // no skips
      checkSkipTo(tp, 17, 190); // one skip on level 0
      checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

      // this test would fail if we had only one skip level,
      // because then more bytes would be read from the freqStream
      checkSkipTo(tp, 4800, 250); // one skip on level 2
    }
  }
  public void testNoExtraFiles() throws IOException {
    Directory directory = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(2));

    for (int iter = 0; iter < 7; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      for (int j = 0; j < 21; j++) {
        Document doc = new Document();
        doc.add(newTextField("content", "a b c", Field.Store.NO));
        writer.addDocument(doc);
      }

      writer.close();
      TestIndexWriter.assertNoUnreferencedFiles(directory, "testNoExtraFiles");

      // Reopen
      writer =
          new IndexWriter(
              directory,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setOpenMode(OpenMode.APPEND)
                  .setMaxBufferedDocs(2));
    }

    writer.close();

    directory.close();
  }
 private void createIndex(
     IndexWriterConfig config,
     Directory target,
     IndexReader reader,
     Filter preserveFilter,
     boolean negateFilter)
     throws IOException {
   boolean success = false;
   final IndexWriter w = new IndexWriter(target, config);
   try {
     final List<LeafReaderContext> leaves = reader.leaves();
     final IndexReader[] subReaders = new IndexReader[leaves.size()];
     int i = 0;
     for (final LeafReaderContext ctx : leaves) {
       subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveFilter, negateFilter);
     }
     w.addIndexes(subReaders);
     success = true;
   } finally {
     if (success) {
       w.close();
     } else {
       IOUtils.closeWhileHandlingException(w);
     }
   }
 }
  public void testSizeInBytesCache() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
            .setMergePolicy(newLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, conf);
    writer.setInfoStream(VERBOSE ? System.out : null);
    Document doc = new Document();
    doc.add(new Field("a", "value", Store.YES, Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    SegmentInfos sis = new SegmentInfos();
    sis.read(dir);
    SegmentInfo si = sis.info(0);
    long sizeInBytesNoStore = si.sizeInBytes(false);
    long sizeInBytesWithStore = si.sizeInBytes(true);
    assertTrue(
        "sizeInBytesNoStore="
            + sizeInBytesNoStore
            + " sizeInBytesWithStore="
            + sizeInBytesWithStore,
        sizeInBytesWithStore > sizeInBytesNoStore);
    dir.close();
  }
  private final IndexReader doOpenFromWriter(boolean openReadOnly, IndexCommit commit)
      throws CorruptIndexException, IOException {
    assert readOnly;

    if (!openReadOnly) {
      throw new IllegalArgumentException(
          "a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
    }

    if (commit != null) {
      throw new IllegalArgumentException(
          "a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
    }

    if (writer.nrtIsCurrent(segmentInfos)) {
      return null;
    }

    IndexReader reader = writer.getReader(applyAllDeletes);

    // If in fact no changes took place, return null:
    if (reader.getVersion() == segmentInfos.getVersion()) {
      reader.decRef();
      return null;
    }

    reader.readerFinishedListeners = readerFinishedListeners;
    return reader;
  }
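  /*
   * Caller-side sketch of the near-real-time reopen path that
   * doOpenFromWriter() above serves. An illustration only, assuming the
   * Lucene 3.x-era static IndexReader.openIfChanged(); writer and doc are
   * placeholders.
   */
  void nrtReopenSketch(IndexWriter writer, Document doc) throws IOException {
    IndexReader reader = IndexReader.open(writer, true); // NRT reader

    writer.addDocument(doc);

    // openIfChanged() funnels into doOpenFromWriter(); a null result means
    // no changes were visible and the old reader is still current.
    IndexReader newReader = IndexReader.openIfChanged(reader);
    if (newReader != null) {
      reader.close();
      reader = newReader;
    }
    reader.close();
  }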
  /*
   * Test a deletion policy that keeps last N commits.
   */
  public void testKeepLastNDeletionPolicy() throws IOException {
    final int N = 5;

    for (int pass = 0; pass < 2; pass++) {

      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = newDirectory();
      if (dir instanceof MockDirectoryWrapper) {
        // test manually deletes files
        ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
      }

      KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
      for (int j = 0; j < N + 1; j++) {
        IndexWriterConfig conf =
            newIndexWriterConfig(new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE)
                .setIndexDeletionPolicy(policy)
                .setMaxBufferedDocs(10);
        MergePolicy mp = conf.getMergePolicy();
        mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
        IndexWriter writer = new IndexWriter(dir, conf);
        policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
        for (int i = 0; i < 17; i++) {
          addDoc(writer);
        }
        writer.forceMerge(1);
        writer.close();
      }

      assertTrue(policy.numDelete > 0);
      assertEquals(N + 1, policy.numOnInit);
      assertEquals(N + 1, policy.numOnCommit);

      // Simplistic check: just verify that only the past N segments_N files
      // still exist, and that we can open a reader on each:
      long gen = SegmentInfos.getLastCommitGeneration(dir);
      for (int i = 0; i < N + 1; i++) {
        try {
          IndexReader reader = DirectoryReader.open(dir);
          reader.close();
          if (i == N) {
            fail("should have failed on commits prior to last " + N);
          }
        } catch (IOException e) {
          if (i != N) {
            throw e;
          }
        }
        if (i < N) {
          dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        }
        gen--;
      }

      dir.close();
    }
  }
  public void buildIndex(JSONObject indexData) {

    try {
      Directory dir = FSDirectory.open(new File(indexDir));
      IKAnalyzer analyzer = new IKAnalyzer();
      analyzer.setUseSmart(true);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      indexWriter = new IndexWriter(dir, iwc);
      indexWriter.deleteAll();

      JSONArray statusData = indexData.getJSONArray("statusData");
      for (int i = 0; i < statusData.length(); i++) {
        String text = statusData.getString(i);
        Document doc = new Document();
        doc.add(
            new Field(
                "text",
                text,
                Field.Store.YES,
                Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc);
      }

      JSONArray userData = indexData.getJSONArray("userData");
      for (int i = 0; i < userData.length(); i++) {
        String text = userData.getString(i);
        Document doc = new Document();
        doc.add(
            new Field(
                "text",
                text,
                Field.Store.YES,
                Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc);
      }
      // indexWriter.commit();
      System.out.println("Index is done");
    } catch (IOException e) {
      e.printStackTrace();
    } catch (JSONException e) {
      e.printStackTrace();
    } finally {
      try {
        indexWriter.close();
      } catch (CorruptIndexException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
 /**
  * Returns true if this single info is already fully merged (has no pending deletes, is in the
  * same dir as the writer, and matches the current compound file setting).
  */
 protected final boolean isMerged(SegmentInfos infos, SegmentCommitInfo info) throws IOException {
   IndexWriter w = writer.get();
   assert w != null;
   boolean hasDeletions = w.numDeletedDocs(info) > 0;
   return !hasDeletions
       && !info.info.hasSeparateNorms()
       && info.info.dir == w.getDirectory()
       && useCompoundFile(infos, info) == info.info.getUseCompoundFile();
 }
 private void crash(final IndexWriter writer) throws IOException {
   final MockDirectoryWrapper dir = (MockDirectoryWrapper) writer.getDirectory();
   ConcurrentMergeScheduler cms =
       (ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler();
   cms.sync();
   dir.crash();
   cms.sync();
   dir.clearCrash();
 }
  public Map<String, Document> indexRandom(
      int nThreads,
      int iterations,
      int range,
      Directory dir,
      int maxThreadStates,
      boolean doReaderPooling)
      throws IOException, InterruptedException {
    Map<String, Document> docs = new HashMap<>();
    IndexWriter w =
        RandomIndexWriter.mockIndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE)
                .setRAMBufferSizeMB(0.1)
                .setMaxBufferedDocs(maxBufferedDocs)
                .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates))
                .setReaderPooling(doReaderPooling)
                .setMergePolicy(newLogMergePolicy()),
            new YieldTestPoint());
    LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
    lmp.setNoCFSRatio(0.0);
    lmp.setMergeFactor(mergeFactor);

    threads = new IndexingThread[nThreads];
    for (int i = 0; i < threads.length; i++) {
      IndexingThread th = new IndexingThread();
      th.w = w;
      th.base = 1000000 * i;
      th.range = range;
      th.iterations = iterations;
      threads[i] = th;
    }

    for (int i = 0; i < threads.length; i++) {
      threads[i].start();
    }
    for (int i = 0; i < threads.length; i++) {
      threads[i].join();
    }

    // w.forceMerge(1);
    w.close();

    for (int i = 0; i < threads.length; i++) {
      IndexingThread th = threads[i];
      synchronized (th) {
        docs.putAll(th.docs);
      }
    }

    // System.out.println("TEST: checkindex");
    TestUtil.checkIndex(dir);

    return docs;
  }
  // indexes Integer.MAX_VALUE docs with a fixed binary field
  public void testFixedSorted() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedSorted"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    Document doc = new Document();
    byte bytes[] = new byte[2];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.add(dvField);

    for (int i = 0; i < Integer.MAX_VALUE; i++) {
      bytes[0] = (byte) (i >> 8);
      bytes[1] = (byte) i;
      w.addDocument(doc);
      if (i % 100000 == 0) {
        System.out.println("indexed: " + i);
        System.out.flush();
      }
    }

    w.forceMerge(1);
    w.close();

    System.out.println("verifying...");
    System.out.flush();

    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    for (AtomicReaderContext context : r.leaves()) {
      AtomicReader reader = context.reader();
      BytesRef scratch = new BytesRef();
      BinaryDocValues dv = reader.getSortedDocValues("dv");
      for (int i = 0; i < reader.maxDoc(); i++) {
        bytes[0] = (byte) (expectedValue >> 8);
        bytes[1] = (byte) expectedValue;
        dv.get(i, scratch);
        assertEquals(data, scratch);
        expectedValue++;
      }
    }

    r.close();
    dir.close();
  }