  public void testDocsWithField() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dir, conf);
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("dv", "some text", Field.Store.NO));
    doc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(doc);

    DirectoryReader r = writer.getReader();
    writer.close();

    AtomicReader subR = r.leaves().get(0).reader();
    assertEquals(2, subR.numDocs());

    Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
    assertTrue(bits.get(0));
    assertTrue(bits.get(1));
    r.close();
    dir.close();
  }
  // LUCENE-3870
  public void testLengthPrefixAcrossTwoPages() throws Exception {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();
    byte[] bytes = new byte[32764];
    BytesRef b = new BytesRef();
    b.bytes = bytes;
    b.length = bytes.length;
    doc.add(new SortedDocValuesField("field", b));
    w.addDocument(doc);
    bytes[0] = 1;
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field");

    BytesRef bytes1 = new BytesRef();
    s.get(0, bytes1);
    assertEquals(bytes.length, bytes1.length);
    // doc 0 was indexed before bytes[0] was flipped to 1, so restore the shared
    // backing array to that state before comparing against b:
    bytes[0] = 0;
    assertEquals(b, bytes1);

    s.get(1, bytes1);
    assertEquals(bytes.length, bytes1.length);
    // doc 1 was indexed after bytes[0] was set to 1:
    bytes[0] = 1;
    assertEquals(b, bytes1);
    r.close();
    w.close();
    d.close();
  }
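
  // The getOnlySegmentReader(r) call above is a LuceneTestCase helper that is not
  // shown in this excerpt. A minimal sketch of what it is assumed to do (an
  // illustration, not the exact upstream implementation): fail unless the reader
  // sees exactly one segment, then hand back that segment's reader.
  public static SegmentReader getOnlySegmentReader(DirectoryReader reader) {
    List<AtomicReaderContext> subReaders = reader.leaves();
    if (subReaders.size() != 1) {
      throw new IllegalArgumentException(
          reader + " has " + subReaders.size() + " segments instead of exactly one");
    }
    return (SegmentReader) subReaders.get(0).reader();
  }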
  public void testDocValuesUnstored() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, iwconfig);
    for (int i = 0; i < 50; i++) {
      Document doc = new Document();
      doc.add(new NumericDocValuesField("dv", i));
      doc.add(new TextField("docId", "" + i, Field.Store.YES));
      writer.addDocument(doc);
    }
    DirectoryReader r = writer.getReader();
    SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
    FieldInfos fi = slow.getFieldInfos();
    FieldInfo dvInfo = fi.fieldInfo("dv");
    assertTrue(dvInfo.hasDocValues());
    NumericDocValues dv = slow.getNumericDocValues("dv");
    for (int i = 0; i < 50; i++) {
      assertEquals(i, dv.get(i));
      StoredDocument d = slow.document(i);
      // cannot use d.get("dv") due to another bug!
      assertNull(d.getField("dv"));
      assertEquals(Integer.toString(i), d.get("docId"));
    }
    slow.close();
    writer.close();
    dir.close();
  }
  private final IndexReader doOpenFromWriter(boolean openReadOnly, IndexCommit commit)
      throws CorruptIndexException, IOException {
    assert readOnly;

    if (!openReadOnly) {
      throw new IllegalArgumentException(
          "a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
    }

    if (commit != null) {
      throw new IllegalArgumentException(
          "a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
    }

    if (writer.nrtIsCurrent(segmentInfos)) {
      return null;
    }

    IndexReader reader = writer.getReader(applyAllDeletes);

    // If in fact no changes took place, return null:
    if (reader.getVersion() == segmentInfos.getVersion()) {
      reader.decRef();
      return null;
    }

    reader.readerFinishedListeners = readerFinishedListeners;
    return reader;
  }
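
  // Hedged usage sketch (not from the original source): doOpenFromWriter() backs the
  // reopen path of a near-real-time reader obtained from IndexWriter.getReader(). A
  // null result signals "nothing changed"; the public reopen()/openIfChanged() wrappers
  // translate that for the caller. The method below only illustrates the calling pattern
  // on the 3.x API this code belongs to.
  private IndexReader refreshNrtReader(IndexWriter writer, IndexReader nrtReader) throws IOException {
    IndexReader newReader = nrtReader.reopen();  // ends up in doOpenFromWriter() for NRT readers
    if (newReader != nrtReader) {                // an unchanged index returns the same instance
      nrtReader.close();
      return newReader;
    }
    return nrtReader;
  }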
  // LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);
    doc.add(newField("zzz", "a b c", customType));
    doc.add(newField("aaa", "a b c", customType));
    doc.add(newField("zzz", "1 2 3", customType));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    Document doc2 = r.document(0);
    Iterator<IndexableField> it = doc2.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }
  public void testForceMergeNotNeeded() throws IOException {
    try (Directory dir = newDirectory()) {
      final AtomicBoolean mayMerge = new AtomicBoolean(true);
      final MergeScheduler mergeScheduler =
          new SerialMergeScheduler() {
            @Override
            public synchronized void merge(
                IndexWriter writer, MergeTrigger trigger, boolean newMergesFound)
                throws IOException {
              if (mayMerge.get() == false) {
                MergePolicy.OneMerge merge = writer.getNextMerge();
                if (merge != null) {
                  System.out.println(
                      "TEST: we should not need any merging, yet merge policy returned merge "
                          + merge);
                  throw new AssertionError();
                }
              }

              super.merge(writer, trigger, newMergesFound);
            }
          };

      MergePolicy mp = mergePolicy();
      assumeFalse(
          "this test cannot tolerate random forceMerges",
          mp.toString().contains("MockRandomMergePolicy"));
      mp.setNoCFSRatio(random().nextBoolean() ? 0 : 1);

      IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
      iwc.setMergeScheduler(mergeScheduler);
      iwc.setMergePolicy(mp);

      IndexWriter writer = new IndexWriter(dir, iwc);
      final int numSegments = TestUtil.nextInt(random(), 2, 20);
      for (int i = 0; i < numSegments; ++i) {
        final int numDocs = TestUtil.nextInt(random(), 1, 5);
        for (int j = 0; j < numDocs; ++j) {
          writer.addDocument(new Document());
        }
        writer.getReader().close();
      }
      for (int i = 5; i >= 0; --i) {
        final int segmentCount = writer.getSegmentCount();
        final int maxNumSegments = i == 0 ? 1 : TestUtil.nextInt(random(), 1, 10);
        mayMerge.set(segmentCount > maxNumSegments);
        if (VERBOSE) {
          System.out.println(
              "TEST: now forceMerge(maxNumSegments="
                  + maxNumSegments
                  + ") vs segmentCount="
                  + segmentCount);
        }
        writer.forceMerge(maxNumSegments);
      }
      writer.close();
    }
  }
  // test deleteAll() w/ near real-time reader
  public void testDeleteAllNRT() throws IOException {
    Directory dir = new MockRAMDirectory();
    IndexWriter modifier =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    modifier.setMaxBufferedDocs(2);
    modifier.setMaxBufferedDeleteTerms(2);

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    IndexReader reader = modifier.getReader();
    assertEquals(7, reader.numDocs());
    reader.close();

    addDoc(modifier, ++id, value);
    addDoc(modifier, ++id, value);

    // Delete all
    modifier.deleteAll();

    reader = modifier.getReader();
    assertEquals(0, reader.numDocs());
    reader.close();

    // Roll it back
    modifier.rollback();
    modifier.close();

    // Validate that the docs are still there
    reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    dir.close();
  }
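
  // The addDoc(modifier, id, value) helper used by testDeleteAllNRT is not included in
  // this excerpt. A minimal sketch under the same 3.x API (field names and contents are
  // assumptions for illustration; the original helper may differ):
  private void addDoc(IndexWriter modifier, int id, int value) throws IOException {
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("id", String.valueOf(id), Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(new Field("value", String.valueOf(value), Field.Store.NO, Field.Index.NOT_ANALYZED));
    modifier.addDocument(doc);
  }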
  public void testTermDocsEnum() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document d = new Document();
    d.add(newStringField("f", "j", Field.Store.NO));
    w.addDocument(d);
    w.commit();
    w.addDocument(d);
    IndexReader r = w.getReader();
    w.close();
    DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
    assertEquals(0, de.nextDoc());
    assertEquals(1, de.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
    r.close();
    dir.close();
  }

  public void testSeparateEnums() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document d = new Document();
    d.add(newStringField("f", "j", Field.Store.NO));
    w.addDocument(d);
    w.commit();
    w.addDocument(d);
    IndexReader r = w.getReader();
    w.close();
    DocsEnum d1 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
    DocsEnum d2 = _TestUtil.docs(random(), r, "f", new BytesRef("j"), null, null, 0);
    assertEquals(0, d1.nextDoc());
    assertEquals(0, d2.nextDoc());
    r.close();
    dir.close();
  }
  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }
  @Test
  public void testRollingUpdates() throws Exception {
    Random random = new Random(random().nextLong());
    final BaseDirectoryWrapper dir = newDirectory();
    // test checks for no unref'ed files with the IW helper method, which isn't aware of "tried to
    // delete files"
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    final LineFileDocs docs = new LineFileDocs(random, true);

    // provider.register(new MemoryCodec());
    if (random().nextBoolean()) {
      Codec.setDefault(
          TestUtil.alwaysPostingsFormat(
              new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
    }

    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    final int SIZE = atLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    final int numUpdates =
        (int)
            (SIZE * (2 + (TEST_NIGHTLY ? 200 * random().nextDouble() : 5 * random().nextDouble())));
    if (VERBOSE) {
      System.out.println("TEST: numUpdates=" + numUpdates);
    }
    int updateCount = 0;
    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++) {
      final Document doc = docs.nextDoc();
      final String myID = Integer.toString(id);
      if (id == SIZE - 1) {
        id = 0;
      } else {
        id++;
      }
      if (VERBOSE) {
        System.out.println("  docIter=" + docIter + " id=" + id);
      }
      ((Field) doc.getField("docid")).setStringValue(myID);

      Term idTerm = new Term("docid", myID);

      final boolean doUpdate;
      if (s != null && updateCount < SIZE) {
        TopDocs hits = s.search(new TermQuery(idTerm), 1);
        assertEquals(1, hits.totalHits);
        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
        if (VERBOSE) {
          if (doUpdate) {
            System.out.println("  tryDeleteDocument failed");
          } else {
            System.out.println("  tryDeleteDocument succeeded");
          }
        }
      } else {
        doUpdate = true;
        if (VERBOSE) {
          System.out.println("  no searcher: doUpdate=true");
        }
      }

      updateCount++;

      if (doUpdate) {
        if (random().nextBoolean()) {
          w.updateDocument(idTerm, doc);
        } else {
          // It's OK to not be atomic for this test (no separate thread reopening readers):
          w.deleteDocuments(new TermQuery(idTerm));
          w.addDocument(doc);
        }
      } else {
        w.addDocument(doc);
      }

      if (docIter >= SIZE && random().nextInt(50) == 17) {
        if (r != null) {
          r.close();
        }

        final boolean applyDeletions = random().nextBoolean();

        if (VERBOSE) {
          System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
        }

        r = w.getReader(applyDeletions);
        if (applyDeletions) {
          s = newSearcher(r);
        } else {
          s = null;
        }
        assertTrue(
            "applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE,
            !applyDeletions || r.numDocs() == SIZE);
        updateCount = 0;
      }
    }

    if (r != null) {
      r.close();
    }

    w.commit();
    assertEquals(SIZE, w.numDocs());

    w.close();

    TestIndexWriter.assertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.close();

    // LUCENE-4455:
    SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
    long totalBytes = 0;
    for (SegmentCommitInfo sipc : infos) {
      totalBytes += sipc.sizeInBytes();
    }
    long totalBytes2 = 0;

    for (String fileName : dir.listAll()) {
      if (IndexFileNames.CODEC_FILE_PATTERN.matcher(fileName).matches()) {
        totalBytes2 += dir.fileLength(fileName);
      }
    }
    assertEquals(totalBytes2, totalBytes);
    dir.close();
  }
  public void testDeletes1() throws Exception {
    // IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new Random(random().nextLong()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setMaxBufferedDocs(5000);
    iwc.setRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(false);
    iwc.setMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int x = 0; x < 5; x++) {
      writer.addDocument(DocHelper.createDocument(x, "1", 2));
      // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    // System.out.println("commit1");
    writer.commit();
    assertEquals(1, writer.segmentInfos.size());
    for (int x = 5; x < 10; x++) {
      writer.addDocument(DocHelper.createDocument(x, "2", 2));
      // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    // System.out.println("commit2");
    writer.commit();
    assertEquals(2, writer.segmentInfos.size());

    for (int x = 10; x < 15; x++) {
      writer.addDocument(DocHelper.createDocument(x, "3", 2));
      // System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }

    writer.deleteDocuments(new Term("id", "1"));

    writer.deleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.flush(false, false);
    assertTrue(writer.bufferedUpdatesStream.any());

    // getReader() flushes pending deletes,
    // so there should not be any more
    IndexReader r1 = writer.getReader();
    assertFalse(writer.bufferedUpdatesStream.any());
    r1.close();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.deleteDocuments(new Term("id", "2"));
    writer.flush(false, false);
    fsmp = (RangeMergePolicy) writer.getConfig().getMergePolicy();
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    writer.maybeMerge();

    assertEquals(2, writer.segmentInfos.size());

    // id:2 shouldn't exist anymore because
    // the delete was applied during the merge
    IndexReader r2 = writer.getReader();
    int[] id2docs = toDocsArray(new Term("id", "2"), null, r2);
    assertTrue(id2docs == null);
    r2.close();

    /**
    // added docs are in the ram buffer
    for (int x = 15; x < 20; x++) {
      writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2));
      System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    assertTrue(writer.numRamDocs() > 0);

    // delete from the ram buffer
    writer.deleteDocuments(new Term("id", Integer.toString(13)));

    Term id3 = new Term("id", Integer.toString(3));

    // delete from the 1st segment
    writer.deleteDocuments(id3);

    assertTrue(writer.numRamDocs() > 0);

    //System.out
    //    .println("segdels1:" + writer.docWriter.deletesToString());

    //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

    // we cause a merge to happen
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    System.out.println("maybeMerge "+writer.segmentInfos);

    SegmentInfo info0 = writer.segmentInfos.info(0);
    SegmentInfo info1 = writer.segmentInfos.info(1);

    writer.maybeMerge();
    System.out.println("maybeMerge after "+writer.segmentInfos);

    // there should be docs in RAM
    assertTrue(writer.numRamDocs() > 0);

    // assert we've merged the 1 and 2 segments
    // and still have a segment leftover == 2
    assertEquals(2, writer.segmentInfos.size());
    assertFalse(segThere(info0, writer.segmentInfos));
    assertFalse(segThere(info1, writer.segmentInfos));

    //System.out.println("segdels2:" + writer.docWriter.deletesToString());

    //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

    IndexReader r = writer.getReader();
    IndexReader r1 = r.getSequentialSubReaders()[0];
    printDelDocs(r1.getLiveDocs());
    int[] docs = toDocsArray(id3, null, r);
    System.out.println("id3 docs:"+Arrays.toString(docs));
    // there shouldn't be any docs for id:3
    assertTrue(docs == null);
    r.close();

    part2(writer, fsmp);
    */
    // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
    // System.out.println("close");
    writer.close();
    dir.close();
  }
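
  // The toDocsArray(term, liveDocs, reader) helper called in testDeletes1 is not part of
  // this excerpt. A minimal sketch of one way to implement it on a Lucene 4.x-style API
  // (an assumption, not the original helper): collect the doc IDs of every live document
  // containing the term, or return null when the term no longer exists.
  private int[] toDocsArray(Term term, Bits liveDocs, IndexReader reader) throws IOException {
    Terms terms = MultiFields.getTerms(reader, term.field());
    if (terms == null) {
      return null;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(term.bytes())) {
      return null;  // e.g. the docs carrying the term were deleted and merged away
    }
    DocsEnum docsEnum = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_NONE);
    List<Integer> ids = new ArrayList<Integer>();
    for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
      ids.add(doc);
    }
    if (ids.isEmpty()) {
      return null;
    }
    int[] result = new int[ids.size()];
    for (int i = 0; i < result.length; i++) {
      result[i] = ids.get(i);
    }
    return result;
  }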