public void testNumerics() throws Exception {
    Directory dir = newDirectory();
    Document doc = new Document();
    Field field = new NumericDocValuesField("numbers", 0);
    doc.add(field);

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      field.setLongValue(random().nextLong());
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
    NumericDocValues single = merged.getNumericDocValues("numbers");
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
    ir.close();
    ir2.close();
    dir.close();
  }
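  /**
   * Builds a small index of docs with filterable url, text, and date fields,
   * forced to a single segment (see the LUCENE-2348 note below).
   */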
  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(newLogMergePolicy()));

    // Add a series of docs with filterable fields: url, text, and date
    addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
    addDoc(writer, "http://lucene.apache.org", "New release pending", "20040102");
    addDoc(writer, "http://lucene.apache.org", "Lucene 1.9 out now", "20050101");
    addDoc(writer, "http://www.bar.com", "Local man bites dog", "20040101");
    addDoc(writer, "http://www.bar.com", "Dog bites local man", "20040102");
    addDoc(writer, "http://www.bar.com", "Dog uses Lucene", "20050101");
    addDoc(writer, "http://lucene.apache.org", "Lucene 2.0 out", "20050101");
    addDoc(writer, "http://lucene.apache.org", "Oops. Lucene 2.1 out", "20050102");

    // Until we fix LUCENE-2348, the index must
    // have only 1 segment:
    writer.forceMerge(1);

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }
  /** Make sure we downgrade positions and payloads correctly. */
  public void testMixing() throws Exception {
    // no positions
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

    for (int i = 0; i < 20; i++) {
      Document doc = new Document();
      if (i < 19 && random().nextBoolean()) {
        for (int j = 0; j < 50; j++) {
          doc.add(new TextField("foo", "i have positions", Field.Store.NO));
        }
      } else {
        for (int j = 0; j < 50; j++) {
          doc.add(new Field("foo", "i have no positions", ft));
        }
      }
      iw.addDocument(doc);
      iw.commit();
    }

    if (random().nextBoolean()) {
      iw.forceMerge(1);
    }

    DirectoryReader ir = iw.getReader();
    FieldInfos fis = MultiFields.getMergedFieldInfos(ir);
    assertEquals(IndexOptions.DOCS_AND_FREQS, fis.fieldInfo("foo").getIndexOptions());
    assertFalse(fis.fieldInfo("foo").hasPayloads());
    iw.close();
    ir.close();
    dir.close(); // the wrapped test directory runs CheckIndex on close
  }
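  /**
   * seekCeil on an absent term must return NOT_FOUND and leave the enum on the
   * next greater term: 0x22 sorts after the empty string, so it lands on "a" at ord 1.
   */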
  public void testSeekCeilNotFound() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    // Get empty string in there!
    doc.add(newStringField("field", "", Field.Store.NO));
    w.addDocument(doc);

    for (int i = 0; i < 36; i++) {
      doc = new Document();
      String term = "" + (char) (97 + i);
      String term2 = "a" + (char) (97 + i);
      doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
      w.addDocument(doc);
    }

    w.forceMerge(1);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(new byte[] {0x22})));
    assertEquals("a", te.term().utf8ToString());
    assertEquals(1L, te.ord());
    r.close();
    w.close();
    dir.close();
  }
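 /**
  * Adding two doc values fields with the same name but different types to one
  * document must throw IllegalArgumentException.
  */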
 public void testDifferentTypedDocValuesField2() throws Exception {
   Directory d = newDirectory();
   RandomIndexWriter w = new RandomIndexWriter(random(), d);
   Document doc = new Document();
   // A field may only use one doc values type, so adding the same
   // field name again with a different type should fail:
   Field f = new NumericDocValuesField("field", 17);
   doc.add(f);
   doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
   try {
     w.addDocument(doc);
     fail("didn't hit expected exception");
   } catch (IllegalArgumentException iae) {
     // expected
   }
   doc = new Document();
   doc.add(f);
   w.addDocument(doc);
   w.forceMerge(1);
   DirectoryReader r = w.getReader();
   assertEquals(17, getOnlySegmentReader(r).getNumericDocValues("field").get(0));
   r.close();
   w.close();
   d.close();
 }
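  /**
   * Doc values fields are single-valued, so adding the same field instance
   * twice to one document must throw IllegalArgumentException.
   */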
  public void testMultiValuedDocValuesField() throws Exception {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    Field f = new NumericDocValuesField("field", 17);
    // Index doc values are single-valued so we should not
    // be able to add same field more than once:
    doc.add(f);
    doc.add(f);
    try {
      w.addDocument(doc);
      fail("didn't hit expected exception");
    } catch (IllegalArgumentException iae) {
      // expected
    }

    doc = new Document();
    doc.add(f);
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    w.close();
    assertEquals(17, FieldCache.DEFAULT.getInts(getOnlySegmentReader(r), "field", false).get(0));
    r.close();
    d.close();
  }
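  /**
   * Indexes three blocks of terms ("a..", "m..", "mo..") and checks term- and
   * ord-based seeking across block boundaries in the merged segment.
   */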
  public void testThreeBlocks() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    List<String> terms = new ArrayList<>();
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "m" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "mo" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    w.forceMerge(1);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    if (VERBOSE) {
      while (te.next() != null) {
        System.out.println("TERM: " + te.ord() + " " + te.term().utf8ToString());
      }
    }

    assertTrue(te.seekExact(new BytesRef("mo")));
    assertEquals(27, te.ord());

    te.seekExact(90);
    assertEquals(new BytesRef("s"), te.term());

    testEnum(te, terms);

    r.close();
    w.close();
    dir.close();
  }
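  /**
   * Checks that SortedNumericDocValues from MultiDocValues over a multi-segment
   * reader match the values in the force-merged single segment.
   */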
  public void testSortedNumeric() throws Exception {
    Directory dir = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numValues = random().nextInt(5);
      for (int j = 0; j < numValues; j++) {
        doc.add(
            new SortedNumericDocValuesField(
                "nums", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
      }
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums");
    SortedNumericDocValues single = merged.getSortedNumericDocValues("nums");
    if (multi == null) {
      assertNull(single);
    } else {
      // check values
      for (int i = 0; i < numDocs; i++) {
        single.setDocument(i);
        ArrayList<Long> expectedList = new ArrayList<>();
        for (int j = 0; j < single.count(); j++) {
          expectedList.add(single.valueAt(j));
        }

        multi.setDocument(i);
        assertEquals(expectedList.size(), multi.count());
        for (int j = 0; j < single.count(); j++) {
          assertEquals(expectedList.get(j).longValue(), multi.valueAt(j));
        }
      }
    }

    ir.close();
    ir2.close();
    dir.close();
  }
  /**
   * Indexes at least one document with f1 and at least one document with f2.
   * Returns the norms for "field".
   */
  NumericDocValues getNorms(String field, Field f1, Field f2) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy());
    RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);

    // add f1
    Document d = new Document();
    d.add(f1);
    riw.addDocument(d);

    // add f2
    d = new Document();
    d.add(f2);
    riw.addDocument(d);

    // add a mix of f1's and f2's
    int numExtraDocs = TestUtil.nextInt(random(), 1, 1000);
    for (int i = 0; i < numExtraDocs; i++) {
      d = new Document();
      d.add(random().nextBoolean() ? f1 : f2);
      riw.addDocument(d);
    }

    IndexReader ir1 = riw.getReader();
    // todo: generalize
    NumericDocValues norms1 = MultiDocValues.getNormValues(ir1, field);

    // fully merge and validate MultiNorms against single segment.
    riw.forceMerge(1);
    DirectoryReader ir2 = riw.getReader();
    NumericDocValues norms2 = getOnlySegmentReader(ir2).getNormValues(field);

    if (norms1 == null) {
      assertNull(norms2);
    } else {
      for (int docID = 0; docID < ir1.maxDoc(); docID++) {
        assertEquals(norms1.get(docID), norms2.get(docID));
      }
    }
    ir1.close();
    ir2.close();
    riw.close();
    dir.close();
    return norms1;
  }
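  /**
   * Checks that MultiDocValues.getDocsWithField matches the merged segment's
   * bits, for both a sparse field and a field present in every doc.
   */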
  public void testDocsWithField() throws Exception {
    Directory dir = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextInt(4) > 0) { // leave "numbers" out of roughly 1 in 4 docs
        doc.add(new NumericDocValuesField("numbers", random().nextLong()));
      }
      doc.add(new NumericDocValuesField("numbersAlways", random().nextLong()));
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    Bits multi = MultiDocValues.getDocsWithField(ir, "numbers");
    Bits single = merged.getDocsWithField("numbers");
    if (multi == null) {
      assertNull(single);
    } else {
      assertEquals(single.length(), multi.length());
      for (int i = 0; i < numDocs; i++) {
        assertEquals(single.get(i), multi.get(i));
      }
    }

    multi = MultiDocValues.getDocsWithField(ir, "numbersAlways");
    single = merged.getDocsWithField("numbersAlways");
    assertEquals(single.length(), multi.length());
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
    ir.close();
    ir2.close();
    dir.close();
  }
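 /** Indexes random docs and runs verifyCount against both the multi-segment and the force-merged reader. */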
 public void testSimple() throws Exception {
   Directory dir = newDirectory();
   RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
   int numDocs = atLeast(100);
   for (int i = 0; i < numDocs; i++) {
     iw.addDocument(doc());
   }
   IndexReader ir = iw.getReader();
   verifyCount(ir);
   ir.close();
   iw.forceMerge(1);
   ir = iw.getReader();
   verifyCount(ir);
   ir.close();
   iw.close();
   dir.close();
 }
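  /**
   * Checks that SortedDocValues (ords and values) from MultiDocValues match
   * the force-merged single segment.
   */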
  public void testSorted() throws Exception {
    Directory dir = newDirectory();
    Document doc = new Document();
    Field field = new SortedDocValuesField("bytes", new BytesRef());
    doc.add(field);

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random()));
      field.setBytesValue(ref);
      if (random().nextInt(7) == 0) {
        iw.addDocument(new Document());
      }
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
    SortedDocValues single = merged.getSortedDocValues("bytes");
    assertEquals(single.getValueCount(), multi.getValueCount());
    for (int i = 0; i < numDocs; i++) {
      // check ord
      assertEquals(single.getOrd(i), multi.getOrd(i));
      // check value
      final BytesRef expected = BytesRef.deepCopyOf(single.get(i));
      final BytesRef actual = multi.get(i);
      assertEquals(expected, actual);
    }
    ir.close();
    ir2.close();
    dir.close();
  }
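  /**
   * Checks that numeric doc values survive addIndexes from two source readers
   * followed by a forceMerge.
   */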
  public void testAddIndexes() throws IOException {
    Directory d1 = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d1);
    Document doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv", 1));
    w.addDocument(doc);
    IndexReader r1 = w.getReader();
    w.close();

    Directory d2 = newDirectory();
    w = new RandomIndexWriter(random(), d2);
    doc = new Document();
    doc.add(newStringField("id", "2", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv", 2));
    w.addDocument(doc);
    IndexReader r2 = w.getReader();
    w.close();

    Directory d3 = newDirectory();
    w = new RandomIndexWriter(random(), d3);
    w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2));
    r1.close();
    d1.close();
    r2.close();
    d2.close();

    w.forceMerge(1);
    DirectoryReader r3 = w.getReader();
    w.close();
    AtomicReader sr = getOnlySegmentReader(r3);
    assertEquals(2, sr.numDocs());
    NumericDocValues docValues = sr.getNumericDocValues("dv");
    assertNotNull(docValues);
    r3.close();
    d3.close();
  }
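  /**
   * Indexes two blocks of terms ("a..", "m..") and randomly seeks by ord and
   * by term in the merged segment.
   */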
  public void testTwoBlocks() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    List<String> terms = new ArrayList<>();
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "m" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    if (VERBOSE) {
      System.out.println("TEST: now forceMerge");
    }
    w.forceMerge(1);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    assertTrue(te.seekExact(new BytesRef("mo")));
    assertEquals(27, te.ord());

    te.seekExact(54);
    assertEquals(new BytesRef("s"), te.term());

    Collections.sort(terms);

    for (int i = terms.size() - 1; i >= 0; i--) {
      te.seekExact(i);
      assertEquals(i, te.ord());
      assertEquals(terms.get(i), te.term().utf8ToString());
    }

    int iters = atLeast(1000);
    for (int iter = 0; iter < iters; iter++) {
      int ord = random().nextInt(terms.size());
      BytesRef term = new BytesRef(terms.get(ord));
      if (random().nextBoolean()) {
        if (VERBOSE) {
          System.out.println("TEST: iter=" + iter + " seek to ord=" + ord + " of " + terms.size());
        }
        te.seekExact(ord);
      } else {
        if (VERBOSE) {
          System.out.println(
              "TEST: iter="
                  + iter
                  + " seek to term="
                  + terms.get(ord)
                  + " ord="
                  + ord
                  + " of "
                  + terms.size());
        }
        te.seekExact(term);
      }
      assertEquals(ord, te.ord());
      assertEquals(term, te.term());
    }

    r.close();
    w.close();
    dir.close();
  }
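  /**
   * Exercises next(), seekCeil(), seekExact(BytesRef), and seekExact(ord) on
   * the TermsEnum exposed by doc term ords.
   */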
  public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);

    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();

    AtomicReader ar = getOnlySegmentReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field");
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next()
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil()
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));

    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    ireader.close();
    directory.close();
  }
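  /**
   * Indexes random stored fields with random deletes and verifies stored
   * values round-trip, before and after a forceMerge.
   */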
  public void testRandomStoredFields() throws IOException {
    Directory dir = newDirectory();
    Random rand = random();
    RandomIndexWriter w =
        new RandomIndexWriter(
            rand,
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20)));
    // w.w.setNoCFSRatio(0.0);
    final int docCount = atLeast(200);
    final int fieldCount = _TestUtil.nextInt(rand, 1, 5);

    final List<Integer> fieldIDs = new ArrayList<Integer>();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setTokenized(false);
    Field idField = newField("id", "", customType);

    for (int i = 0; i < fieldCount; i++) {
      fieldIDs.add(i);
    }

    final Map<String, Document> docs = new HashMap<String, Document>();

    if (VERBOSE) {
      System.out.println("TEST: build index docCount=" + docCount);
    }

    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    for (int i = 0; i < docCount; i++) {
      Document doc = new Document();
      doc.add(idField);
      final String id = "" + i;
      idField.setStringValue(id);
      docs.put(id, doc);
      if (VERBOSE) {
        System.out.println("TEST: add doc id=" + id);
      }

      for (int field : fieldIDs) {
        if (rand.nextInt(4) != 3) {
          doc.add(newField("f" + field, _TestUtil.randomUnicodeString(rand, 1000), customType2));
        }
      }
      w.addDocument(doc);
      if (rand.nextInt(50) == 17) {
        // mix up the binding of field name -> number every so often;
        // use the test's Random so runs stay reproducible
        Collections.shuffle(fieldIDs, rand);
      }
      if (rand.nextInt(5) == 3 && i > 0) {
        final String delID = "" + rand.nextInt(i);
        if (VERBOSE) {
          System.out.println("TEST: delete doc id=" + delID);
        }
        w.deleteDocuments(new Term("id", delID));
        docs.remove(delID);
      }
    }

    if (VERBOSE) {
      System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
    }
    if (docs.size() > 0) {
      String[] idsList = docs.keySet().toArray(new String[docs.size()]);

      for (int x = 0; x < 2; x++) {
        IndexReader r = w.getReader();
        IndexSearcher s = newSearcher(r);

        if (VERBOSE) {
          System.out.println("TEST: cycle x=" + x + " r=" + r);
        }

        int num = atLeast(1000);
        for (int iter = 0; iter < num; iter++) {
          String testID = idsList[rand.nextInt(idsList.length)];
          if (VERBOSE) {
            System.out.println("TEST: test id=" + testID);
          }
          TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
          assertEquals(1, hits.totalHits);
          Document doc = r.document(hits.scoreDocs[0].doc);
          Document docExp = docs.get(testID);
          for (int i = 0; i < fieldCount; i++) {
            assertEquals(
                "doc " + testID + ", field f" + i + " is wrong",
                docExp.get("f" + i),
                doc.get("f" + i));
          }
        }
        r.close();
        w.forceMerge(1);
      }
    }
    w.close();
    dir.close();
  }
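  /**
   * Stored-fields test with documents much bigger than the chunk size: many
   * small fields in one doc, and one very large field in another (LUCENE-4484).
   */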
  @Nightly
  public void testBigDocuments() throws IOException {
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir =
        new MockDirectoryWrapper(
            random(), new MMapDirectory(_TestUtil.getTempDir("testBigDocuments")));
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
    }

    final Document emptyDoc = new Document(); // no fields
    final Document bigDoc1 = new Document(); // lots of small fields
    final Document bigDoc2 = new Document(); // one very big field

    final Field idField = new StringField("id", "", Store.NO);
    emptyDoc.add(idField);
    bigDoc1.add(idField);
    bigDoc2.add(idField);

    final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.setIndexed(false);

    final Field smallField =
        new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
    final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i) {
      bigDoc1.add(smallField);
    }

    final Field bigField =
        new Field(
            "fld",
            randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2),
            onlyStored);
    bigDoc2.add(bigField);

    final int numDocs = atLeast(5);
    final Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i) {
      docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i) {
      idField.setStringValue("" + i);
      iw.addDocument(docs[i]);
      if (random().nextInt(numDocs) == 0) {
        iw.commit();
      }
    }
    iw.commit();
    iw.forceMerge(1); // look at what happens when big docs are merged
    final DirectoryReader rd = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i) {
      final Query query = new TermQuery(new Term("id", "" + i));
      final TopDocs topDocs = searcher.search(query, 1);
      assertEquals("" + i, 1, topDocs.totalHits);
      final Document doc = rd.document(topDocs.scoreDocs[0].doc);
      assertNotNull(doc);
      final IndexableField[] fieldValues = doc.getFields("fld");
      assertEquals(docs[i].getFields("fld").length, fieldValues.length);
      if (fieldValues.length > 0) {
        assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
      }
    }
    rd.close();
    iw.close();
    dir.close();
  }
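  /**
   * Writes stored fields while alternating codecs, then verifies that merging
   * differently-encoded segments round-trips the data.
   */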
  public void testWriteReadMerge() throws IOException {
    // pick a codec other than the default, so that we merge segments written by different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = new Lucene46Codec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());

    final int docCount = atLeast(200);
    final byte[][][] data = new byte[docCount][][];
    for (int i = 0; i < docCount; ++i) {
      final int fieldCount =
          rarely()
              ? RandomInts.randomIntBetween(random(), 1, 500)
              : RandomInts.randomIntBetween(random(), 1, 5);
      data[i] = new byte[fieldCount][];
      for (int j = 0; j < fieldCount; ++j) {
        final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
        final int max = rarely() ? 256 : 2;
        data[i][j] = randomByteArray(length, max);
      }
    }

    final FieldType type = new FieldType(StringField.TYPE_STORED);
    type.setIndexed(false);
    type.freeze();
    IntField id = new IntField("id", 0, Store.YES);
    for (int i = 0; i < data.length; ++i) {
      Document doc = new Document();
      doc.add(id);
      id.setIntValue(i);
      for (int j = 0; j < data[i].length; ++j) {
        Field f = new Field("bytes" + j, data[i][j], type);
        doc.add(f);
      }
      iw.w.addDocument(doc);
      if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
        iw.w.close();
        // test merging against a non-compressing codec
        if (iwConf.getCodec() == otherCodec) {
          iwConf.setCodec(Codec.getDefault());
        } else {
          iwConf.setCodec(otherCodec);
        }
        iw = new RandomIndexWriter(random(), dir, iwConf.clone());
      }
    }

    for (int i = 0; i < 10; ++i) {
      final int min = random().nextInt(data.length);
      final int max = min + random().nextInt(20);
      iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
    }

    iw.forceMerge(2); // force merges with deletions

    iw.commit();

    final DirectoryReader ir = DirectoryReader.open(dir);
    assertTrue(ir.numDocs() > 0);
    int numDocs = 0;
    for (int i = 0; i < ir.maxDoc(); ++i) {
      final Document doc = ir.document(i);
      if (doc == null) {
        continue;
      }
      ++numDocs;
      final int docId = doc.getField("id").numericValue().intValue();
      assertEquals(data[docId].length + 1, doc.getFields().size());
      for (int j = 0; j < data[docId].length; ++j) {
        final byte[] arr = data[docId][j];
        final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
        final byte[] arr2 =
            Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
        assertArrayEquals(arr, arr2);
      }
    }
    assertTrue(ir.numDocs() <= numDocs);
    ir.close();

    iw.deleteAll();
    iw.commit();
    iw.forceMerge(1);

    iw.close();
    dir.close();
  }
  // tries to make more dups than testSortedSet
  public void testSortedSetWithDups() throws Exception {
    Directory dir = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numValues = random().nextInt(5);
      for (int j = 0; j < numValues; j++) {
        doc.add(
            new SortedSetDocValuesField(
                "bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
      }
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
    SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
    if (multi == null) {
      assertNull(single);
    } else {
      assertEquals(single.getValueCount(), multi.getValueCount());
      // check values
      for (long i = 0; i < single.getValueCount(); i++) {
        final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
        final BytesRef actual = multi.lookupOrd(i);
        assertEquals(expected, actual);
      }
      // check ord list
      for (int i = 0; i < numDocs; i++) {
        single.setDocument(i);
        ArrayList<Long> expectedList = new ArrayList<>();
        long ord;
        while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          expectedList.add(ord);
        }

        multi.setDocument(i);
        int upto = 0;
        while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          assertEquals(expectedList.get(upto).longValue(), ord);
          upto++;
        }
        assertEquals(expectedList.size(), upto);
      }
    }

    ir.close();
    ir2.close();
    dir.close();
  }