Code example #1
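Verifies Lucene's multi-level skip lists: 5,000 identical one-term documents are indexed into a single segment, and each checkSkipTo() call asserts that skipping to a target doc stays within the expected byte-read count measured by the counting directory.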
  public void testSimpleSkip() throws IOException {
    Directory dir = new CountingRAMDirectory(new RAMDirectory());
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
                .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
                .setMergePolicy(newLogMergePolicy()));
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
      Document d1 = new Document();
      d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
      writer.addDocument(d1);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.close();

    AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));

    for (int i = 0; i < 2; i++) {
      counter = 0;
      DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
      checkSkipTo(tp, 14, 185); // no skips
      checkSkipTo(tp, 17, 190); // one skip on level 0
      checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

      // this test would fail if we had only one skip level,
      // because then more bytes would be read from the freqStream
      checkSkipTo(tp, 4800, 250); // one skip on level 2
    }
  }
Code example #2
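Indexes two documents that both carry a numeric doc values field "dv" and asserts that FieldCache.DEFAULT.getDocsWithField() reports a value for both.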
  public void testDocsWithField() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dir, conf);
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("dv", "some text", Field.Store.NO));
    doc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(doc);

    DirectoryReader r = writer.getReader();
    writer.close();

    AtomicReader subR = r.leaves().get(0).reader();
    assertEquals(2, subR.numDocs());

    Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
    assertTrue(bits.get(0));
    assertTrue(bits.get(1));
    r.close();
    dir.close();
  }
Code example #3
File: MergeState.java  Project: bozydar/solr
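Builds a doc ID map for a merge: a reader without deletions gets the identity NoDelDocMap, otherwise the map is derived from the reader's live docs.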
 /** Creates a {@link DocMap} instance appropriate for this reader. */
 public static DocMap build(AtomicReader reader) {
   final int maxDoc = reader.maxDoc();
   if (!reader.hasDeletions()) {
     return new NoDelDocMap(maxDoc);
   }
   final Bits liveDocs = reader.getLiveDocs();
   return build(maxDoc, liveDocs);
 }
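For the deletions case, the map has to renumber the live docs densely. A minimal sketch of that idea, using a hypothetical buildDocMap helper over a plain boolean[] (Lucene's actual DocMap stores the mapping in a compressed monotonic buffer rather than an int array):

 // Each live doc is shifted left by the number of deleted docs before it;
 // deleted docs map to -1 since they get no slot in the merged segment.
 static int[] buildDocMap(boolean[] liveDocs) {
   int[] map = new int[liveDocs.length];
   int deletes = 0;
   for (int i = 0; i < liveDocs.length; i++) {
     if (liveDocs[i]) {
       map[i] = i - deletes;
     } else {
       map[i] = -1;
       deletes++;
     }
   }
   return map;
 }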
Code example #4
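Test helper that refreshes the reader: it closes the previous one, reopens a near-real-time DirectoryReader from the writer, and wraps it in SlowCompositeReaderWrapper so tests can work against a single AtomicReaderContext.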
 protected AtomicReaderContext refreshReader() throws Exception {
   if (readerContext != null) {
     readerContext.reader().close();
   }
   AtomicReader reader =
       SlowCompositeReaderWrapper.wrap(topLevelReader = DirectoryReader.open(writer, true));
   readerContext = reader.getContext();
   return readerContext;
 }
Code example #5
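Stress test: indexes Integer.MAX_VALUE documents whose two-byte sorted doc values encode the doc counter, force-merges to one segment, then reads every value back in order to verify it.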
  // indexes Integer.MAX_VALUE docs with a fixed-length (two-byte) sorted doc values field
  public void testFixedSorted() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedSorted"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    Document doc = new Document();
    byte bytes[] = new byte[2];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.add(dvField);

    for (int i = 0; i < Integer.MAX_VALUE; i++) {
      bytes[0] = (byte) (i >> 8);
      bytes[1] = (byte) i;
      w.addDocument(doc);
      if (i % 100000 == 0) {
        System.out.println("indexed: " + i);
        System.out.flush();
      }
    }

    w.forceMerge(1);
    w.close();

    System.out.println("verifying...");
    System.out.flush();

    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    for (AtomicReaderContext context : r.leaves()) {
      AtomicReader reader = context.reader();
      BytesRef scratch = new BytesRef();
      BinaryDocValues dv = reader.getSortedDocValues("dv");
      for (int i = 0; i < reader.maxDoc(); i++) {
        bytes[0] = (byte) (expectedValue >> 8);
        bytes[1] = (byte) expectedValue;
        dv.get(i, scratch);
        assertEquals(data, scratch);
        expectedValue++;
      }
    }

    r.close();
    dir.close();
  }
Code example #6
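Asserts that FieldDataTermsFilter returns a null DocIdSet for the bytes, longs, and doubles variants when the set of terms to match is empty.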
 @Test
 public void testNoTerms() throws Exception {
   FieldDataTermsFilter hFilterBytes =
       FieldDataTermsFilter.newBytes(getFieldData(strMapper), new ObjectOpenHashSet<BytesRef>());
   FieldDataTermsFilter hFilterLongs =
       FieldDataTermsFilter.newLongs(getFieldData(lngMapper), new LongOpenHashSet());
   FieldDataTermsFilter hFilterDoubles =
       FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), new DoubleOpenHashSet());
   assertNull(hFilterBytes.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
   assertNull(hFilterLongs.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
   assertNull(hFilterDoubles.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
 }
Code example #7
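Returns the subset of merge readers that still contain live documents as an unmodifiable list, failing fast if IndexWriter has not initialized the readers yet.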
 /**
  * Expert: Get the list of readers to merge. Note that this list does not necessarily match the
  * list of segments to merge and should only be used to feed SegmentMerger to initialize a
  * merge. When a {@link OneMerge} reorders doc IDs, it must override {@link #getDocMap} too so
  * that deletes that happened during the merge can be applied to the newly merged segment.
  */
 public List<AtomicReader> getMergeReaders() throws IOException {
   if (readers == null) {
     throw new IllegalStateException(
         "IndexWriter has not initialized readers from the segment infos yet");
   }
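   // NOTE: this local list intentionally shadows the readers field; only
   // readers that still contain live documents are handed to the merge.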
   final List<AtomicReader> readers = new ArrayList<AtomicReader>(this.readers.size());
   for (AtomicReader reader : this.readers) {
     if (reader.numDocs() > 0) {
       readers.add(reader);
     }
   }
   return Collections.unmodifiableList(readers);
 }
Code example #8
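Debug helper that prints every document of every segment, marking as DEL the documents that the segment's live docs report as deleted.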
 private static void printDocs(DirectoryReader r) throws Throwable {
   for (AtomicReaderContext ctx : r.leaves()) {
     // TODO: improve this
     AtomicReader sub = ctx.reader();
     Bits liveDocs = sub.getLiveDocs();
     System.out.println("  " + ((SegmentReader) sub).getSegmentInfo());
     for (int docID = 0; docID < sub.maxDoc(); docID++) {
       StoredDocument doc = sub.document(docID);
       if (liveDocs == null || liveDocs.get(docID)) {
         System.out.println("    docID=" + docID + " id:" + doc.get("id"));
       } else {
         System.out.println("    DEL docID=" + docID + " id:" + doc.get("id"));
       }
     }
   }
 }
Code example #9
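Indexes three small documents and walks DocTermOrds through its SortedSetDocValues iterator, asserting the exact term ordinals returned for each document.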
  public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter w =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);

    field.setStringValue("d e f");
    w.addDocument(doc);

    field.setStringValue("a f");
    w.addDocument(doc);

    final IndexReader r = w.getReader();
    w.close();

    final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);

    iter.setDocument(0);
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    iter.setDocument(1);
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    iter.setDocument(2);
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    r.close();
    dir.close();
  }
Code example #10
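JUnit teardown that closes the reader and writer, clears the field data service, and detaches the current SearchContext.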
 @After
 public void tearDown() throws Exception {
   super.tearDown();
   reader.close();
   writer.close();
   ifdService.clear();
   SearchContext.removeCurrent();
 }
Code example #11
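Creates two single-document indexes with numeric doc values, merges them into a third index via addIndexes(), force-merges, and asserts that both documents and the "dv" field survive.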
  public void testAddIndexes() throws IOException {
    Directory d1 = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d1);
    Document doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv", 1));
    w.addDocument(doc);
    IndexReader r1 = w.getReader();
    w.close();

    Directory d2 = newDirectory();
    w = new RandomIndexWriter(random(), d2);
    doc = new Document();
    doc.add(newStringField("id", "2", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv", 2));
    w.addDocument(doc);
    IndexReader r2 = w.getReader();
    w.close();

    Directory d3 = newDirectory();
    w = new RandomIndexWriter(random(), d3);
    w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2));
    r1.close();
    d1.close();
    r2.close();
    d2.close();

    w.forceMerge(1);
    DirectoryReader r3 = w.getReader();
    w.close();
    AtomicReader sr = getOnlySegmentReader(r3);
    assertEquals(2, sr.numDocs());
    NumericDocValues docValues = sr.getNumericDocValues("dv");
    assertNotNull(docValues);
    r3.close();
    d3.close();
  }
Code example #12
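Tests the doubles variant of FieldDataTermsFilter and the mapper's termsFilter() against docs 1, 5, and 7, then verifies that the same double terms match nothing when applied to the long field's field data.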
  @Test
  public void testDoubles() throws Exception {
    List<Integer> docs = Arrays.asList(1, 5, 7);

    DoubleOpenHashSet hTerms = new DoubleOpenHashSet();
    List<Double> cTerms = new ArrayList<Double>(docs.size());
    for (int i = 0; i < docs.size(); i++) {
      double term = Double.valueOf(docs.get(i));
      hTerms.add(term);
      cTerms.add(term);
    }

    FieldDataTermsFilter hFilter = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), hTerms);

    int size = reader.maxDoc();
    FixedBitSet result = new FixedBitSet(size);

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    // filter from mapper
    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(
        dblMapper
            .termsFilter(ifdService, cTerms, null)
            .getDocIdSet(reader.getContext(), reader.getLiveDocs())
            .iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

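    // filter on the long field using double terms
    // should not match any docs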
    hFilter = FieldDataTermsFilter.newDoubles(getFieldData(lngMapper), hTerms);
    assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
  }
Code example #13
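Wraps a TermsEnum in a frequency filter when the segment has at least min_segment_size documents; the min/max settings accept absolute document frequencies (values >= 1.0) or fractions of the segment's doc count (values below 1.0).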
    public static TermsEnum filter(
        TermsEnum toFilter, Terms terms, AtomicReader reader, Settings settings)
        throws IOException {
      int docCount = terms.getDocCount();
      if (docCount == -1) {
        docCount = reader.maxDoc();
      }
      final double minFrequency = settings.getAsDouble("min", 0d);
      final double maxFrequency = settings.getAsDouble("max", docCount + 1d);
      final int minSegmentSize = settings.getAsInt("min_segment_size", 0);
      if (minSegmentSize < docCount) {
        final int minFreq =
            minFrequency >= 1.0 ? (int) minFrequency : (int) (docCount * minFrequency);
        final int maxFreq =
            maxFrequency >= 1.0 ? (int) maxFrequency : (int) (docCount * maxFrequency);
        assert minFreq < maxFreq;
        return new FrequencyFilter(toFilter, minFreq, maxFreq);
      }

      return toFilter;
    }
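The threshold resolution above is the subtle part: a setting of 0.5 means half of the segment's documents, while 5 means five documents. A hypothetical helper capturing just that rule:

    // Thresholds >= 1.0 are absolute doc frequencies; values below 1.0 are
    // interpreted as a fraction of the segment's doc count.
    static int resolveFrequency(double frequency, int docCount) {
      return frequency >= 1.0 ? (int) frequency : (int) (docCount * frequency);
    }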
Code example #14
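Loads per-segment double field data: every term of the field is parsed into a values table and per-document ordinals are recorded, after which the most compact representation is chosen (single-valued, single-valued plus a missing-value bit set, or multi-valued with ordinals).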
  @Override
  public DoubleArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    if (terms == null) {
      return DoubleArrayAtomicFieldData.EMPTY;
    }

    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    final TDoubleArrayList values = new TDoubleArrayList();
    ArrayList<int[]> ordinals = new ArrayList<int[]>();
    int[] idx = new int[reader.maxDoc()];
    ordinals.add(new int[reader.maxDoc()]);

    values.add(0); // slot 0 is reserved for the "no value" ordinal
    int termOrd = 1; // current term number

    TermsEnum termsEnum = terms.iterator(null);
    try {
      DocsEnum docsEnum = null;
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        values.add(FieldCache.NUMERIC_UTILS_DOUBLE_PARSER.parseDouble(term));
        docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
        for (int docId = docsEnum.nextDoc();
            docId != DocsEnum.NO_MORE_DOCS;
            docId = docsEnum.nextDoc()) {
          int[] ordinal;
          if (idx[docId] >= ordinals.size()) {
            ordinal = new int[reader.maxDoc()];
            ordinals.add(ordinal);
          } else {
            ordinal = ordinals.get(idx[docId]);
          }
          ordinal[docId] = termOrd;
          idx[docId]++;
        }
        termOrd++;
      }
    } catch (RuntimeException e) {
      if (e.getClass().getName().endsWith("StopFillCacheException")) {
        // all is well, in case numeric parsers are used.
      } else {
        throw e;
      }
    }

    if (ordinals.size() == 1) {
      int[] nativeOrdinals = ordinals.get(0);
      FixedBitSet set = new FixedBitSet(reader.maxDoc());
      double[] sValues = new double[reader.maxDoc()];
      boolean allHaveValue = true;
      for (int i = 0; i < nativeOrdinals.length; i++) {
        int nativeOrdinal = nativeOrdinals[i];
        if (nativeOrdinal == 0) {
          allHaveValue = false;
        } else {
          set.set(i);
          sValues[i] = values.get(nativeOrdinal);
        }
      }
      if (allHaveValue) {
        return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
      } else {
        return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
      }
    } else {
      int[][] nativeOrdinals = new int[ordinals.size()][];
      for (int i = 0; i < nativeOrdinals.length; i++) {
        nativeOrdinals[i] = ordinals.get(i);
      }
      return new DoubleArrayAtomicFieldData.WithOrdinals(
          values.toArray(new double[values.size()]),
          reader.maxDoc(),
          Ordinals.Factories.createFromFlatOrdinals(
              nativeOrdinals, termOrd, fieldDataType.getSettings()));
    }
  }
Code example #15
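The BytesRef counterpart of the doubles test: checks the filter and the mapper's termsFilter() against docs 1, 5, and 7, then verifies that byte terms match nothing against the long and double fields.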
  @Test
  public void testBytes() throws Exception {
    List<Integer> docs = Arrays.asList(1, 5, 7);

    ObjectOpenHashSet<BytesRef> hTerms = new ObjectOpenHashSet<BytesRef>();
    List<BytesRef> cTerms = new ArrayList<BytesRef>(docs.size());
    for (int i = 0; i < docs.size(); i++) {
      BytesRef term = new BytesRef("str" + docs.get(i));
      hTerms.add(term);
      cTerms.add(term);
    }

    FieldDataTermsFilter hFilter = FieldDataTermsFilter.newBytes(getFieldData(strMapper), hTerms);

    int size = reader.maxDoc();
    FixedBitSet result = new FixedBitSet(size);

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    // filter from mapper
    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(
        strMapper
            .termsFilter(ifdService, cTerms, null)
            .getDocIdSet(reader.getContext(), reader.getLiveDocs())
            .iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));

    // filter on a numeric field using BytesRef terms
    // should not match any docs
    hFilter = FieldDataTermsFilter.newBytes(getFieldData(lngMapper), hTerms);
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(0));

    // filter on a numeric field using BytesRef terms
    // should not match any docs
    hFilter = FieldDataTermsFilter.newBytes(getFieldData(dblMapper), hTerms);
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(0));
  }
Code example #16
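Verification helper for DocTermOrds: for every document it resolves the iterator's ordinals back to terms via the ord TermsEnum and compares them with precomputed expected term IDs, covering the corner case where no term matches the prefix.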
  private void verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
      throws Exception {

    final DocTermOrds dto =
        new DocTermOrds(
            r,
            r.getLiveDocs(),
            "field",
            prefixRef,
            Integer.MAX_VALUE,
            _TestUtil.nextInt(random(), 2, 10));

    final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(r, "id", false);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
      System.out.println(
          "TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
      System.out.println("TEST: all TERMS:");
      TermsEnum allTE = MultiFields.getTerms(r, "field").iterator(null);
      int ord = 0;
      while (allTE.next() != null) {
        System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
      }
    }

    // final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
      if (prefixRef == null) {
        assertNull(MultiFields.getTerms(r, "field"));
      } else {
        Terms terms = MultiFields.getTerms(r, "field");
        if (terms != null) {
          TermsEnum termsEnum = terms.iterator(null);
          TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef, false);
          if (result != TermsEnum.SeekStatus.END) {
            assertFalse(
                "term="
                    + termsEnum.term().utf8ToString()
                    + " matches prefix="
                    + prefixRef.utf8ToString(),
                StringHelper.startsWith(termsEnum.term(), prefixRef));
          } else {
            // ok
          }
        } else {
          // ok
        }
      }
      return;
    }

    if (VERBOSE) {
      System.out.println("TEST: TERMS:");
      te.seekExact(0);
      while (true) {
        System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
        if (te.next() == null) {
          break;
        }
      }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
      if (VERBOSE) {
        System.out.println(
            "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
      }
      iter.setDocument(docID);
      final int[] answers = idToOrds[docIDToID.get(docID)];
      int upto = 0;
      long ord;
      while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        te.seekExact(ord);
        final BytesRef expected = termsArray[answers[upto++]];
        if (VERBOSE) {
          System.out.println(
              "  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
        }
        assertEquals(
            "expected="
                + expected.utf8ToString()
                + " actual="
                + te.term().utf8ToString()
                + " ord="
                + ord,
            expected,
            te.term());
      }
      assertEquals(answers.length, upto);
    }
  }
Code example #17
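Terms-component request processing: for each requested field a TermsEnum is walked within prefix, regexp, lower/upper bound, and min/max document-frequency constraints, returning up to the requested number of terms either in index order or sorted by count via a BoundedTreeSet.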
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false)) return;

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<>();
    rb.rsp.add("terms", termsResult);

    if (fields == null || fields.length == 0) return;

    int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    if (limit < 0) {
      limit = Integer.MAX_VALUE;
    }

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort =
        !TermsParams.TERMS_SORT_INDEX.equals(
            params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    if (freqmax < 0) {
      freqmax = Integer.MAX_VALUE;
    }
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
    Fields lfields = indexReader.fields();

    for (String field : fields) {
      NamedList<Integer> fieldTerms = new NamedList<>();
      termsResult.add(field, fieldTerms);

      Terms terms = lfields == null ? null : lfields.terms(field);
      if (terms == null) {
        // no terms for this field
        continue;
      }

      FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
      if (ft == null) ft = new StrField();

      // prefix must currently be text
      BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

      BytesRef upperBytes = null;
      if (upperStr != null) {
        upperBytes = new BytesRef();
        ft.readableToIndexed(upperStr, upperBytes);
      }

      BytesRef lowerBytes;
      if (lowerStr == null) {
        // If no lower bound was specified, use the prefix
        lowerBytes = prefixBytes;
      } else if (raw) {
        // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
        // perhaps we detect if the FieldType is non-character and expect hex if so?
        lowerBytes = new BytesRef(lowerStr);
      } else {
        lowerBytes = new BytesRef();
        ft.readableToIndexed(lowerStr, lowerBytes);
      }

      TermsEnum termsEnum = terms.iterator(null);
      BytesRef term = null;

      if (lowerBytes != null) {
        if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
          termsEnum = null;
        } else {
          term = termsEnum.term();
          // Only advance the enum if we are excluding the lower bound and the lower Term actually
          // matches
          if (lowerIncl == false && term.equals(lowerBytes)) {
            term = termsEnum.next();
          }
        }
      } else {
        // position termsEnum on first term
        term = termsEnum.next();
      }

      int i = 0;
      BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
          (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
      CharsRef external = new CharsRef();
      while (term != null && (i < limit || sort)) {
        boolean externalized = false; // did we fill in "external" yet for this term?

        // stop if the prefix doesn't match
        if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

        if (pattern != null) {
          // indexed text or external text?
          // TODO: support "raw" mode?
          ft.indexedToReadable(term, external);
          externalized = true;
          if (!pattern.matcher(external).matches()) {
            term = termsEnum.next();
            continue;
          }
        }

        if (upperBytes != null) {
          int upperCmp = term.compareTo(upperBytes);
          // if we are past the upper term, or equal to it when the upper bound is exclusive, then stop.
          if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
        }

        // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
        int docFreq = termsEnum.docFreq();
        if (docFreq >= freqmin && docFreq <= freqmax) {
          // add the term to the list
          if (sort) {
            queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
          } else {

            // TODO: handle raw somehow
            if (!externalized) {
              ft.indexedToReadable(term, external);
            }
            fieldTerms.add(external.toString(), docFreq);
            i++;
          }
        }

        term = termsEnum.next();
      }

      if (sort) {
        for (CountPair<BytesRef, Integer> item : queue) {
          if (i >= limit) break;
          ft.indexedToReadable(item.key, external);
          fieldTerms.add(external.toString(), item.val);
          i++;
        }
      }
    }
  }
Code example #18
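Randomized round-trip test: each document stores a random int/long/float/double both as a numeric field and as a StoredField, and the verification pass asserts that the stored field yields the original number for every document.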
  public void testNumericField() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final int numDocs = atLeast(500);
    final Number[] answers = new Number[numDocs];
    final NumericType[] typeAnswers = new NumericType[numDocs];
    for (int id = 0; id < numDocs; id++) {
      Document doc = new Document();
      final Field nf;
      final Field sf;
      final Number answer;
      final NumericType typeAnswer;
      if (random().nextBoolean()) {
        // float/double
        if (random().nextBoolean()) {
          final float f = random().nextFloat();
          answer = Float.valueOf(f);
          nf = new FloatField("nf", f, Field.Store.NO);
          sf = new StoredField("nf", f);
          typeAnswer = NumericType.FLOAT;
        } else {
          final double d = random().nextDouble();
          answer = Double.valueOf(d);
          nf = new DoubleField("nf", d, Field.Store.NO);
          sf = new StoredField("nf", d);
          typeAnswer = NumericType.DOUBLE;
        }
      } else {
        // int/long
        if (random().nextBoolean()) {
          final int i = random().nextInt();
          answer = Integer.valueOf(i);
          nf = new IntField("nf", i, Field.Store.NO);
          sf = new StoredField("nf", i);
          typeAnswer = NumericType.INT;
        } else {
          final long l = random().nextLong();
          answer = Long.valueOf(l);
          nf = new LongField("nf", l, Field.Store.NO);
          sf = new StoredField("nf", l);
          typeAnswer = NumericType.LONG;
        }
      }
      doc.add(nf);
      doc.add(sf);
      answers[id] = answer;
      typeAnswers[id] = typeAnswer;
      FieldType ft = new FieldType(IntField.TYPE_STORED);
      ft.setNumericPrecisionStep(Integer.MAX_VALUE);
      doc.add(new IntField("id", id, ft));
      w.addDocument(doc);
    }
    final DirectoryReader r = w.getReader();
    w.close();

    assertEquals(numDocs, r.numDocs());

    for (AtomicReaderContext ctx : r.leaves()) {
      final AtomicReader sub = ctx.reader();
      final FieldCache.Ints ids = FieldCache.DEFAULT.getInts(sub, "id", false);
      for (int docID = 0; docID < sub.numDocs(); docID++) {
        final Document doc = sub.document(docID);
        final Field f = (Field) doc.getField("nf");
        assertTrue("got f=" + f, f instanceof StoredField);
        assertEquals(answers[ids.get(docID)], f.numericValue());
      }
    }
    r.close();
    dir.close();
  }
Code example #19
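Resolves parent documents for a set of child hits: each hit's parent UID is looked up in the ID cache, the live parent doc is located per segment, and per-parent count, max score, and score sum are accumulated into per-reader maps that are finally sorted.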
  int resolveParentDocuments(
      TopDocs topDocs,
      SearchContext context,
      Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) {
    int parentHitsResolved = 0;
    Recycler.V<ObjectObjectOpenHashMap<Object, Recycler.V<IntObjectOpenHashMap<ParentDoc>>>>
        parentDocsPerReader =
            cacheRecycler.hashMap(context.searcher().getIndexReader().leaves().size());
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      int readerIndex =
          ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
      AtomicReaderContext subContext =
          context.searcher().getIndexReader().leaves().get(readerIndex);
      int subDoc = scoreDoc.doc - subContext.docBase;

      // find the parent id
      HashedBytesArray parentId =
          context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc);
      if (parentId == null) {
        // no parent found
        continue;
      }
      // now go over and find the parent doc Id and reader tuple
      for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
        AtomicReader indexReader = atomicReaderContext.reader();
        int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
        Bits liveDocs = indexReader.getLiveDocs();
        if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) {
          // we found a match, add it and break

          Recycler.V<IntObjectOpenHashMap<ParentDoc>> readerParentDocs =
              parentDocsPerReader.v().get(indexReader.getCoreCacheKey());
          if (readerParentDocs == null) {
            readerParentDocs = cacheRecycler.intObjectMap(indexReader.maxDoc());
            parentDocsPerReader.v().put(indexReader.getCoreCacheKey(), readerParentDocs);
          }

          ParentDoc parentDoc = readerParentDocs.v().get(parentDocId);
          if (parentDoc == null) {
            parentHitsResolved++; // we have a hit on a parent
            parentDoc = new ParentDoc();
            parentDoc.docId = parentDocId;
            parentDoc.count = 1;
            parentDoc.maxScore = scoreDoc.score;
            parentDoc.sumScores = scoreDoc.score;
            readerParentDocs.v().put(parentDocId, parentDoc);
          } else {
            parentDoc.count++;
            parentDoc.sumScores += scoreDoc.score;
            if (scoreDoc.score > parentDoc.maxScore) {
              parentDoc.maxScore = scoreDoc.score;
            }
          }
        }
      }
    }
    boolean[] states = parentDocsPerReader.v().allocated;
    Object[] keys = parentDocsPerReader.v().keys;
    Object[] values = parentDocsPerReader.v().values;
    for (int i = 0; i < states.length; i++) {
      if (states[i]) {
        Recycler.V<IntObjectOpenHashMap<ParentDoc>> value =
            (Recycler.V<IntObjectOpenHashMap<ParentDoc>>) values[i];
        ParentDoc[] _parentDocs = value.v().values().toArray(ParentDoc.class);
        Arrays.sort(_parentDocs, PARENT_DOC_COMP);
        parentDocs.v().put(keys[i], _parentDocs);
        Releasables.release(value);
      }
    }
    Releasables.release(parentDocsPerReader);
    return parentHitsResolved;
  }
Code example #20
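Randomized parent/child test for ChildrenConstantScoreQuery: builds an index with randomly deleted and filtered parents, optionally simulates parent updates mid-run, executes the query both directly and wrapped as a filter, and compares the collected bit set with an expectation derived from UID lookups.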
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 10000 : 1000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();
    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds =
        new ObjectObjectOpenHashMap<String, NavigableSet<String>>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs;
      if (rarely()) {
        numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      } else {
        numChildDocs = random().nextInt(TEST_NIGHTLY ? 40 : 10);
      }
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableSet<String> parentIds;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIds = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIds = new TreeSet<String>());
          }
          if (!markParentAsDeleted && !filterMe) {
            parentIds.add(parent);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.commit();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me")));
    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals
      // with the fact that deletes are applied at the top level when filters are cached.
      Filter parentFilter;
      if (random().nextBoolean()) {
        parentFilter = SearchContext.current().filterCache().cache(rawParentFilter);
      } else {
        parentFilter = rawParentFilter;
      }

      // Using this in a filtered query exercises Scorer#advance(..) and also ensures that
      // Weight#scorer does not receive live docs as acceptedDocs.
      Filter filterMe;
      if (random().nextBoolean()) {
        filterMe = SearchContext.current().filterCache().cache(rawFilterMe);
      } else {
        filterMe = rawFilterMe;
      }

      // Simulate a parent update
      if (random().nextBoolean()) {
        int numberOfUpdates = 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5);
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.SimpleSearcher(
                ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      TermQuery childQuery = new TermQuery(new Term("field1", childValue));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      Filter nonNestedDocsFilter = random().nextBoolean() ? NonNestedDocsFilter.INSTANCE : null;
      Query query;
      if (random().nextBoolean()) {
        // Usage in HasChildQueryParser
        query =
            new ChildrenConstantScoreQuery(
                childQuery,
                "parent",
                "child",
                parentFilter,
                shortCircuitParentDocSet,
                nonNestedDocsFilter);
      } else {
        // Usage in HasChildFilterParser
        query =
            new XConstantScoreQuery(
                new CustomQueryWrappingFilter(
                    new ChildrenConstantScoreQuery(
                        childQuery,
                        "parent",
                        "child",
                        parentFilter,
                        shortCircuitParentDocSet,
                        nonNestedDocsFilter)));
      }
      query = new XFilteredQuery(query, filterMe);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      searcher.search(query, collector);
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableSet<String> parentIds = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (String id : parentIds) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
Code example #21
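Randomized ChildrenQuery test: indexes parents and scored children, runs the query with a random ScoreType and short-circuit threshold, and checks both the matching bit set and the top docs against expectations produced by a MockScorer.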
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(
                childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>());
          }
          if (!markParentAsDeleted) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    for (String childValue : childValues) {
      Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue)));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      Query query =
          new ChildrenQuery(
              "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      MockScorer mockScorer = new MockScorer(scoreType);
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      expectedTopDocsCollector.setScorer(mockScorer);
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
              mockScorer.scores = entry.getValue();
              expectedTopDocsCollector.collect(docsEnum.docID());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexReader.close();
    directory.close();
  }