Example #1
  public void testReuseDocsEnumNoReuse() throws IOException {
    Directory dir = newDirectory();
    Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();

    DirectoryReader open = DirectoryReader.open(dir);
    for (LeafReaderContext ctx : open.leaves()) {
      LeafReader indexReader = ctx.reader();
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator();
      IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
      MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
      while (iterator.next() != null) {
        PostingsEnum docs =
            iterator.postings(
                random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()),
                null,
                random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }

      assertEquals(terms.size(), enums.size());
    }
    writer.commit();
    IOUtils.close(writer, open, dir);
  }
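For contrast, a minimal sketch of the reuse path this test deliberately avoids. It is hedged: bits and iterator mirror the variables in the test above, and a codec MAY reuse the passed-in enum but is not required to.
  // Passing the previous enum back in allows the codec to reuse it, so an
  // IdentityHashMap like the one above would typically stay much smaller.
  PostingsEnum reuse = null;
  while (iterator.next() != null) {
    reuse = iterator.postings(bits, reuse, PostingsEnum.FREQS);
  }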
  public synchronized ShapeFieldCache<T> getCache(LeafReader reader) throws IOException {
    ShapeFieldCache<T> idx = sidx.get(reader);
    if (idx != null) {
      return idx;
    }
    long startTime = System.currentTimeMillis();

    log.fine("Building Cache [" + reader.maxDoc() + "]");
    idx = new ShapeFieldCache<>(reader.maxDoc(), defaultSize);
    int count = 0;
    DocsEnum docs = null;
    Terms terms = reader.terms(shapeField);
    TermsEnum te = null;
    if (terms != null) {
      te = terms.iterator(te);
      BytesRef term = te.next();
      while (term != null) {
        T shape = readShape(term);
        if (shape != null) {
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          int docid = docs.nextDoc(); // nextDoc() returns a primitive int; boxing it per doc is wasteful
          while (docid != DocIdSetIterator.NO_MORE_DOCS) {
            idx.add(docid, shape);
            docid = docs.nextDoc();
            count++;
          }
        }
        term = te.next();
      }
    }
    sidx.put(reader, idx);
    long elapsed = System.currentTimeMillis() - startTime;
    log.fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);
    return idx;
  }
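A minimal usage sketch for the cache built above, assuming ShapeFieldCache exposes getShapes(int docid) as in Lucene's spatial module; provider, leafReader, and docId are illustrative stand-ins for the enclosing ShapeFieldCacheProvider, a real reader, and a document of interest.
  // Hedged sketch: fetch the shapes recorded for one document by getCache(...) above.
  ShapeFieldCache<Shape> cache = provider.getCache(leafReader);
  List<Shape> shapes = cache.getShapes(docId); // entries were added via idx.add(docid, shape)
  if (shapes != null) {
    for (Shape shape : shapes) {
      // run distance or containment tests against the cached shapes here
    }
  }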
Example #3
  public TermInfo collect(String term) throws IOException {
    TermInfo info = new TermInfo();
    BytesRef luceneTerm = new BytesRef(term); // BytesRef(CharSequence) encodes UTF-8; term.getBytes() would use the platform default charset
    // this gives documents in which the term is found, but no offset information can be retrieved
    PostingsEnum postings =
        MultiFields.getTermDocsEnum(indexReader, ngramInfoFieldname, luceneTerm);
    if (postings == null) {
      return info; // the term does not occur in this field at all
    }
    // now go through each document
    int docId = postings.nextDoc();
    while (docId != PostingsEnum.NO_MORE_DOCS) {
      // get the term vector for that document.
      TermsEnum it = indexReader.getTermVector(docId, ngramInfoFieldname).iterator();
      // find the term of interest
      it.seekExact(luceneTerm);
      // get its posting info. this will contain offset info
      PostingsEnum postingsInDoc = it.postings(null, PostingsEnum.OFFSETS);
      postingsInDoc.nextDoc();

      Document doc = indexReader.document(docId);
      String id = doc.get(idFieldname);
      JATEDocument jd = new JATEDocument(id);
      Set<int[]> offsets = new HashSet<>();
      int totalFreq = postingsInDoc.freq();
      for (int i = 0; i < totalFreq; i++) {
        postingsInDoc.nextPosition();
        offsets.add(new int[] {postingsInDoc.startOffset(), postingsInDoc.endOffset()});
      }
      info.getOffsets().put(jd, offsets);

      docId = postings.nextDoc();
    }

    return info;
  }
  public void testNormsWithDocValues() throws Exception {
    MemoryIndex mi = new MemoryIndex(true, true);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());

    mi.addField(
        new BinaryDocValuesField("text", new BytesRef("quick brown fox")), mockAnalyzer, 5f);
    mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), mockAnalyzer, 5f);
    LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

    Document doc = new Document();
    doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
    Field field = new TextField("text", "quick brown fox", Field.Store.NO);
    field.setBoost(5f);
    doc.add(field);
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.addDocument(doc);
    writer.close();

    IndexReader controlIndexReader = DirectoryReader.open(dir);
    LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

    assertEquals(
        controlLeafReader.getNormValues("text").get(0), leafReader.getNormValues("text").get(0));

    controlIndexReader.close();
    dir.close();
  }
Example #5
  /**
   * Returns a mapping from the old document ID to its new location in the sorted index.
   * Implementations can use the auxiliary {@link #sort(int, DocComparator)} to compute the
   * old-to-new permutation given a list of documents and their corresponding values.
   *
   * <p>A return value of <code>null</code> is allowed and means that <code>reader</code> is already
   * sorted.
   *
   * <p><b>NOTE:</b> deleted documents are expected to appear in the mapping as well, they will
   * however be marked as deleted in the sorted view.
   */
  DocMap sort(LeafReader reader) throws IOException {
    SortField fields[] = sort.getSort();
    final int reverseMul[] = new int[fields.length];
    final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];

    for (int i = 0; i < fields.length; i++) {
      reverseMul[i] = fields[i].getReverse() ? -1 : 1;
      comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext());
      comparators[i].setScorer(FAKESCORER);
    }
    final DocComparator comparator =
        new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            try {
              for (int i = 0; i < comparators.length; i++) {
                // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
                // the segments are always the same here...
                comparators[i].copy(0, docID1);
                comparators[i].setBottom(0);
                int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
                if (comp != 0) {
                  return comp;
                }
              }
              return Integer.compare(docID1, docID2); // docid order tiebreak
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          }
        };
    return sort(reader.maxDoc(), comparator);
  }
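A short sketch of consuming the DocMap returned above. oldToNew and newToOld are the Sorter.DocMap accessors; sorter and leafReader stand in for the enclosing Sorter instance and a real reader, and are assumptions of this sketch.
  // Hedged sketch: translate doc IDs through the old-to-new permutation.
  Sorter.DocMap docMap = sorter.sort(leafReader);
  if (docMap != null) { // null means the reader is already sorted
    for (int oldID = 0; oldID < leafReader.maxDoc(); oldID++) {
      int newID = docMap.oldToNew(oldID);     // position in the sorted view
      assert docMap.newToOld(newID) == oldID; // the two mappings are inverses
    }
  }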
  public void testDocsAndPositionsEnumStart() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    int numIters = atLeast(3);
    MemoryIndex memory = new MemoryIndex(true, false, random().nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numIters; i++) { // check reuse
      memory.addField("foo", "bar", analyzer);
      LeafReader reader = (LeafReader) memory.createSearcher().getIndexReader();
      TestUtil.checkReader(reader);
      assertEquals(1, reader.terms("foo").getSumTotalTermFreq());
      PostingsEnum disi = reader.postings(new Term("foo", "bar"), PostingsEnum.ALL);
      int docid = disi.docID();
      assertEquals(-1, docid);
      assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(0, disi.nextPosition());
      assertEquals(0, disi.startOffset());
      assertEquals(3, disi.endOffset());

      // now reuse and check again
      TermsEnum te = reader.terms("foo").iterator();
      assertTrue(te.seekExact(new BytesRef("bar")));
      disi = te.postings(disi);
      docid = disi.docID();
      assertEquals(-1, docid);
      assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      reader.close();
      memory.reset();
    }
  }
Example #7
  public void testNumerics() throws Exception {
    Directory dir = newDirectory();
    Document doc = new Document();
    Field field = new NumericDocValuesField("numbers", 0);
    doc.add(field);

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      field.setLongValue(random().nextLong());
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
    NumericDocValues single = merged.getNumericDocValues("numbers");
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
    ir.close();
    ir2.close();
    dir.close();
  }
    Query createCandidateQuery(IndexReader indexReader) throws IOException {
      List<Term> extractedTerms = new ArrayList<>();
      // include extractionResultField:failed, because docs with this term have no
      // extractedTermsField and would otherwise never be returned. Docs that failed
      // query term extraction always need to be verified by MemoryIndex:
      extractedTerms.add(new Term(extractionResultField.name(), EXTRACTION_FAILED));

      LeafReader reader = indexReader.leaves().get(0).reader();
      Fields fields = reader.fields();
      for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
          continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
          BytesRefBuilder builder = new BytesRefBuilder();
          builder.append(fieldBr);
          builder.append(FIELD_VALUE_SEPARATOR);
          builder.append(term);
          extractedTerms.add(new Term(queryTermsField.name(), builder.toBytesRef()));
        }
      }
      return new TermsQuery(extractedTerms);
    }
Example #9
 protected LeafReaderContext refreshReader() throws Exception {
   if (readerContext != null) {
     readerContext.reader().close();
   }
   LeafReader reader =
       SlowCompositeReaderWrapper.wrap(topLevelReader = DirectoryReader.open(writer, true));
   readerContext = reader.getContext();
   return readerContext;
 }
 @Override
 public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
   LeafReader reader = context.reader();
   FieldInfo info = reader.getFieldInfos().fieldInfo(field);
   if (info != null) {
     Geo3DDocValuesField.checkCompatible(info);
   }
   currentDocs = DocValues.getSortedNumeric(reader, field);
   return this;
 }
Example #11
 private void tstFilterCard(String mes, int expected, Filter filt) throws Exception {
   final DocIdSet docIdSet = filt.getDocIdSet(reader.getContext(), reader.getLiveDocs());
   int actual = 0;
   if (docIdSet != null) {
     DocIdSetIterator disi = docIdSet.iterator();
     while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
       actual++;
     }
   }
   assertEquals(mes, expected, actual);
 }
Example #12
  public void testChangeGaps() throws Exception {
    // LUCENE-5324: check that it is possible to change the wrapper's gaps
    final int positionGap = random().nextInt(1000);
    final int offsetGap = random().nextInt(1000);
    final Analyzer delegate = new MockAnalyzer(random());
    final Analyzer a =
        new DelegatingAnalyzerWrapper(delegate.getReuseStrategy()) {
          @Override
          protected Analyzer getWrappedAnalyzer(String fieldName) {
            return delegate;
          }

          @Override
          public int getPositionIncrementGap(String fieldName) {
            return positionGap;
          }

          @Override
          public int getOffsetGap(String fieldName) {
            return offsetGap;
          }
        };

    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a);
    final Document doc = new Document();
    final FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS);
    ft.setTokenized(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    doc.add(new Field("f", "a", ft));
    doc.add(new Field("f", "a", ft));
    writer.addDocument(doc);
    final LeafReader reader = getOnlySegmentReader(writer.getReader());
    final Fields fields = reader.getTermVectors(0);
    final Terms terms = fields.terms("f");
    final TermsEnum te = terms.iterator();
    assertEquals(new BytesRef("a"), te.next());
    final PostingsEnum dpe = te.postings(null, PostingsEnum.ALL);
    assertEquals(0, dpe.nextDoc());
    assertEquals(2, dpe.freq());
    assertEquals(0, dpe.nextPosition());
    assertEquals(0, dpe.startOffset());
    final int endOffset = dpe.endOffset();
    assertEquals(1 + positionGap, dpe.nextPosition());
    assertEquals(1 + endOffset + offsetGap, dpe.endOffset());
    assertEquals(null, te.next());
    reader.close();
    writer.close();
    writer.w.getDirectory().close();
  }
Example #13
  public void testSortedNumeric() throws Exception {
    Directory dir = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numValues = random().nextInt(5);
      for (int j = 0; j < numValues; j++) {
        doc.add(
            new SortedNumericDocValuesField(
                "nums", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
      }
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums");
    SortedNumericDocValues single = merged.getSortedNumericDocValues("nums");
    if (multi == null) {
      assertNull(single);
    } else {
      // check values
      for (int i = 0; i < numDocs; i++) {
        single.setDocument(i);
        ArrayList<Long> expectedList = new ArrayList<>();
        for (int j = 0; j < single.count(); j++) {
          expectedList.add(single.valueAt(j));
        }

        multi.setDocument(i);
        assertEquals(expectedList.size(), multi.count());
        for (int j = 0; j < single.count(); j++) {
          assertEquals(expectedList.get(j).longValue(), multi.valueAt(j));
        }
      }
    }

    ir.close();
    ir2.close();
    dir.close();
  }
Example #14
 InsaneReader(LeafReader in, String insaneField) {
   super(in);
   this.insaneField = insaneField;
   ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
   for (FieldInfo fi : in.getFieldInfos()) {
     if (fi.name.equals(insaneField)) {
       filteredInfos.add(
           new FieldInfo(
               fi.name,
               fi.number,
               fi.hasVectors(),
               fi.omitsNorms(),
               fi.hasPayloads(),
               fi.getIndexOptions(),
               DocValuesType.NONE,
               -1,
               Collections.emptyMap(),
               fi.getPointDimensionCount(),
               fi.getPointNumBytes()));
     } else {
       filteredInfos.add(fi);
     }
   }
   fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
 }
 private FieldLookup loadFieldData(String name) {
   FieldLookup data = cachedFieldData.get(name);
   if (data == null) {
     MappedFieldType fieldType = mapperService.smartNameFieldType(name, types);
     if (fieldType == null) {
       throw new IllegalArgumentException(
           "No field found for [" + name + "] in mapping with types " + Arrays.toString(types));
     }
     data = new FieldLookup(fieldType);
     cachedFieldData.put(name, data);
   }
   if (data.fields() == null) {
     String fieldName = data.fieldType().names().indexName();
     fieldVisitor.reset(fieldName);
     try {
       reader.document(docId, fieldVisitor);
       fieldVisitor.postProcess(data.fieldType());
       data.fields(
           ImmutableMap.of(name, fieldVisitor.fields().get(data.fieldType().names().indexName())));
     } catch (IOException e) {
       throw new ElasticsearchParseException("failed to load field [{}]", e, name);
     }
   }
   return data;
 }
Example #16
 /**
  * Find terms in the index based on a prefix. Useful for autocomplete.
  *
  * @param index the index
  * @param fieldName the field
  * @param prefix the prefix we're looking for (null or empty string for all terms)
  * @param sensitive match case-sensitively or not?
  * @param maxResults max. number of results to return (or -1 for all)
  * @return the matching terms
  */
 public static List<String> findTermsByPrefix(
     LeafReader index, String fieldName, String prefix, boolean sensitive, int maxResults) {
   boolean allTerms = prefix == null || prefix.length() == 0;
   if (allTerms) {
     prefix = "";
     sensitive = true; // don't do unnecessary work in this case
   }
   try {
     if (!sensitive) prefix = StringUtil.removeAccents(prefix).toLowerCase();
     org.apache.lucene.index.Terms terms = index.terms(fieldName);
     List<String> results = new ArrayList<>();
     if (terms == null) return results; // field does not exist in this index
     TermsEnum termsEnum = terms.iterator();
     BytesRef brPrefix = new BytesRef(prefix.getBytes(LUCENE_DEFAULT_CHARSET));
     // seekCeil leaves the enum positioned ON the first term >= prefix, so read term()
     // before calling next(); calling next() right away would skip the first match
     if (termsEnum.seekCeil(brPrefix) == TermsEnum.SeekStatus.END) return results;
     for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
       if (maxResults >= 0 && results.size() >= maxResults) break;
       String termText = term.utf8ToString();
       String optDesensitized = termText;
       if (!sensitive) optDesensitized = StringUtil.removeAccents(termText).toLowerCase();
       if (!allTerms && !optDesensitized.startsWith(prefix)) {
         // past the terms sharing the prefix; no more matches
         break;
       }
       // Match, add term
       results.add(termText);
     }
     return results;
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
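A typical autocomplete call against the helper above; the reader, field, and prefix values are illustrative.
  // Hedged usage sketch: up to 10 case-insensitive completions of "qu" in "contents".
  List<String> completions = findTermsByPrefix(leafReader, "contents", "qu", false, 10);
  for (String term : completions) {
    System.out.println(term);
  }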
Example #17
  public void testDocsWithField() throws Exception {
    Directory dir = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextInt(4) > 0) { // sometimes leave "numbers" out; ">= 0" would always be true
        doc.add(new NumericDocValuesField("numbers", random().nextLong()));
      }
      doc.add(new NumericDocValuesField("numbersAlways", random().nextLong()));
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    Bits multi = MultiDocValues.getDocsWithField(ir, "numbers");
    Bits single = merged.getDocsWithField("numbers");
    if (multi == null) {
      assertNull(single);
    } else {
      assertEquals(single.length(), multi.length());
      for (int i = 0; i < numDocs; i++) {
        assertEquals(single.get(i), multi.get(i));
      }
    }

    multi = MultiDocValues.getDocsWithField(ir, "numbersAlways");
    single = merged.getDocsWithField("numbersAlways");
    assertEquals(single.length(), multi.length());
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
    ir.close();
    ir2.close();
    dir.close();
  }
 private Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
   shard.refresh("get_uids");
   try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
     Set<Uid> ids = new HashSet<>();
     for (LeafReaderContext leafContext : searcher.reader().leaves()) {
       LeafReader reader = leafContext.reader();
       Bits liveDocs = reader.getLiveDocs();
       for (int i = 0; i < reader.maxDoc(); i++) {
         if (liveDocs == null || liveDocs.get(i)) {
           Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
           ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
         }
       }
     }
     return ids;
   }
 }
Example #19
 public PostingsEnum randomDocsEnum(
     String field, BytesRef term, List<LeafReaderContext> readers, Bits bits) throws IOException {
   if (random().nextInt(10) == 0) {
     return null;
   }
   LeafReader indexReader = readers.get(random().nextInt(readers.size())).reader();
   Terms terms = indexReader.terms(field);
   if (terms == null) {
     return null;
   }
   TermsEnum iterator = terms.iterator();
   if (iterator.seekExact(term)) {
     return iterator.postings(
         bits, null, random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
   }
   return null;
 }
Example #20
  public void testBasics() throws Exception {
    // sanity check of norms writer
    // TODO: generalize
    LeafReader slow = SlowCompositeReaderWrapper.wrap(reader);
    NumericDocValues fooNorms = slow.getNormValues("foo");
    NumericDocValues barNorms = slow.getNormValues("bar");
    for (int i = 0; i < slow.maxDoc(); i++) {
      assertFalse(fooNorms.get(i) == barNorms.get(i));
    }

    // sanity check of searching
    TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10);
    assertTrue(foodocs.totalHits > 0);
    TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10);
    assertTrue(bardocs.totalHits > 0);
    assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score);
  }
Example #21
  public void testDocValues() throws IOException {
    assertU(adoc("id", "1"));
    assertU(commit());
    try (SolrCore core = h.getCoreInc()) {
      final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
      final SolrIndexSearcher searcher = searcherRef.get();
      try {
        final LeafReader reader = searcher.getLeafReader();
        assertEquals(1, reader.numDocs());
        final FieldInfos infos = reader.getFieldInfos();
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("floatdv").getDocValuesType());
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("intdv").getDocValuesType());
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("doubledv").getDocValuesType());
        assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("longdv").getDocValuesType());
        assertEquals(DocValuesType.SORTED, infos.fieldInfo("stringdv").getDocValuesType());

        assertEquals((long) Float.floatToIntBits(1), reader.getNumericDocValues("floatdv").get(0));
        assertEquals(2L, reader.getNumericDocValues("intdv").get(0));
        assertEquals(Double.doubleToLongBits(3), reader.getNumericDocValues("doubledv").get(0));
        assertEquals(4L, reader.getNumericDocValues("longdv").get(0));

        final IndexSchema schema = core.getLatestSchema();
        final SchemaField floatDv = schema.getField("floatdv");
        final SchemaField intDv = schema.getField("intdv");
        final SchemaField doubleDv = schema.getField("doubledv");
        final SchemaField longDv = schema.getField("longdv");

        FunctionValues values =
            floatDv
                .getType()
                .getValueSource(floatDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(1f, values.floatVal(0), 0f);
        assertEquals(1f, values.objectVal(0));
        values =
            intDv
                .getType()
                .getValueSource(intDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(2, values.intVal(0));
        assertEquals(2, values.objectVal(0));
        values =
            doubleDv
                .getType()
                .getValueSource(doubleDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(3d, values.doubleVal(0), 0d);
        assertEquals(3d, values.objectVal(0));
        values =
            longDv
                .getType()
                .getValueSource(longDv, null)
                .getValues(null, searcher.getLeafReader().leaves().get(0));
        assertEquals(4L, values.longVal(0));
        assertEquals(4L, values.objectVal(0));
      } finally {
        searcherRef.decref();
      }
    }
  }
Example #22
  public void testSorted() throws Exception {
    Directory dir = newDirectory();
    Document doc = new Document();
    Field field = new SortedDocValuesField("bytes", new BytesRef());
    doc.add(field);

    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
      BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random()));
      field.setBytesValue(ref);
      if (random().nextInt(7) == 0) {
        iw.addDocument(new Document());
      }
      iw.addDocument(doc);
      if (random().nextInt(17) == 0) {
        iw.commit();
      }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();

    SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
    SortedDocValues single = merged.getSortedDocValues("bytes");
    assertEquals(single.getValueCount(), multi.getValueCount());
    for (int i = 0; i < numDocs; i++) {
      // check ord
      assertEquals(single.getOrd(i), multi.getOrd(i));
      // check value
      final BytesRef expected = BytesRef.deepCopyOf(single.get(i));
      final BytesRef actual = multi.get(i);
      assertEquals(expected, actual);
    }
    ir.close();
    ir2.close();
    dir.close();
  }
    @Override
    public FunctionValues getValues(Map context, LeafReaderContext readerContext)
        throws IOException {
      final LeafReader reader = readerContext.reader();
      final BinaryDocValues docValues = reader.getBinaryDocValues(fieldName);
      if (docValues == null) return null;

      return new DoubleDocValues(this) {
        public BytesRef scratch = null;

        // TODO why is the value being requested twice per doc?!
        int lastDoc = -1;
        double lastVal;

        @Override
        public double doubleVal(int doc) {
          if (doc == lastDoc) return lastVal;

          // sanity check; shouldn't be necessary
          if (doc < 0 || doc >= reader.maxDoc())
            throw new IllegalStateException("Bad doc " + doc + " for reader " + reader);

          BytesRef bytes = null;
          try { // shouldn't be necessary
            scratch = docValues.get(doc);
            bytes = scratch;
          } catch (ArrayIndexOutOfBoundsException e) {
            if (log.isErrorEnabled())
              log.error(
                  "DocValues index corruption for docid "
                      + doc
                      + " reader "
                      + reader); // don't log 'e'
          }
          if (bytes != null) lastVal = MultiPointEncoding.calcDistance(point, bytes, ctx);
          else lastVal = 1; // 1 degree away, 111.2km

          lastDoc = doc;
          return lastVal;
        }
      };
    }
 public void testSameFieldAddedMultipleTimes() throws IOException {
   MemoryIndex mindex = randomMemoryIndex();
   MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
   mindex.addField("field", "the quick brown fox", mockAnalyzer);
   mindex.addField("field", "jumps over the", mockAnalyzer);
   LeafReader reader = (LeafReader) mindex.createSearcher().getIndexReader();
   TestUtil.checkReader(reader);
   assertEquals(7, reader.terms("field").getSumTotalTermFreq());
   PhraseQuery query = new PhraseQuery("field", "fox", "jumps");
   assertTrue(mindex.search(query) > 0.1);
   mindex.reset();
   mockAnalyzer.setPositionIncrementGap(1 + random().nextInt(10));
   mindex.addField("field", "the quick brown fox", mockAnalyzer);
   mindex.addField("field", "jumps over the", mockAnalyzer);
   assertEquals(0, mindex.search(query), 0.00001f);
   query = new PhraseQuery(10, "field", "fox", "jumps");
   assertTrue(
       "posGap" + mockAnalyzer.getPositionIncrementGap("field"), mindex.search(query) > 0.0001);
   TestUtil.checkReader(mindex.createSearcher().getIndexReader());
 }
 public void testRefCounts2() throws IOException {
   Directory dir1 = getDir1(random());
   Directory dir2 = getDir2(random());
   LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1));
   LeafReader ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2));
   // don't close subreaders, so ParallelReader will increment refcounts
   ParallelLeafReader pr = new ParallelLeafReader(false, ir1, ir2);
   // check RefCounts
   assertEquals(2, ir1.getRefCount());
   assertEquals(2, ir2.getRefCount());
   pr.close();
   assertEquals(1, ir1.getRefCount());
   assertEquals(1, ir2.getRefCount());
   ir1.close();
   ir2.close();
   assertEquals(0, ir1.getRefCount());
   assertEquals(0, ir2.getRefCount());
   dir1.close();
   dir2.close();
 }
  /**
   * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>
   * O(numIndexedFields)</code>.
   *
   * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns
   *     will break out its in-heap bytes separately in the returned {@link CompletionStats}
   */
  public CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    CompletionStats completionStats = new CompletionStats();
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
      LeafReader atomicReader = atomicReaderContext.reader();
      try {
        Fields fields = atomicReader.fields();
        for (String fieldName : fields) {
          Terms terms = fields.terms(fieldName);
          if (terms instanceof CompletionTerms) {
            CompletionTerms completionTerms = (CompletionTerms) terms;
            completionStats.add(completionTerms.stats(fieldNamePatterns));
          }
        }
      } catch (IOException ioe) {
        logger.error("Could not get completion stats", ioe);
      }
    }

    return completionStats;
  }
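A hedged usage sketch for the method above; the pattern string is illustrative, and getSizeInBytes() is assumed to be the CompletionStats accessor for the total.
  // Total suggester heap usage, breaking out fields matching "suggest*" separately.
  CompletionStats stats = completionStats(indexReader, "suggest*");
  long totalBytes = stats.getSizeInBytes();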
  public void testCloseInnerReader() throws Exception {
    Directory dir1 = getDir1(random());
    LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1));

    // with overlapping
    ParallelLeafReader pr =
        new ParallelLeafReader(true, new LeafReader[] {ir1}, new LeafReader[] {ir1});

    ir1.close();

    try {
      pr.document(0);
      fail("ParallelLeafReader should be already closed because inner reader was closed!");
    } catch (AlreadyClosedException e) {
      // pass
    }

    // noop:
    pr.close();
    dir1.close();
  }
Example #28
  public AssertingLeafReader(LeafReader in) {
    super(in);
    // check some basic reader sanity
    assert in.maxDoc() >= 0;
    assert in.numDocs() <= in.maxDoc();
    assert in.numDeletedDocs() + in.numDocs() == in.maxDoc();
    assert !in.hasDeletions() || in.numDeletedDocs() > 0 && in.numDocs() < in.maxDoc();

    addCoreClosedListener(
        new CoreClosedListener() {
          @Override
          public void onClose(Object ownerCoreCacheKey) throws IOException {
            final Object expectedKey = getCoreCacheKey();
            assert expectedKey == ownerCoreCacheKey
                : "Core closed listener called on a different key "
                    + expectedKey
                    + " <> "
                    + ownerCoreCacheKey;
          }
        });
  }
Example #29
  public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
    DirectoryReader reader = searcher.getRawReader(); // raw reader to avoid extra wrapping overhead
    int maxDoc = searcher.getIndexReader().maxDoc();
    int smallSetSize = smallSetSize(maxDoc);

    String field = term.field();
    BytesRef termVal = term.bytes();

    int maxCount = 0;
    int firstReader = -1;
    List<LeafReaderContext> leaves = reader.leaves();
    // use an array: slightly higher scanning cost, but fewer memory allocations
    PostingsEnum[] postList = new PostingsEnum[leaves.size()];
    for (LeafReaderContext ctx : leaves) {
      assert leaves.get(ctx.ord) == ctx;
      LeafReader r = ctx.reader();
      Fields f = r.fields();
      Terms t = f.terms(field);
      if (t == null) continue; // field is missing
      TermsEnum te = t.iterator();
      if (te.seekExact(termVal)) {
        maxCount += te.docFreq();
        postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
        if (firstReader < 0) firstReader = ctx.ord;
      }
    }

    if (maxCount == 0) {
      return DocSet.EMPTY;
    }

    if (maxCount <= smallSetSize) {
      return createSmallSet(leaves, postList, maxCount, firstReader);
    }

    return createBigSet(leaves, postList, maxDoc, firstReader);
  }
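The smallSetSize threshold used above is defined elsewhere; the following is only a plausible sketch of such a heuristic, not Solr's actual constants. The idea: a sorted-int DocSet costs one int per hit while a bitset costs maxDoc/64 longs, so the crossover sits near maxDoc/64.
  // Hedged sketch of a small-set threshold in the spirit of the code above.
  static int smallSetSize(int maxDoc) {
    return (maxDoc >> 6) + 5; // roughly maxDoc/64, padded so tiny indexes still qualify
  }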
  public void testDocsEnumStart() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    MemoryIndex memory =
        new MemoryIndex(random().nextBoolean(), false, random().nextInt(50) * 1024 * 1024);
    memory.addField("foo", "bar", analyzer);
    LeafReader reader = (LeafReader) memory.createSearcher().getIndexReader();
    TestUtil.checkReader(reader);
    PostingsEnum disi =
        TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, PostingsEnum.NONE);
    int docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = reader.terms("foo").iterator();
    assertTrue(te.seekExact(new BytesRef("bar")));
    disi = te.postings(disi, PostingsEnum.NONE);
    docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    reader.close();
  }