public void test() throws Exception {
    BaseDirectoryWrapper d = newDirectory();
    d.setCheckIndexOnClose(false);
    // we nuke files, but verify the reader still works
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newField("foo", "bar", TextField.TYPE_NOT_STORED));
      w.addDocument(doc);
    }

    // open an NRT reader before closing the writer: its open file handles are what
    // keep the data readable even after the files are deleted below
    IndexReader r = w.getReader();
    w.commit();
    w.close();

    for (String fileName : d.listAll()) {
      try {
        d.deleteFile(fileName);
        // may succeed, e.g. if the file is completely read into RAM.
      } catch (IOException ioe) {
        // ignore: this means codec (correctly) is holding
        // the file open
      }
    }

    for (LeafReaderContext cxt : r.leaves()) {
      TestUtil.checkReader(cxt.reader());
    }

    r.close();
    d.close();
  }
    Query createCandidateQuery(IndexReader indexReader) throws IOException {
      List<Term> extractedTerms = new ArrayList<>();
      // include extractionResultField:failed, because docs with this term have no
      // extractedTermsField and otherwise we would fail to return these docs. Docs that
      // failed query term extraction always need to be verified by MemoryIndex:
      extractedTerms.add(new Term(extractionResultField.name(), EXTRACTION_FAILED));

      // this reader is expected to hold only the in-memory document(s) being
      // percolated, which is why just the first leaf is inspected
      LeafReader reader = indexReader.leaves().get(0).reader();
      Fields fields = reader.fields();
      for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
          continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
          BytesRefBuilder builder = new BytesRefBuilder();
          builder.append(fieldBr);
          builder.append(FIELD_VALUE_SEPARATOR);
          builder.append(term);
          extractedTerms.add(new Term(queryTermsField.name(), builder.toBytesRef()));
        }
      }
      return new TermsQuery(extractedTerms);
    }
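A hedged usage sketch (not from the original source): the candidate query above only
narrows down which stored percolator queries might match, and any candidate, in
particular one tagged EXTRACTION_FAILED, still has to be verified against a MemoryIndex
built from the document being percolated. The names document, analyzer, and
candidateQueries below are assumptions.

// Hypothetical verification step, sketched under the assumptions named above.
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer);
IndexSearcher verifier = memoryIndex.createSearcher();
for (Query candidate : candidateQueries) {
  if (verifier.count(candidate) > 0) {
    // the percolated document really matches this stored query
  }
}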
Example #3
 public static Map<String, Integer> termFrequencies(
     IndexSearcher indexSearcher,
     Query documentFilterQuery,
     String fieldName,
     String propName,
     String altName) {
   try {
     String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
     Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
     Map<String, Integer> freq = new HashMap<>();
     IndexReader indexReader = indexSearcher.getIndexReader();
     for (LeafReaderContext arc : indexReader.leaves()) {
       if (weight == null) throw new RuntimeException("weight == null");
       if (arc == null) throw new RuntimeException("arc == null");
       if (arc.reader() == null) throw new RuntimeException("arc.reader() == null");
       Scorer scorer = weight.scorer(arc, arc.reader().getLiveDocs());
       if (scorer != null) {
         while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
           getFrequenciesFromTermVector(
               indexReader, scorer.docID() + arc.docBase, luceneField, freq);
         }
       }
     }
     return freq;
   } catch (IOException e) {
     throw ExUtil.wrapRuntimeException(e);
   }
 }
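The helper getFrequenciesFromTermVector is not shown above. Below is a minimal sketch of
what such a helper might look like, assuming the field was indexed with term vectors;
the signature is inferred from the call site and may differ from the original.

private static void getFrequenciesFromTermVector(
    IndexReader reader, int globalDocId, String luceneField, Map<String, Integer> freq)
    throws IOException {
  // read the per-document term vector and accumulate per-term frequencies
  Terms terms = reader.getTermVector(globalDocId, luceneField);
  if (terms == null) {
    return; // no term vector stored for this document/field
  }
  TermsEnum termsEnum = terms.iterator();
  for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
    int tf = (int) termsEnum.totalTermFreq(); // inside a term vector: freq in this doc
    freq.merge(term.utf8ToString(), tf, Integer::sum);
  }
}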
 private void createIndex(
     IndexWriterConfig config,
     Directory target,
     IndexReader reader,
     Filter preserveFilter,
     boolean negateFilter)
     throws IOException {
   boolean success = false;
   final IndexWriter w = new IndexWriter(target, config);
   try {
     final List<LeafReaderContext> leaves = reader.leaves();
     final IndexReader[] subReaders = new IndexReader[leaves.size()];
     int i = 0;
     for (final LeafReaderContext ctx : leaves) {
       subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveFilter, negateFilter);
     }
     w.addIndexes(subReaders);
     success = true;
   } finally {
     if (success) {
       w.close();
     } else {
       IOUtils.closeWhileHandlingException(w);
     }
   }
 }
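A hedged usage sketch: sourceDir, targetDir, analyzer, and keepFilter are assumptions,
not names from the original class.

// Hypothetical invocation: copy only the documents accepted by keepFilter into a
// fresh index, leaving the source index untouched.
try (DirectoryReader source = DirectoryReader.open(sourceDir)) {
  createIndex(new IndexWriterConfig(analyzer), targetDir, source, keepFilter, false);
  // passing negateFilter=true instead writes the complementary set of documents
}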
  public void testNormsWithDocValues() throws Exception {
    MemoryIndex mi = new MemoryIndex(true, true);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());

    mi.addField(
        new BinaryDocValuesField("text", new BytesRef("quick brown fox")), mockAnalyzer, 5f);
    mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), mockAnalyzer, 5f);
    LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

    Document doc = new Document();
    doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
    Field field = new TextField("text", "quick brown fox", Field.Store.NO);
    field.setBoost(5f);
    doc.add(field);
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.addDocument(doc);
    writer.close();

    IndexReader controlIndexReader = DirectoryReader.open(dir);
    LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

    assertEquals(
        controlLeafReader.getNormValues("text").get(0), leafReader.getNormValues("text").get(0));

    controlIndexReader.close();
    dir.close();
  }
  private void getPrefixTerms(
      ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from
    // each segment into one terms instance, which is very expensive. Therefore I think
    // it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
      Terms _terms = leaf.reader().terms(field);
      if (_terms == null) {
        continue;
      }

      TermsEnum termsEnum = _terms.iterator();
      TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
      if (TermsEnum.SeekStatus.END == seekStatus) {
        continue;
      }

      for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
        if (!StringHelper.startsWith(term, prefix.bytes())) {
          break;
        }

        terms.add(new Term(field, BytesRef.deepCopyOf(term)));
        if (terms.size() >= maxExpansions) {
          return;
        }
      }
    }
  }
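A hedged follow-up sketch (not part of the original class): the expanded terms are
typically OR-ed together, for example into a BooleanQuery. ObjectHashSet and
ObjectCursor come from the HPPC library; field and reader mirror the members the
method above relies on.

ObjectHashSet<Term> expansions = new ObjectHashSet<>();
getPrefixTerms(expansions, new Term(field, "luc"), reader);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (ObjectCursor<Term> cursor : expansions) {
  builder.add(new TermQuery(cursor.value), BooleanClause.Occur.SHOULD);
}
Query prefixExpanded = builder.build();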
 String buildDocumentTitle(AtomicReader inSegmentReader, final DocumentDescriptor inDescriptor) {
   // inDescriptor points into the same segment that inSegmentReader wraps, so use the
   // passed-in reader directly instead of re-fetching it from _indexReader
   String author = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "author");
   String header = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "header");
   String medium = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "medium");
   String created = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "grcreated");
   String publ_year = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "publ_year");
   String res = author + ". " + header;
   if ( // gazeta
   medium.equals("\u0433\u0430\u0437\u0435\u0442\u0430")
       // zhurnal
       || medium.equals("\u0436\u0443\u0440\u043d\u0430\u043b")
       // elektronnoe izdanie
       || medium.equals(
           "\u044d\u043b\u0435\u043a\u0442\u0440\u043e\u043d\u043d\u043e\u0435 \u0438\u0437\u0434\u0430\u043d\u0438\u0435")) {
     String publication =
         Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "publication");
     if (created.equals(publ_year)) {
       res += " // " + publication + ", " + publ_year;
     } else {
       res += " (" + created + ") // " + publication + ", " + publ_year;
     }
   } else {
     res += " (" + created + ")";
   }
   return res;
 }
Example #8
  public static void main(String[] args) throws IOException, InterruptedException {
    Path indexPath = Paths.get(args[0]);

    String sparseOrNot = args[1];
    boolean sparse;
    if (sparseOrNot.equals("sparse")) {
      sparse = true;
    } else if (sparseOrNot.equals("nonsparse")) {
      sparse = false;
    } else {
      throw new IllegalArgumentException("expected sparse or nonsparse but got: " + sparseOrNot);
    }

    Directory dir = FSDirectory.open(indexPath);

    IndexReader reader = DirectoryReader.open(dir);
    System.out.println("READER: " + reader);
    long bytes = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
      CodecReader cr = (CodecReader) ctx.reader();
      System.out.println("\nREADER: " + cr);
      for (Accountable acc : cr.getChildResources()) {
        System.out.println("  " + Accountables.toString(acc));
      }
      bytes += cr.ramBytesUsed();
    }
    System.out.println("HEAP: " + bytes);

    IndexSearcher searcher = new IndexSearcher(reader);

    Random random = new Random(17);

    SearchThread[] threads = new SearchThread[2];
    for (int i = 0; i < threads.length; i++) {
      threads[i] = new SearchThread(i, sparse, searcher, 500, new Random(random.nextLong()));
      threads[i].start();
    }

    for (SearchThread thread : threads) {
      thread.join();
    }

    /*
    SearchThread[] threads = new SearchThread[] {new SearchThread(0, sparse, searcher, 1000, new Random(random.nextLong()))};
    threads[0].run();
    */

    for (SearchThread thread : threads) {
      for (String line : thread.results) {
        System.out.println(line);
      }
    }

    IOUtils.close(reader, dir);
  }
  @Test
  public void testNestedChildrenFilter() throws Exception {
    int numParentDocs = scaledRandomIntBetween(0, 32);
    int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numParentDocs; i++) {
      int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
      List<Document> docs = new ArrayList<>(numChildDocs + 1);
      for (int j = 0; j < numChildDocs; j++) {
        Document childDoc = new Document();
        childDoc.add(new StringField("type", "child", Field.Store.NO));
        docs.add(childDoc);
      }

      Document parentDoc = new Document();
      parentDoc.add(new StringField("type", "parent", Field.Store.NO));
      parentDoc.add(new IntField("num_child_docs", numChildDocs, Field.Store.YES));
      docs.add(parentDoc);
      writer.addDocuments(docs);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = new IndexSearcher(reader);
    FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
    BitDocIdSetFilter parentFilter =
        new BitDocIdSetCachingWrapperFilter(
            new QueryWrapperFilter(new TermQuery(new Term("type", "parent"))));
    Filter childFilter = new QueryWrapperFilter(new TermQuery(new Term("type", "child")));
    int checkedParents = 0;
    for (LeafReaderContext leaf : reader.leaves()) {
      DocIdSetIterator parents = parentFilter.getDocIdSet(leaf).iterator();
      for (int parentDoc = parents.nextDoc();
          parentDoc != DocIdSetIterator.NO_MORE_DOCS;
          parentDoc = parents.nextDoc()) {
        int expectedChildDocs =
            leaf.reader().document(parentDoc).getField("num_child_docs").numericValue().intValue();
        hitContext.reset(null, leaf, parentDoc, searcher);
        NestedChildrenFilter nestedChildrenFilter =
            new NestedChildrenFilter(parentFilter, childFilter, hitContext);
        TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
        searcher.search(new ConstantScoreQuery(nestedChildrenFilter), totalHitCountCollector);
        assertThat(totalHitCountCollector.getTotalHits(), equalTo(expectedChildDocs));
        checkedParents++;
      }
    }
    assertThat(checkedParents, equalTo(numParentDocs));
    reader.close();
    dir.close();
  }
 public void testCollector() throws IOException {
   TotalHitCountCollector collector = new TotalHitCountCollector();
   ProfileCollector profileCollector = new ProfileCollector(collector);
   assertEquals(0, profileCollector.getTime());
   final LeafCollector leafCollector = profileCollector.getLeafCollector(reader.leaves().get(0));
   assertThat(profileCollector.getTime(), greaterThan(0L));
   long time = profileCollector.getTime();
   leafCollector.setScorer(null);
   assertThat(profileCollector.getTime(), greaterThan(time));
   time = profileCollector.getTime();
   leafCollector.collect(0);
   assertThat(profileCollector.getTime(), greaterThan(time));
 }
  @Override
  public void setContext(TransformContext context) {
    try {
      IndexReader reader = qparser.getReq().getSearcher().getIndexReader();
      readerContexts = reader.leaves();
      docValuesArr = new FunctionValues[readerContexts.size()];

      searcher = qparser.getReq().getSearcher();
      fcontext = ValueSource.newContext(searcher);
      this.valueSource.createWeight(fcontext, searcher);
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    }
  }
 @Override
 public IndexOrdinalsFieldData loadGlobal(IndexReader indexReader) {
   if (indexReader.leaves().size() <= 1) {
     // ordinals are already global
     return this;
   }
   try {
     return cache.load(indexReader, this);
   } catch (Throwable e) {
     if (e instanceof ElasticsearchException) {
       throw (ElasticsearchException) e;
     } else {
       throw new ElasticsearchException(e.getMessage(), e);
     }
   }
 }
 @Override
 public Query rewrite(IndexReader reader) throws IOException {
   if (getBoost() != 1f) {
     return super.rewrite(reader);
   }
   if (this.terms.isEmpty()) {
     return new MatchNoDocsQuery();
   } else if (this.terms.size() == 1) {
     return newTermQuery(this.terms.get(0), null);
   }
   final List<LeafReaderContext> leaves = reader.leaves();
   final int maxDoc = reader.maxDoc();
   final TermContext[] contextArray = new TermContext[terms.size()];
   final Term[] queryTerms = this.terms.toArray(new Term[0]);
   collectTermContext(reader, leaves, contextArray, queryTerms);
   return buildQuery(maxDoc, contextArray, queryTerms);
 }
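collectTermContext is not shown here; the sketch below is modeled on the equivalent
helper in Lucene's CommonTermsQuery and may differ from the original in detail.

static void collectTermContext(
    IndexReader reader,
    List<LeafReaderContext> leaves,
    TermContext[] contextArray,
    Term[] queryTerms) throws IOException {
  for (LeafReaderContext context : leaves) {
    for (int i = 0; i < queryTerms.length; i++) {
      Terms terms = context.reader().terms(queryTerms[i].field());
      if (terms == null) {
        continue; // field is not present in this segment
      }
      TermsEnum termsEnum = terms.iterator();
      if (termsEnum.seekExact(queryTerms[i].bytes())) {
        if (contextArray[i] == null) {
          contextArray[i] = new TermContext(reader.getContext());
        }
        contextArray[i].register(
            termsEnum.termState(), context.ord,
            termsEnum.docFreq(), termsEnum.totalTermFreq());
      }
    }
  }
}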
  void createDocumentNode(final DocumentDescriptor inDescriptor) throws IOException {
    try {
      _document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
      _rootNode = _document.createElement("document");
    } catch (ParserConfigurationException e) {
      e.printStackTrace();
      System.exit(1);
    }
    AtomicReader segmentReader = _indexReader.leaves().get(inDescriptor.segmentNumber).reader();
    _rootNode.setAttribute("id", DocumentIdOperations.documentDescriptorToId(inDescriptor));
    // TODO: implement the proper way of building a title from the production report
    _rootNode.setAttribute("title", buildDocumentTitle(segmentReader, inDescriptor));
    _rootNode.setAttribute("path", "ruscorpora.ru");
    _rootNode.setAttribute(
        "tagging", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "tagging"));
    _rootNode.setAttribute("snippets", "0");
    Element attributesNode = _document.createElement("attributes");

    _rootNode.appendChild(attributesNode);

    FieldInfos fields = segmentReader.getFieldInfos();
    for (int fieldIndex = 0; fieldIndex != fields.size(); ++fieldIndex) {
      FieldInfo field = fields.fieldInfo(fieldIndex);
      // TODO: understand why field may turn into null
      if (field == null) {
        continue;
      }
      String name = field.name;
      if (Attributes.ATTRIBUTES.contains(name)
          || Attributes.ATTRIBUTES_FOR_REPORT.contains(name)
          || Attributes.ATTRIBUTES_FOR_WORD_INFO.contains(name)
          || !field.hasDocValues()) {
        // it's a word attribute
        continue;
      }
      Element attrNode = _document.createElement("attr");
      attrNode.setAttribute("name", name);
      attrNode.setAttribute(
          "value", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, name));
      attributesNode.appendChild(attrNode);
    }
  }
 private Query newTermQuery(IndexReader reader, Term term) throws IOException {
   if (ignoreTF) {
     return new ConstantScoreQuery(new TermQuery(term));
   } else {
     // we build an artificial TermContext that will give an overall df and ttf
     // equal to 1
     TermContext context = new TermContext(reader.getContext());
     for (LeafReaderContext leafContext : reader.leaves()) {
       Terms terms = leafContext.reader().terms(term.field());
       if (terms != null) {
         TermsEnum termsEnum = terms.iterator();
         if (termsEnum.seekExact(term.bytes())) {
           int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
           context.register(termsEnum.termState(), leafContext.ord, freq, freq);
         }
       }
     }
     return new TermQuery(term, context);
   }
 }
  public void initialize(Engine.Searcher docSearcher, ParsedDocument parsedDocument) {
    this.docSearcher = docSearcher;

    IndexReader indexReader = docSearcher.reader();
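    // the parsed document was indexed alone into an in-memory index, so it is always
    // doc 0 of the first (and only) leaf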
    LeafReaderContext atomicReaderContext = indexReader.leaves().get(0);
    LeafSearchLookup leafLookup = lookup().getLeafSearchLookup(atomicReaderContext);
    leafLookup.setDocument(0);
    leafLookup.source().setSource(parsedDocument.source());

    Map<String, SearchHitField> fields = new HashMap<>();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
      fields.put(field.name(), new InternalSearchHitField(field.name(), Collections.emptyList()));
    }
    hitContext()
        .reset(
            new InternalSearchHit(0, "unknown", new StringText(parsedDocument.type()), fields),
            atomicReaderContext,
            0,
            docSearcher.searcher());
  }
  /**
   * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>
   * O(numIndexedFields)</code>.
   *
   * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns
   *     will break out its in-heap bytes separately in the returned {@link CompletionStats}
   */
  public CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    CompletionStats completionStats = new CompletionStats();
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
      LeafReader atomicReader = atomicReaderContext.reader();
      try {
        Fields fields = atomicReader.fields();
        for (String fieldName : fields) {
          Terms terms = fields.terms(fieldName);
          if (terms instanceof CompletionTerms) {
            CompletionTerms completionTerms = (CompletionTerms) terms;
            completionStats.add(completionTerms.stats(fieldNamePatterns));
          }
        }
      } catch (IOException ioe) {
        logger.error("Could not get completion stats", ioe);
      }
    }

    return completionStats;
  }
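A hedged usage sketch; the pattern argument is illustrative.

// Hypothetical call: aggregate suggester heap usage across all segments, breaking
// out completion fields whose names match "suggest*" separately in the stats.
CompletionStats stats = completionStats(indexReader, "suggest*");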
Example #18
  public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
    long totalTF = 0L;
    for (final AtomicReaderContext ctx : reader.leaves()) {
      AtomicReader r = ctx.reader();
      if (!r.hasDeletions()) {
        // TODO: we could do this up front, during the scan
        // (next()), instead of after-the-fact here w/ seek,
        // if the codec supports it and there are no del
        // docs...
        final long totTF = r.totalTermFreq(term);
        if (totTF != -1) {
          totalTF += totTF;
          continue;
        } // otherwise we fall-through
      }
      // note: what should we do if field omits freqs? currently it counts as 1...
      DocsEnum de = r.termDocsEnum(term);
      if (de != null) {
        while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) totalTF += de.freq();
      }
    }

    return totalTF;
  }
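A hedged usage sketch: the field and term are illustrative. When a segment has
deletions, the method above falls back to walking the postings, so only live
documents contribute to the total.

long occurrences = getTotalTermFreq(reader, new Term("body", "lucene"));
System.out.println("total term freq: " + occurrences);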
  // LUCENE-5644: for the first segment, two threads each indexed one doc (likely
  // concurrently), but for the second segment, each thread indexed its doc NOT at the
  // same time, and so they should have shared the same thread state / segment
  public void testSegmentCountOnFlushBasic() throws Exception {
    Directory dir = newDirectory();
    final IndexWriter w =
        new IndexWriter(
            dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    final CountDownLatch startingGun = new CountDownLatch(1);
    final CountDownLatch startDone = new CountDownLatch(2);
    final CountDownLatch middleGun = new CountDownLatch(1);
    final CountDownLatch finalGun = new CountDownLatch(1);
    Thread[] threads = new Thread[2];
    for (int i = 0; i < threads.length; i++) {
      final int threadID = i;
      threads[i] =
          new Thread() {
            @Override
            public void run() {
              try {
                startingGun.await();
                Document doc = new Document();
                doc.add(newTextField("field", "here is some text", Field.Store.NO));
                w.addDocument(doc);
                startDone.countDown();

                middleGun.await();
                if (threadID == 0) {
                  w.addDocument(doc);
                } else {
                  finalGun.await();
                  w.addDocument(doc);
                }
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };
      threads[i].start();
    }

    startingGun.countDown();
    startDone.await();

    IndexReader r = DirectoryReader.open(w, true);
    assertEquals(2, r.numDocs());
    int numSegments = r.leaves().size();
    // 1 segment if the threads ran sequentially, else 2:
    assertTrue(numSegments <= 2);
    r.close();

    middleGun.countDown();
    threads[0].join();

    finalGun.countDown();
    threads[1].join();

    r = DirectoryReader.open(w, true);
    assertEquals(4, r.numDocs());
    // Both threads should have shared a single thread state since they did not try to index
    // concurrently:
    assertEquals(1 + numSegments, r.leaves().size());
    r.close();

    w.close();
    dir.close();
  }
  public void testDocValuesMemoryIndexVsNormalIndex() throws Exception {
    Document doc = new Document();
    long randomLong = random().nextLong();
    doc.add(new NumericDocValuesField("numeric", randomLong));
    if (random().nextBoolean()) {
      doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
    }
    int numValues = atLeast(5);
    for (int i = 0; i < numValues; i++) {
      randomLong = random().nextLong();
      doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
      if (random().nextBoolean()) {
        // randomly duplicate field/value
        doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
      }
      if (random().nextBoolean()) {
        doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
      }
    }
    BytesRef randomTerm = new BytesRef(randomTerm());
    doc.add(new BinaryDocValuesField("binary", randomTerm));
    if (random().nextBoolean()) {
      doc.add(new StringField("binary", randomTerm, Field.Store.NO));
    }
    randomTerm = new BytesRef(randomTerm());
    doc.add(new SortedDocValuesField("sorted", randomTerm));
    if (random().nextBoolean()) {
      doc.add(new StringField("sorted", randomTerm, Field.Store.NO));
    }
    numValues = atLeast(5);
    for (int i = 0; i < numValues; i++) {
      randomTerm = new BytesRef(randomTerm());
      doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
      if (random().nextBoolean()) {
        // randomly duplicate field/value
        doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
      }
      if (random().nextBoolean()) {
        // randomly just add a normal string field
        doc.add(new StringField("sorted_set", randomTerm, Field.Store.NO));
      }
    }

    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, mockAnalyzer);
    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
    LeafReader leafReader = indexReader.leaves().get(0).reader();

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.addDocument(doc);
    writer.close();
    IndexReader controlIndexReader = DirectoryReader.open(dir);
    LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

    NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
    NumericDocValues controlNumericDocValues = controlLeafReader.getNumericDocValues("numeric");
    assertEquals(controlNumericDocValues.get(0), numericDocValues.get(0));

    SortedNumericDocValues sortedNumericDocValues =
        leafReader.getSortedNumericDocValues("sorted_numeric");
    sortedNumericDocValues.setDocument(0);
    SortedNumericDocValues controlSortedNumericDocValues =
        controlLeafReader.getSortedNumericDocValues("sorted_numeric");
    controlSortedNumericDocValues.setDocument(0);
    assertEquals(controlSortedNumericDocValues.count(), sortedNumericDocValues.count());
    for (int i = 0; i < controlSortedNumericDocValues.count(); i++) {
      assertEquals(controlSortedNumericDocValues.valueAt(i), sortedNumericDocValues.valueAt(i));
    }

    BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
    BinaryDocValues controlBinaryDocValues = controlLeafReader.getBinaryDocValues("binary");
    assertEquals(controlBinaryDocValues.get(0), binaryDocValues.get(0));

    SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
    SortedDocValues controlSortedDocValues = controlLeafReader.getSortedDocValues("sorted");
    assertEquals(controlSortedDocValues.getValueCount(), sortedDocValues.getValueCount());
    assertEquals(controlSortedDocValues.get(0), sortedDocValues.get(0));
    assertEquals(controlSortedDocValues.getOrd(0), sortedDocValues.getOrd(0));
    assertEquals(controlSortedDocValues.lookupOrd(0), sortedDocValues.lookupOrd(0));

    SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
    sortedSetDocValues.setDocument(0);
    SortedSetDocValues controlSortedSetDocValues =
        controlLeafReader.getSortedSetDocValues("sorted_set");
    controlSortedSetDocValues.setDocument(0);
    assertEquals(controlSortedSetDocValues.getValueCount(), sortedSetDocValues.getValueCount());
    for (long controlOrd = controlSortedSetDocValues.nextOrd();
        controlOrd != SortedSetDocValues.NO_MORE_ORDS;
        controlOrd = controlSortedSetDocValues.nextOrd()) {
      assertEquals(controlOrd, sortedSetDocValues.nextOrd());
      assertEquals(
          controlSortedSetDocValues.lookupOrd(controlOrd),
          sortedSetDocValues.lookupOrd(controlOrd));
    }
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());

    indexReader.close();
    controlIndexReader.close();
    dir.close();
  }
Example #21
  @Override
  protected Suggest.Suggestion<
          ? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
      innerExecute(
          String name,
          CompletionSuggestionContext suggestionContext,
          IndexReader indexReader,
          CharsRef spare)
          throws IOException {
    if (suggestionContext.mapper() == null
        || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
      throw new ElasticsearchException(
          "Field [" + suggestionContext.getField() + "] is not a completion suggest field");
    }

    CompletionSuggestion completionSuggestion =
        new CompletionSuggestion(name, suggestionContext.getSize());
    UnicodeUtil.UTF8toUTF16(suggestionContext.getText(), spare);

    CompletionSuggestion.Entry completionSuggestEntry =
        new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
    completionSuggestion.addTerm(completionSuggestEntry);

    String fieldName = suggestionContext.getField();
    Map<String, CompletionSuggestion.Entry.Option> results =
        Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
    for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
      AtomicReader atomicReader = atomicReaderContext.reader();
      Terms terms = atomicReader.fields().terms(fieldName);
      if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
        final Completion090PostingsFormat.CompletionTerms lookupTerms =
            (Completion090PostingsFormat.CompletionTerms) terms;
        final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
        if (lookup == null) {
          // we don't have a lookup for this segment... this can happen if a merge
          // dropped all docs that had a value for this field in this segment.
          continue;
        }
        List<Lookup.LookupResult> lookupResults =
            lookup.lookup(spare, false, suggestionContext.getSize());
        for (Lookup.LookupResult res : lookupResults) {

          final String key = res.key.toString();
          final float score = res.value;
          final Option value = results.get(key);
          if (value == null) {
            final Option option =
                new CompletionSuggestion.Entry.Option(
                    new StringText(key),
                    score,
                    res.payload == null ? null : new BytesArray(res.payload));
            results.put(key, option);
          } else if (value.getScore() < score) {
            value.setScore(score);
            value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
          }
        }
      }
    }
    final List<CompletionSuggestion.Entry.Option> options =
        new ArrayList<CompletionSuggestion.Entry.Option>(results.values());
    CollectionUtil.introSort(options, scoreComparator);

    int optionCount = Math.min(suggestionContext.getSize(), options.size());
    for (int i = 0; i < optionCount; i++) {
      completionSuggestEntry.addOption(options.get(i));
    }

    return completionSuggestion;
  }
  @Override
  public IndexFieldData.WithOrdinals build(
      final IndexReader indexReader,
      IndexFieldData.WithOrdinals indexFieldData,
      Settings settings,
      CircuitBreakerService breakerService)
      throws IOException {
    assert indexReader.leaves().size() > 1;
    long startTime = System.currentTimeMillis();

    // It makes sense to make the overhead ratio configurable for the mapping from
    // segment ords to global ords. However, the other mappings are never the
    // bottleneck and are only used to get the original value from an ord, so it makes
    // sense to force COMPACT for them.
    final float acceptableOverheadRatio =
        settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FAST);
    final AppendingPackedLongBuffer globalOrdToFirstSegment =
        new AppendingPackedLongBuffer(PackedInts.COMPACT);
    final MonotonicAppendingLongBuffer globalOrdToFirstSegmentDelta =
        new MonotonicAppendingLongBuffer(PackedInts.COMPACT);

    FieldDataType fieldDataType = indexFieldData.getFieldDataType();
    int defaultThreshold =
        settings.getAsInt(
            ORDINAL_MAPPING_THRESHOLD_INDEX_SETTING_KEY, ORDINAL_MAPPING_THRESHOLD_DEFAULT);
    int threshold =
        fieldDataType.getSettings().getAsInt(ORDINAL_MAPPING_THRESHOLD_KEY, defaultThreshold);
    OrdinalMappingSourceBuilder ordinalMappingBuilder =
        new OrdinalMappingSourceBuilder(
            indexReader.leaves().size(), acceptableOverheadRatio, threshold);

    long currentGlobalOrdinal = 0;
    final AtomicFieldData.WithOrdinals[] withOrdinals =
        new AtomicFieldData.WithOrdinals[indexReader.leaves().size()];
    TermIterator termIterator =
        new TermIterator(indexFieldData, indexReader.leaves(), withOrdinals);
    for (BytesRef term = termIterator.next(); term != null; term = termIterator.next()) {
      globalOrdToFirstSegment.add(termIterator.firstReaderIndex());
      long globalOrdinalDelta = currentGlobalOrdinal - termIterator.firstLocalOrdinal();
      globalOrdToFirstSegmentDelta.add(globalOrdinalDelta);
      for (TermIterator.LeafSource leafSource : termIterator.competitiveLeafs()) {
        ordinalMappingBuilder.onOrdinal(
            leafSource.context.ord, leafSource.tenum.ord(), currentGlobalOrdinal);
      }
      currentGlobalOrdinal++;
    }

    // ram used for the globalOrd to segmentOrd and segmentOrd to firstReaderIndex lookups
    long memorySizeInBytesCounter = 0;
    globalOrdToFirstSegment.freeze();
    memorySizeInBytesCounter += globalOrdToFirstSegment.ramBytesUsed();
    globalOrdToFirstSegmentDelta.freeze();
    memorySizeInBytesCounter += globalOrdToFirstSegmentDelta.ramBytesUsed();

    final long maxOrd = currentGlobalOrdinal;
    OrdinalMappingSource[] segmentOrdToGlobalOrdLookups = ordinalMappingBuilder.build(maxOrd);
    // add ram used for the main segmentOrd to globalOrd lookups
    memorySizeInBytesCounter += ordinalMappingBuilder.getMemorySizeInBytes();

    final long memorySizeInBytes = memorySizeInBytesCounter;
    breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);

    if (logger.isDebugEnabled()) {
      // this does include the [] from the array in the impl name
      String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
      logger.debug(
          "Global-ordinals[{}][{}][{}] took {} ms",
          implName,
          indexFieldData.getFieldNames().fullName(),
          maxOrd,
          (System.currentTimeMillis() - startTime));
    }
    return new InternalGlobalOrdinalsIndexFieldData(
        indexFieldData.index(),
        settings,
        indexFieldData.getFieldNames(),
        fieldDataType,
        withOrdinals,
        globalOrdToFirstSegment,
        globalOrdToFirstSegmentDelta,
        segmentOrdToGlobalOrdLookups,
        memorySizeInBytes);
  }
Example #23
  @Override
  public void execute(String[] args, PrintStream out) throws Exception {
    String field = null;
    String termVal = null;
    if (args.length > 0) {
      field = args[0];
    }

    if (field != null) {
      String[] parts = field.split(":");
      if (parts.length > 1) {
        field = parts[0];
        termVal = parts[1];
      }
    }

    if (field == null || termVal == null) {
      out.println("usage: field:term");
      out.flush();
      return;
    }

    IndexReader reader = ctx.getIndexReader();
    List<AtomicReaderContext> leaves = reader.leaves();
    int docBase = 0;
    int numPerPage = 20;
    for (AtomicReaderContext leaf : leaves) {
      AtomicReader atomicReader = leaf.reader();
      Terms terms = atomicReader.terms(field);
      if (terms == null) {
        continue;
      }
      boolean hasPositions = terms.hasPositions();
      if (termVal != null) { // terms is non-null here; it was already checked above
        TermsEnum te = terms.iterator(null);
        int count = 0;
        if (te.seekExact(new BytesRef(termVal), true)) {

          if (hasPositions) {
            DocsAndPositionsEnum iter = te.docsAndPositions(atomicReader.getLiveDocs(), null);
            int docid;
            while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              count++;
              out.print("docid: " + (docid + docBase) + ", freq: " + iter.freq() + ", ");
              for (int i = 0; i < iter.freq(); ++i) {
                out.print("pos " + i + ": " + iter.nextPosition());
                BytesRef payload = iter.getPayload();
                if (payload != null) {
                  out.print(",payload: " + payload);
                }
                out.print(";");
              }
              out.println();
              if (ctx.isInteractiveMode()) {
                if (count % numPerPage == 0) {
                  out.println("Ctrl-D to break");
                  int ch = System.in.read();
                  if (ch == -1) {
                    out.flush();
                    return;
                  }
                }
              }
            }
          } else {
            DocsEnum iter = te.docs(atomicReader.getLiveDocs(), null);

            int docid;
            while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              count++;
              out.println("docid: " + (docid + docBase));
              if (ctx.isInteractiveMode()) {
                if (count % numPerPage == 0) {
                  out.println("Ctrl-D to break");
                  int ch = System.in.read();
                  if (ch == -1) {
                    out.flush();
                    return;
                  }
                }
              }
            }
          }
        }
      }
      docBase += atomicReader.maxDoc();
    }
  }