Esempio n. 1
1
    @Override
    public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
      final long numOrds = globalOrdinals.getValueCount();
      final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds);
      final TermsEnum termEnum = globalOrdinals.termsEnum();

      BytesRef term = termEnum.next();
      while (term != null) {
        if (Math.floorMod(
                StringHelper.murmurhash3_x86_32(term, HASH_PARTITIONING_SEED), incNumPartitions)
            == incZeroBasedPartition) {
          acceptedGlobalOrdinals.set(termEnum.ord());
        }
        term = termEnum.next();
      }
      return acceptedGlobalOrdinals;
    }
  public void testThreeBlocks() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    List<String> terms = new ArrayList<>();
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "m" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "mo" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    w.forceMerge(1);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    if (VERBOSE) {
      while (te.next() != null) {
        System.out.println("TERM: " + te.ord() + " " + te.term().utf8ToString());
      }
    }

    assertTrue(te.seekExact(new BytesRef("mo")));
    assertEquals(27, te.ord());

    te.seekExact(90);
    assertEquals(new BytesRef("s"), te.term());

    testEnum(te, terms);

    r.close();
    w.close();
    dir.close();
  }
  /*
   *  Utility function to display a term vector.
   */
  static void termVectorDisplay(Terms terms) throws IOException {

    if ((terms == null) || (terms.size() == -1)) System.out.println("    The field is not stored.");
    else {
      /*
       *  The terms for this field are stored.
       */
      System.out.println("    Vocabulary size: " + terms.size() + " terms");

      TermsEnum ithTerm = terms.iterator(null);

      /*
       *  Iterate over the terms in this document.
       *  Information about a term's occurrences (tf and
       *  positions) is accessed via the indexing API, which
       *  returns inverted lists that describe (only) the
       *  current document.
       */
      while (ithTerm.next() != null) {
        System.out.format(
            "      %10d %-20s %d ",
            ithTerm.ord(), ithTerm.term().utf8ToString(), ithTerm.totalTermFreq());

        DocsAndPositionsEnum currDoc = ithTerm.docsAndPositions(null, null);
        currDoc.nextDoc();

        for (int jthPosition = 0; jthPosition < ithTerm.totalTermFreq(); jthPosition++)
          System.out.print(currDoc.nextPosition() + " ");

        System.out.println();
      }
      ;
    }
    ;
  }
  public void testSeekCeilNotFound() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    // Get empty string in there!
    doc.add(newStringField("field", "", Field.Store.NO));
    w.addDocument(doc);

    for (int i = 0; i < 36; i++) {
      doc = new Document();
      String term = "" + (char) (97 + i);
      String term2 = "a" + (char) (97 + i);
      doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
      w.addDocument(doc);
    }

    w.forceMerge(1);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(new byte[] {0x22})));
    assertEquals("a", te.term().utf8ToString());
    assertEquals(1L, te.ord());
    r.close();
    w.close();
    dir.close();
  }
Esempio n. 5
0
 @Override
 public long lookupTerm(BytesRef key) {
   try {
     switch (te.seekCeil(key)) {
       case FOUND:
         assert te.ord() >= 0;
         return te.ord();
       case NOT_FOUND:
         assert te.ord() >= 0;
         return -te.ord() - 1;
       default: /* END */
         return -numTerms() - 1;
     }
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
  public void testNonRootFloorBlocks() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, iwc);
    List<String> terms = new ArrayList<>();
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 128; i++) {
      Document doc = new Document();
      String term = "m" + (char) i;
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
      }
      doc.add(newStringField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    w.forceMerge(1);
    IndexReader r = DirectoryReader.open(w, true);
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    BytesRef term;
    int ord = 0;
    while ((term = te.next()) != null) {
      if (VERBOSE) {
        System.out.println("TEST: " + te.ord() + ": " + term.utf8ToString());
      }
      assertEquals(ord, te.ord());
      ord++;
    }

    testEnum(te, terms);

    r.close();
    w.close();
    dir.close();
  }
Esempio n. 7
0
 /** Computes which global ordinals are accepted by this IncludeExclude instance. */
 @Override
 public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
   LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
   TermsEnum globalTermsEnum;
   Terms globalTerms = new DocValuesTerms(globalOrdinals);
   // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can
   // avoid i/o and just set bits.
   globalTermsEnum = compiled.getTermsEnum(globalTerms);
   for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
     acceptedGlobalOrdinals.set(globalTermsEnum.ord());
   }
   return acceptedGlobalOrdinals;
 }
  private void testEnum(TermsEnum te, List<String> terms) throws IOException {
    Collections.sort(terms);
    for (int i = terms.size() - 1; i >= 0; i--) {
      if (VERBOSE) {
        System.out.println("TEST: seek to ord=" + i);
      }
      te.seekExact(i);
      assertEquals(i, te.ord());
      assertEquals(terms.get(i), te.term().utf8ToString());
    }

    int iters = atLeast(1000);
    for (int iter = 0; iter < iters; iter++) {
      int ord = random().nextInt(terms.size());
      if (random().nextBoolean()) {
        te.seekExact(ord);
        assertEquals(terms.get(ord), te.term().utf8ToString());
      } else {
        te.seekExact(new BytesRef(terms.get(ord)));
        assertEquals(ord, te.ord());
      }
    }
  }
  public void testFloorBlocks() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int i = 0; i < 128; i++) {
      Document doc = new Document();
      String term = "" + (char) i;
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
      }
      doc.add(newStringField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    w.forceMerge(1);
    IndexReader r = DirectoryReader.open(w, true);
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    if (VERBOSE) {
      BytesRef term;
      while ((term = te.next()) != null) {
        System.out.println("  " + te.ord() + ": " + term.utf8ToString());
      }
    }

    assertTrue(te.seekExact(new BytesRef("a")));
    assertEquals(97, te.ord());

    te.seekExact(98);
    assertEquals(new BytesRef("b"), te.term());

    assertTrue(te.seekExact(new BytesRef("z")));
    assertEquals(122, te.ord());

    r.close();
    w.close();
    dir.close();
  }
  public void testSeveralNonRootBlocks() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, iwc);
    List<String> terms = new ArrayList<>();
    for (int i = 0; i < 30; i++) {
      for (int j = 0; j < 30; j++) {
        Document doc = new Document();
        String term = "" + (char) (97 + i) + (char) (97 + j);
        terms.add(term);
        if (VERBOSE) {
          System.out.println("term=" + term);
        }
        doc.add(newTextField("body", term, Field.Store.NO));
        w.addDocument(doc);
      }
    }
    w.forceMerge(1);
    IndexReader r = DirectoryReader.open(w, true);
    TermsEnum te = MultiFields.getTerms(r, "body").iterator(null);

    for (int i = 0; i < 30; i++) {
      for (int j = 0; j < 30; j++) {
        String term = "" + (char) (97 + i) + (char) (97 + j);
        if (VERBOSE) {
          System.out.println("TEST: check term=" + term);
        }
        assertEquals(term, te.next().utf8ToString());
        assertEquals(30 * i + j, te.ord());
      }
    }

    testEnum(te, terms);

    te.seekExact(0);
    assertEquals("aa", te.term().utf8ToString());

    r.close();
    w.close();
    dir.close();
  }
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("field", "a b c", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    // Test next()
    assertEquals(new BytesRef("a"), te.next());
    assertEquals(0L, te.ord());
    assertEquals(new BytesRef("b"), te.next());
    assertEquals(1L, te.ord());
    assertEquals(new BytesRef("c"), te.next());
    assertEquals(2L, te.ord());
    assertNull(te.next());

    // Test seekExact by term
    assertTrue(te.seekExact(new BytesRef("b")));
    assertEquals(1, te.ord());
    assertTrue(te.seekExact(new BytesRef("a")));
    assertEquals(0, te.ord());
    assertTrue(te.seekExact(new BytesRef("c")));
    assertEquals(2, te.ord());

    // Test seekExact by ord
    te.seekExact(1);
    assertEquals(new BytesRef("b"), te.term());
    te.seekExact(0);
    assertEquals(new BytesRef("a"), te.term());
    te.seekExact(2);
    assertEquals(new BytesRef("c"), te.term());

    r.close();
    w.close();
    dir.close();
  }
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      if (segmentFacetCounts != null) {
        segmentResults.add(createSegmentResult());
      }

      groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
      facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField);
      facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
      if (facetFieldNumTerms == 0) {
        facetOrdTermsEnum = null;
      } else {
        facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum();
      }
      // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet
      // field
      segmentFacetCounts = new int[facetFieldNumTerms + 1];
      segmentTotalCount = 0;

      segmentGroupedFacetHits.clear();
      for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
        int groupOrd =
            groupedFacetHit.groupValue == null
                ? -1
                : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue);
        if (groupedFacetHit.groupValue != null && groupOrd < 0) {
          continue;
        }

        int facetOrd;
        if (groupedFacetHit.facetValue != null) {
          if (facetOrdTermsEnum == null
              || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue)) {
            continue;
          }
          facetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          facetOrd = facetFieldNumTerms;
        }

        // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not
        // containing facet field
        int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
      }

      if (facetPrefix != null) {
        TermsEnum.SeekStatus seekStatus;
        if (facetOrdTermsEnum != null) {
          seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix);
        } else {
          seekStatus = TermsEnum.SeekStatus.END;
        }

        if (seekStatus != TermsEnum.SeekStatus.END) {
          startFacetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          startFacetOrd = 0;
          endFacetOrd = 0;
          return;
        }

        BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
        facetEndPrefix.append(facetPrefix);
        facetEndPrefix.append(UnicodeUtil.BIG_TERM);
        seekStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix.get());
        if (seekStatus != TermsEnum.SeekStatus.END) {
          endFacetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          endFacetOrd = facetFieldNumTerms; // Don't include null...
        }
      } else {
        startFacetOrd = 0;
        endFacetOrd = facetFieldNumTerms + 1;
      }
    }
  public void testTwoBlocks() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    List<String> terms = new ArrayList<>();
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    for (int i = 0; i < 36; i++) {
      Document doc = new Document();
      String term = "m" + (char) (97 + i);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("i=" + i + " term=" + term);
      }
      doc.add(newTextField("field", term, Field.Store.NO));
      w.addDocument(doc);
    }
    if (VERBOSE) {
      System.out.println("TEST: now forceMerge");
    }
    w.forceMerge(1);
    IndexReader r = w.getReader();
    TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

    assertTrue(te.seekExact(new BytesRef("mo")));
    assertEquals(27, te.ord());

    te.seekExact(54);
    assertEquals(new BytesRef("s"), te.term());

    Collections.sort(terms);

    for (int i = terms.size() - 1; i >= 0; i--) {
      te.seekExact(i);
      assertEquals(i, te.ord());
      assertEquals(terms.get(i), te.term().utf8ToString());
    }

    int iters = atLeast(1000);
    for (int iter = 0; iter < iters; iter++) {
      int ord = random().nextInt(terms.size());
      BytesRef term = new BytesRef(terms.get(ord));
      if (random().nextBoolean()) {
        if (VERBOSE) {
          System.out.println("TEST: iter=" + iter + " seek to ord=" + ord + " of " + terms.size());
        }
        te.seekExact(ord);
      } else {
        if (VERBOSE) {
          System.out.println(
              "TEST: iter="
                  + iter
                  + " seek to term="
                  + terms.get(ord)
                  + " ord="
                  + ord
                  + " of "
                  + terms.size());
        }
        te.seekExact(term);
      }
      assertEquals(ord, te.ord());
      assertEquals(term, te.term());
    }

    r.close();
    w.close();
    dir.close();
  }