public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
    SolrCore core = h.getCore();
    IndexSchema schema = core.getLatestSchema();
    SolrInputDocument doc = new SolrInputDocument();

    final float DOC_BOOST = 3.0F;
    doc.setDocumentBoost(DOC_BOOST);
    doc.addField("id", "42");

    SolrInputField inTitle = new SolrInputField("title");
    inTitle.addValue("titleA", 2.0F);
    inTitle.addValue("titleB", 7.0F);
    final float TITLE_BOOST = 2.0F * 7.0F;
    assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F);
    doc.put(inTitle.getName(), inTitle);

    SolrInputField inFoo = new SolrInputField("foo_t");
    inFoo.addValue("summer time", 1.0F);
    inFoo.addValue("in the city", 5.0F);
    inFoo.addValue("living is easy", 11.0F);
    final float FOO_BOOST = 1.0F * 5.0F * 11.0F;
    assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F);
    doc.put(inFoo.getName(), inFoo);

    Document out = DocumentBuilder.toDocument(doc, schema);

    IndexableField[] outTitle = out.getFields(inTitle.getName());
    assertEquals("wrong number of title values", 2, outTitle.length);

    IndexableField[] outNoNorms = out.getFields("title_stringNoNorms");
    assertEquals("wrong number of nonorms values", 2, outNoNorms.length);

    IndexableField[] outFoo = out.getFields(inFoo.getName());
    assertEquals("wrong number of foo values", 3, outFoo.length);

    IndexableField[] outText = out.getFields("text");
    assertEquals("wrong number of text values", 5, outText.length);

    // since Lucene no longer has native document boosts, we should find
    // the doc boost multiplied into the boost on the first field value
    // of each field.  All other field values should be 1.0f
    // (lucene will multiply all of the field value boosts later)
    assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F);
    assertEquals(1.0F, outTitle[1].boost(), 0.0F);
    //
    assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F);
    assertEquals(1.0F, outFoo[1].boost(), 0.0F);
    assertEquals(1.0F, outFoo[2].boost(), 0.0F);
    //
    assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F);
    assertEquals(1.0F, outText[1].boost(), 0.0F);
    assertEquals(FOO_BOOST, outText[2].boost(), 0.0F);
    assertEquals(1.0F, outText[3].boost(), 0.0F);
    assertEquals(1.0F, outText[4].boost(), 0.0F);

    // copyField dest with no norms should not have recieved any boost
    assertEquals(1.0F, outNoNorms[0].boost(), 0.0F);
    assertEquals(1.0F, outNoNorms[1].boost(), 0.0F);

    // now index that SolrInputDocument to check the computed norms

    assertU(adoc(doc));
    assertU(commit());

    SolrQueryRequest req = req("q", "id:42");
    try {
      // very hack-ish

      SolrQueryResponse rsp = new SolrQueryResponse();
      core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp);

      DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
      assertTrue("can't find the doc we just added", 1 == dl.size());
      int docid = dl.iterator().nextDoc();

      SolrIndexSearcher searcher = req.getSearcher();
      AtomicReader reader =
          SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader());

      assertTrue(
          "similarity doesn't extend DefaultSimilarity, "
              + "config or defaults have changed since test was written",
          searcher.getSimilarity() instanceof DefaultSimilarity);

      DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity();

      NumericDocValues titleNorms = reader.getNormValues("title");
      NumericDocValues fooNorms = reader.getNormValues("foo_t");
      NumericDocValues textNorms = reader.getNormValues("text");

      assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST), titleNorms.get(docid));

      assertEquals(expectedNorm(sim, 8 - 3, FOO_BOOST * DOC_BOOST), fooNorms.get(docid));

      assertEquals(
          expectedNorm(sim, 2 + 8 - 3, TITLE_BOOST * FOO_BOOST * DOC_BOOST), textNorms.get(docid));

    } finally {
      req.close();
    }
  }
Пример #2
0
  public static NamedList<Integer> getCounts(
      SolrIndexSearcher searcher,
      DocSet docs,
      String fieldName,
      int offset,
      int limit,
      int mincount,
      boolean missing,
      String sort,
      String prefix)
      throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<Integer>();

    final SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (schemaField.multiValued()) {
      si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
      if (si instanceof MultiSortedSetDocValues) {
        ordinalMap = ((MultiSortedSetDocValues) si).mapping;
      }
    } else {
      SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
      si = single == null ? null : new SingletonSortedSetDocValues(single);
      if (single instanceof MultiSortedDocValues) {
        ordinalMap = ((MultiSortedDocValues) single).mapping;
      }
    }
    if (si == null) {
      return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
      throw new UnsupportedOperationException(
          "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    final BytesRef br = new BytesRef();

    final BytesRef prefixRef;
    if (prefix == null) {
      prefixRef = null;
    } else if (prefix.length() == 0) {
      prefix = null;
      prefixRef = null;
    } else {
      prefixRef = new BytesRef(prefix);
    }

    int startTermIndex, endTermIndex;
    if (prefix != null) {
      startTermIndex = (int) si.lookupTerm(prefixRef);
      if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
      prefixRef.append(UnicodeUtil.BIG_TERM);
      endTermIndex = (int) si.lookupTerm(prefixRef);
      assert endTermIndex < 0;
      endTermIndex = -endTermIndex - 1;
    } else {
      startTermIndex = -1;
      endTermIndex = (int) si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10);
    if (nTerms > 0 && docs.size() >= mincount) {

      // count collection array only needs to be as big as the number of terms we are
      // going to collect counts for.
      final int[] counts = new int[nTerms];

      Filter filter = docs.getTopFilter();
      List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
      for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        AtomicReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis =
            filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;
        if (dis != null) {
          disi = dis.iterator();
        }
        if (disi != null) {
          if (schemaField.multiValued()) {
            SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
            if (sub == null) {
              sub = SortedSetDocValues.EMPTY;
            }
            if (sub instanceof SingletonSortedSetDocValues) {
              // some codecs may optimize SORTED_SET storage for single-valued fields
              final SortedDocValues values =
                  ((SingletonSortedSetDocValues) sub).getSortedDocValues();
              accumSingle(counts, startTermIndex, values, disi, subIndex, ordinalMap);
            } else {
              accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
            }
          } else {
            SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
            if (sub == null) {
              sub = SortedDocValues.EMPTY;
            }
            accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
          }
        }
      }

      if (startTermIndex == -1) {
        missingCount = counts[0];
      }

      // IDEA: we could also maintain a count of "other"... everything that fell outside
      // of the top 'N'

      int off = offset;
      int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

      if (sort.equals(FacetParams.FACET_SORT_COUNT)
          || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
        maxsize = Math.min(maxsize, nTerms);
        LongPriorityQueue queue =
            new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

        int min = mincount - 1; // the smallest value in the top 'N' values
        for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
          int c = counts[i];
          if (c > min) {
            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
            // index order, so we already know that the keys are ordered.  This can be very
            // important if a lot of the counts are repeated (like zero counts would be).

            // smaller term numbers sort higher, so subtract the term number instead
            long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
            boolean displaced = queue.insert(pair);
            if (displaced) min = (int) (queue.top() >>> 32);
          }
        }

        // if we are deep paging, we don't have to order the highest "offset" counts.
        int collectCount = Math.max(0, queue.size() - off);
        assert collectCount <= lim;

        // the start and end indexes of our list "sorted" (starting with the highest value)
        int sortedIdxStart = queue.size() - (collectCount - 1);
        int sortedIdxEnd = queue.size() + 1;
        final long[] sorted = queue.sort(collectCount);

        for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
          long pair = sorted[i];
          int c = (int) (pair >>> 32);
          int tnum = Integer.MAX_VALUE - (int) pair;
          si.lookupOrd(startTermIndex + tnum, br);
          ft.indexedToReadable(br, charsRef);
          res.add(charsRef.toString(), c);
        }

      } else {
        // add results in index order
        int i = (startTermIndex == -1) ? 1 : 0;
        if (mincount <= 0) {
          // if mincount<=0, then we won't discard any terms and we know exactly
          // where to start.
          i += off;
          off = 0;
        }

        for (; i < nTerms; i++) {
          int c = counts[i];
          if (c < mincount || --off >= 0) continue;
          if (--lim < 0) break;
          si.lookupOrd(startTermIndex + i, br);
          ft.indexedToReadable(br, charsRef);
          res.add(charsRef.toString(), c);
        }
      }
    }

    return finalize(res, searcher, schemaField, docs, missingCount, missing);
  }