Java DocSet.getTopFilter Exemples, org.apache.solr.search.DocSet.getTopFilter Java Exemples

Exemple #1

0

Afficher le fichier

Fichier : SimpleFacets.java Projet : kleopatra999/lucene-solr

  protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList)
      throws SyntaxError, IOException {
    Map<?, ?> tagMap = (Map<?, ?>) req.getContext().get("tags");
    // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
    if (tagMap == null || rb == null) {
      return baseDocSet;
    }

    IdentityHashMap<Query, Boolean> excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTagList) {
      Object olst = tagMap.get(excludeTag);
      // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
      if (!(olst instanceof Collection)) continue;
      for (Object o : (Collection<?>) olst) {
        if (!(o instanceof QParser)) continue;
        QParser qp = (QParser) o;
        excludeSet.put(qp.getQuery(), Boolean.TRUE);
      }
    }
    if (excludeSet.size() == 0) return baseDocSet;

    List<Query> qlist = new ArrayList<>();

    // add the base query
    if (!excludeSet.containsKey(rb.getQuery())) {
      qlist.add(rb.getQuery());
    }

    // add the filters
    if (rb.getFilters() != null) {
      for (Query q : rb.getFilters()) {
        if (!excludeSet.containsKey(q)) {
          qlist.add(q);
        }
      }
    }

    // get the new base docset for this facet
    DocSet base = searcher.getDocSet(qlist);
    if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) {
      Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false);
      grouping.setWithinGroupSort(rb.getGroupingSpec().getSortWithinGroup());
      if (rb.getGroupingSpec().getFields().length > 0) {
        grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
      } else if (rb.getGroupingSpec().getFunctions().length > 0) {
        grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
      } else {
        return base;
      }
      AbstractAllGroupHeadsCollector allGroupHeadsCollector =
          grouping.getCommands().get(0).createAllGroupCollector();
      searcher.search(base.getTopFilter(), allGroupHeadsCollector);
      return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc()));
    } else {
      return base;
    }
  }

Exemple #2

0

Afficher le fichier

Fichier : SimpleFacets.java Projet : kleopatra999/lucene-solr

  /**
   * Returns a grouped facet count for the facet query
   *
   * @see FacetParams#FACET_QUERY
   */
  public int getGroupedFacetQueryCount(Query facetQuery, DocSet docSet) throws IOException {
    // It is okay to retrieve group.field from global because it is never a local param
    String groupField = global.get(GroupParams.GROUP_FIELD);
    if (groupField == null) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "Specify the group.field as parameter or local parameter");
    }

    TermAllGroupsCollector collector = new TermAllGroupsCollector(groupField);
    Filter mainQueryFilter =
        docSet.getTopFilter(); // This returns a filter that only matches documents matching with q
    // param and fq params
    Query filteredFacetQuery =
        new BooleanQuery.Builder()
            .add(facetQuery, Occur.MUST)
            .add(mainQueryFilter, Occur.FILTER)
            .build();
    searcher.search(filteredFacetQuery, collector);
    return collector.getGroupCount();
  }

Exemple #3

0

Afficher le fichier

Fichier : SimpleFacets.java Projet : kleopatra999/lucene-solr

  public NamedList<Integer> getGroupedCounts(
      SolrIndexSearcher searcher,
      DocSet base,
      String field,
      boolean multiToken,
      int offset,
      int limit,
      int mincount,
      boolean missing,
      String sort,
      String prefix,
      String contains,
      boolean ignoreCase)
      throws IOException {
    GroupingSpecification groupingSpecification = rb.getGroupingSpec();
    final String groupField =
        groupingSpecification != null ? groupingSpecification.getFields()[0] : null;
    if (groupField == null) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "Specify the group.field as parameter or local parameter");
    }

    BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null;
    final TermGroupFacetCollector collector =
        TermGroupFacetCollector.createTermGroupFacetCollector(
            groupField, field, multiToken, prefixBytesRef, 128);

    SchemaField sf = searcher.getSchema().getFieldOrNull(groupField);

    if (sf != null
        && sf.hasDocValues() == false
        && sf.multiValued() == false
        && sf.getType().getNumericType() != null) {
      // it's a single-valued numeric field: we must currently create insanity :(
      // there isn't a GroupedFacetCollector that works on numerics right now...
      searcher.search(
          base.getTopFilter(),
          new FilterCollector(collector) {
            @Override
            public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
              LeafReader insane = Insanity.wrapInsanity(context.reader(), groupField);
              return in.getLeafCollector(insane.getContext());
            }
          });
    } else {
      searcher.search(base.getTopFilter(), collector);
    }

    boolean orderByCount =
        sort.equals(FacetParams.FACET_SORT_COUNT)
            || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
    TermGroupFacetCollector.GroupedFacetResult result =
        collector.mergeSegmentResults(
            limit < 0 ? Integer.MAX_VALUE : (offset + limit), mincount, orderByCount);

    CharsRefBuilder charsRef = new CharsRefBuilder();
    FieldType facetFieldType = searcher.getSchema().getFieldType(field);
    NamedList<Integer> facetCounts = new NamedList<>();
    List<TermGroupFacetCollector.FacetEntry> scopedEntries =
        result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit);
    for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) {
      // :TODO:can we do contains earlier than this to make it more efficient?
      if (contains != null
          && !contains(facetEntry.getValue().utf8ToString(), contains, ignoreCase)) {
        continue;
      }
      facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef);
      facetCounts.add(charsRef.toString(), facetEntry.getCount());
    }

    if (missing) {
      facetCounts.add(null, result.getTotalMissingCount());
    }

    return facetCounts;
  }

Exemple #4

0

Afficher le fichier

Fichier : DocValuesFacets.java Projet : Pin-Wang/lottory-wp

  public static NamedList<Integer> getCounts(
      SolrIndexSearcher searcher,
      DocSet docs,
      String fieldName,
      int offset,
      int limit,
      int mincount,
      boolean missing,
      String sort,
      String prefix)
      throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<Integer>();

    final SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (schemaField.multiValued()) {
      si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
      if (si instanceof MultiSortedSetDocValues) {
        ordinalMap = ((MultiSortedSetDocValues) si).mapping;
      }
    } else {
      SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
      si = single == null ? null : new SingletonSortedSetDocValues(single);
      if (single instanceof MultiSortedDocValues) {
        ordinalMap = ((MultiSortedDocValues) single).mapping;
      }
    }
    if (si == null) {
      return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
      throw new UnsupportedOperationException(
          "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    final BytesRef br = new BytesRef();

    final BytesRef prefixRef;
    if (prefix == null) {
      prefixRef = null;
    } else if (prefix.length() == 0) {
      prefix = null;
      prefixRef = null;
    } else {
      prefixRef = new BytesRef(prefix);
    }

    int startTermIndex, endTermIndex;
    if (prefix != null) {
      startTermIndex = (int) si.lookupTerm(prefixRef);
      if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
      prefixRef.append(UnicodeUtil.BIG_TERM);
      endTermIndex = (int) si.lookupTerm(prefixRef);
      assert endTermIndex < 0;
      endTermIndex = -endTermIndex - 1;
    } else {
      startTermIndex = -1;
      endTermIndex = (int) si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10);
    if (nTerms > 0 && docs.size() >= mincount) {

      // count collection array only needs to be as big as the number of terms we are
      // going to collect counts for.
      final int[] counts = new int[nTerms];

      Filter filter = docs.getTopFilter();
      List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
      for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        AtomicReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis =
            filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;
        if (dis != null) {
          disi = dis.iterator();
        }
        if (disi != null) {
          if (schemaField.multiValued()) {
            SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
            if (sub == null) {
              sub = SortedSetDocValues.EMPTY;
            }
            if (sub instanceof SingletonSortedSetDocValues) {
              // some codecs may optimize SORTED_SET storage for single-valued fields
              final SortedDocValues values =
                  ((SingletonSortedSetDocValues) sub).getSortedDocValues();
              accumSingle(counts, startTermIndex, values, disi, subIndex, ordinalMap);
            } else {
              accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
            }
          } else {
            SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
            if (sub == null) {
              sub = SortedDocValues.EMPTY;
            }
            accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
          }
        }
      }

      if (startTermIndex == -1) {
        missingCount = counts[0];
      }

      // IDEA: we could also maintain a count of "other"... everything that fell outside
      // of the top 'N'

      int off = offset;
      int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

      if (sort.equals(FacetParams.FACET_SORT_COUNT)
          || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
        maxsize = Math.min(maxsize, nTerms);
        LongPriorityQueue queue =
            new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

        int min = mincount - 1; // the smallest value in the top 'N' values
        for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
          int c = counts[i];
          if (c > min) {
            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
            // index order, so we already know that the keys are ordered.  This can be very
            // important if a lot of the counts are repeated (like zero counts would be).

            // smaller term numbers sort higher, so subtract the term number instead
            long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
            boolean displaced = queue.insert(pair);
            if (displaced) min = (int) (queue.top() >>> 32);
          }
        }

        // if we are deep paging, we don't have to order the highest "offset" counts.
        int collectCount = Math.max(0, queue.size() - off);
        assert collectCount <= lim;

        // the start and end indexes of our list "sorted" (starting with the highest value)
        int sortedIdxStart = queue.size() - (collectCount - 1);
        int sortedIdxEnd = queue.size() + 1;
        final long[] sorted = queue.sort(collectCount);

        for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
          long pair = sorted[i];
          int c = (int) (pair >>> 32);
          int tnum = Integer.MAX_VALUE - (int) pair;
          si.lookupOrd(startTermIndex + tnum, br);
          ft.indexedToReadable(br, charsRef);
          res.add(charsRef.toString(), c);
        }

      } else {
        // add results in index order
        int i = (startTermIndex == -1) ? 1 : 0;
        if (mincount <= 0) {
          // if mincount<=0, then we won't discard any terms and we know exactly
          // where to start.
          i += off;
          off = 0;
        }

        for (; i < nTerms; i++) {
          int c = counts[i];
          if (c < mincount || --off >= 0) continue;
          if (--lim < 0) break;
          si.lookupOrd(startTermIndex + i, br);
          ft.indexedToReadable(br, charsRef);
          res.add(charsRef.toString(), c);
        }
      }
    }

    return finalize(res, searcher, schemaField, docs, missingCount, missing);
  }

Exemple #5

0

Afficher le fichier

Fichier : SpatialHeatmapFacets.java Projet : yida-lxw/solr-5.3.1-web

  /** Called by {@link org.apache.solr.request.SimpleFacets} to compute heatmap facets. */
  public static NamedList<Object> getHeatmapForField(
      String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params, DocSet docSet)
      throws IOException {
    // get the strategy from the field type
    final SchemaField schemaField = rb.req.getSchema().getField(fieldName);
    final FieldType type = schemaField.getType();

    final PrefixTreeStrategy strategy;
    final DistanceUnits distanceUnits;
    // note: the two instanceof conditions is not ideal, versus one. If we start needing to add more
    // then refactor.
    if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
      AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
      strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
      distanceUnits = rptType.getDistanceUnits();
    } else if (type instanceof RptWithGeometrySpatialField) {
      RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
      strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
      distanceUnits = rptSdvType.getDistanceUnits();
    } else {
      // FYI we support the term query one too but few people use that one
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "heatmap field needs to be of type "
              + SpatialRecursivePrefixTreeFieldType.class
              + " or "
              + RptWithGeometrySpatialField.class);
    }

    final SpatialContext ctx = strategy.getSpatialContext();

    // get the bbox (query Rectangle)
    String geomStr = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM);
    final Shape boundsShape =
        geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);

    // get the grid level (possibly indirectly via distErr or distErrPct)
    final int gridLevel;
    Integer gridLevelObj = params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL);
    final int maxGridLevel = strategy.getGrid().getMaxLevels();
    if (gridLevelObj != null) {
      gridLevel = gridLevelObj;
      if (gridLevel <= 0 || gridLevel > maxGridLevel) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            FacetParams.FACET_HEATMAP_LEVEL + " should be > 0 and <= " + maxGridLevel);
      }
    } else {
      // SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr &
      // distErrPct. Arguably that
      // should be refactored to feel less weird than using it like this.
      SpatialArgs spatialArgs =
          new SpatialArgs(
              SpatialOperation.Intersects /*ignored*/,
              boundsShape == null ? ctx.getWorldBounds() : boundsShape);
      final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
      if (distErrObj != null) {
        // convert distErr units based on configured units
        spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
      }
      spatialArgs.setDistErrPct(
          params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
      double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
      if (distErr <= 0) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            FacetParams.FACET_HEATMAP_DIST_ERR_PCT
                + " or "
                + FacetParams.FACET_HEATMAP_DIST_ERR
                + " should be > 0 or instead provide "
                + FacetParams.FACET_HEATMAP_LEVEL
                + "="
                + maxGridLevel
                + " if you insist on maximum detail");
      }
      // The SPT (grid) can lookup a grid level satisfying an error distance constraint
      gridLevel = strategy.getGrid().getLevelForDistance(distErr);
    }

    // Compute!
    final HeatmapFacetCounter.Heatmap heatmap;
    try {
      heatmap =
          HeatmapFacetCounter.calcFacets(
              strategy,
              rb.req.getSearcher().getTopReaderContext(),
              docSet.getTopFilter(),
              boundsShape,
              gridLevel,
              params.getFieldInt(
                  fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS, 100_000) // will throw if exceeded
              );
    } catch (IllegalArgumentException e) { // e.g. too many cells
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
    }

    // Populate response
    NamedList<Object> result = new NamedList<>();
    result.add("gridLevel", gridLevel);
    result.add("columns", heatmap.columns);
    result.add("rows", heatmap.rows);
    result.add("minX", heatmap.region.getMinX());
    result.add("maxX", heatmap.region.getMaxX());
    result.add("minY", heatmap.region.getMinY());
    result.add("maxY", heatmap.region.getMaxY());

    boolean hasNonZero = false;
    for (int count : heatmap.counts) {
      if (count > 0) {
        hasNonZero = true;
        break;
      }
    }
    formatCountsAndAddToNL(
        fieldKey,
        rb,
        params,
        heatmap.columns,
        heatmap.rows,
        hasNonZero ? heatmap.counts : null,
        result);

    return result;
  }