Java FieldType.multiValuedFieldCache Beispiele

Programmiersprache: Java

Namespace / Paketname: org.apache.solr.schema

Klasse / Typ: FieldType

Methode / Funktion: multiValuedFieldCache

Beispiele auf hotexamples.com: 3

Java FieldType.multiValuedFieldCache - 3 Beispiele gefunden. Dies sind die am besten bewerteten Java Beispiele für die org.apache.solr.schema.FieldType.multiValuedFieldCache, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

indexedToReadable(9)

getTypeName(7)

getQueryAnalyzer(6)

toInternal(4)

toObject(4)

readableToIndexed(4)

getAnalyzer(4)

unmarshalSortValue(3)

getNumericType(3)

marshalSortValue(3)

multiValuedFieldCache(3)

getFieldQuery(2)

getClass(2)

getValueSource(2)

createField(2)

init(2)

createFields(2)

isTokenized(2)

toString(1)

toExternal(1)

checkSchemaField(1)

storedToIndexed(1)

isPolyField(1)

getSortField(1)

getPrefixQuery(1)

getIndexAnalyzer(1)

write(1)

Beispiel #1

Datei anzeigen

Datei: FacetField.java Projekt: apache/lucene-solr

  @Override
  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
    SchemaField sf = fcontext.searcher.getSchema().getField(field);
    FieldType ft = sf.getType();
    boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

    LegacyNumericType ntype = ft.getNumericType();
    // ensure we can support the requested options for numeric faceting:
    if (ntype != null) {
      if (prefix != null) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            "Doesn't make sense to set facet prefix on a numeric field");
      }
      if (mincount == 0) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            "Numeric fields do not support facet mincount=0; try indexing as terms");
        // TODO if indexed=true then we could add support
      }
    }

    // TODO auto-pick ENUM/STREAM SOLR-9351 when index asc and DocSet cardinality is *not* much
    // smaller than term cardinality
    if (method == FacetMethod.ENUM) { // at the moment these two are the same
      method = FacetMethod.STREAM;
    }
    if (method == FacetMethod.STREAM
        && sf.indexed()
        && "index".equals(sortVariable)
        && sortDirection == SortDirection.asc) {
      return new FacetFieldProcessorByEnumTermsStream(fcontext, this, sf);
    }

    // TODO if method=UIF and not single-valued numerics then simply choose that now? TODO add
    // FieldType.getDocValuesType()

    if (!multiToken) {
      if (mincount > 0 && prefix == null && (ntype != null || method == FacetMethod.DVHASH)) {
        // TODO can we auto-pick for strings when term cardinality is much greater than DocSet
        // cardinality?
        //   or if we don't know cardinality but DocSet size is very small
        return new FacetFieldProcessorByHashDV(fcontext, this, sf);
      } else if (ntype == null) {
        // single valued string...
        return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
      } else {
        throw new SolrException(
            SolrException.ErrorCode.SERVER_ERROR, "Couldn't pick facet algorithm for field " + sf);
      }
    }

    // multi-valued after this point

    if (sf.hasDocValues() || method == FacetMethod.DV) {
      // single and multi-valued string docValues
      return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
    }

    // Top-level multi-valued field cache (UIF)
    return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
  }

Beispiel #2

Datei anzeigen

Datei: SimpleFacets.java Projekt: kleopatra999/lucene-solr

  /**
   * Term counts for use in field faceting that resepcts the specified mincount - if mincount is
   * null, the "zeros" param is consulted for the appropriate backcompat default
   *
   * @see FacetParams#FACET_ZEROS
   */
  private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed)
      throws IOException {
    final SolrParams params = parsed.params;
    final DocSet docs = parsed.docs;
    final int threads = parsed.threads;
    int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    if (limit == 0) return new NamedList<>();
    if (mincount == null) {
      Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
      // mincount = (zeros!=null && zeros) ? 0 : 1;
      mincount = (zeros != null && !zeros) ? 1 : 0;
      // current default is to include zeros.
    }
    boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
    // default to sorting if there is a limit.
    String sort =
        params.getFieldParam(
            field,
            FacetParams.FACET_SORT,
            limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
    String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
    String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
    boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);

    NamedList<Integer> counts;
    SchemaField sf = searcher.getSchema().getField(field);
    FieldType ft = sf.getType();

    // determine what type of faceting method to use
    final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
    FacetMethod method = null;
    if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
      method = FacetMethod.ENUM;
    } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
      method = FacetMethod.FCS;
    } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
      method = FacetMethod.FC;
    }

    if (method == FacetMethod.ENUM && TrieField.getMainValuePrefix(ft) != null) {
      // enum can't deal with trie fields that index several terms per value
      method = sf.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
    }

    if (method == null && ft instanceof BoolField) {
      // Always use filters for booleans... we know the number of values is very small.
      method = FacetMethod.ENUM;
    }

    final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

    if (ft.getNumericType() != null && !sf.multiValued()) {
      // the per-segment approach is optimal for numeric field types since there
      // are no global ords to merge and no need to create an expensive
      // top-level reader
      method = FacetMethod.FCS;
    }

    if (method == null) {
      // TODO: default to per-segment or not?
      method = FacetMethod.FC;
    }

    if (method == FacetMethod.FCS && multiToken) {
      // only fc knows how to deal with multi-token fields
      method = FacetMethod.FC;
    }

    if (method == FacetMethod.ENUM && sf.hasDocValues()) {
      // only fc can handle docvalues types
      method = FacetMethod.FC;
    }

    if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
      counts =
          getGroupedCounts(
              searcher,
              docs,
              field,
              multiToken,
              offset,
              limit,
              mincount,
              missing,
              sort,
              prefix,
              contains,
              ignoreCase);
    } else {
      assert method != null;
      switch (method) {
        case ENUM:
          assert TrieField.getMainValuePrefix(ft) == null;
          counts =
              getFacetTermEnumCounts(
                  searcher,
                  docs,
                  field,
                  offset,
                  limit,
                  mincount,
                  missing,
                  sort,
                  prefix,
                  contains,
                  ignoreCase,
                  params);
          break;
        case FCS:
          assert !multiToken;
          if (ft.getNumericType() != null && !sf.multiValued()) {
            // force numeric faceting
            if (prefix != null && !prefix.isEmpty()) {
              throw new SolrException(
                  ErrorCode.BAD_REQUEST,
                  FacetParams.FACET_PREFIX + " is not supported on numeric types");
            }
            if (contains != null && !contains.isEmpty()) {
              throw new SolrException(
                  ErrorCode.BAD_REQUEST,
                  FacetParams.FACET_CONTAINS + " is not supported on numeric types");
            }
            counts =
                NumericFacets.getCounts(
                    searcher, docs, field, offset, limit, mincount, missing, sort);
          } else {
            PerSegmentSingleValuedFaceting ps =
                new PerSegmentSingleValuedFaceting(
                    searcher,
                    docs,
                    field,
                    offset,
                    limit,
                    mincount,
                    missing,
                    sort,
                    prefix,
                    contains,
                    ignoreCase);
            Executor executor = threads == 0 ? directExecutor : facetExecutor;
            ps.setNumThreads(threads);
            counts = ps.getFacetCounts(executor);
          }
          break;
        case FC:
          counts =
              DocValuesFacets.getCounts(
                  searcher,
                  docs,
                  field,
                  offset,
                  limit,
                  mincount,
                  missing,
                  sort,
                  prefix,
                  contains,
                  ignoreCase);
          break;
        default:
          throw new AssertionError();
      }
    }

    return counts;
  }

Beispiel #3

Datei anzeigen

Datei: DocValuesFacets.java Projekt: fullstorydev/lucene-solr

  public static NamedList<Integer> getCounts(
      SolrIndexSearcher searcher,
      DocSet docs,
      String fieldName,
      int offset,
      int limit,
      int mincount,
      boolean missing,
      String sort,
      String prefix,
      String contains,
      boolean ignoreCase)
      throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<>();

    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

    final SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (multiValued) {
      si = searcher.getLeafReader().getSortedSetDocValues(fieldName);
      if (si instanceof MultiSortedSetDocValues) {
        ordinalMap = ((MultiSortedSetDocValues) si).mapping;
      }
    } else {
      SortedDocValues single = searcher.getLeafReader().getSortedDocValues(fieldName);
      si = single == null ? null : DocValues.singleton(single);
      if (single instanceof MultiSortedDocValues) {
        ordinalMap = ((MultiSortedDocValues) single).mapping;
      }
    }
    if (si == null) {
      return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
      throw new UnsupportedOperationException(
          "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    final BytesRefBuilder prefixRef;
    if (prefix == null) {
      prefixRef = null;
    } else if (prefix.length() == 0) {
      prefix = null;
      prefixRef = null;
    } else {
      prefixRef = new BytesRefBuilder();
      prefixRef.copyChars(prefix);
    }

    int startTermIndex, endTermIndex;
    if (prefix != null) {
      startTermIndex = (int) si.lookupTerm(prefixRef.get());
      if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
      prefixRef.append(UnicodeUtil.BIG_TERM);
      endTermIndex = (int) si.lookupTerm(prefixRef.get());
      assert endTermIndex < 0;
      endTermIndex = -endTermIndex - 1;
    } else {
      startTermIndex = -1;
      endTermIndex = (int) si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    if (nTerms > 0 && docs.size() >= mincount) {

      // count collection array only needs to be as big as the number of terms we are
      // going to collect counts for.
      final int[] counts = new int[nTerms];

      Filter filter = docs.getTopFilter();
      List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
      for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis =
            filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;
        if (dis != null) {
          disi = dis.iterator();
        }
        if (disi != null) {
          if (multiValued) {
            SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
            if (sub == null) {
              sub = DocValues.emptySortedSet();
            }
            final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
            if (singleton != null) {
              // some codecs may optimize SORTED_SET storage for single-valued fields
              accumSingle(counts, startTermIndex, singleton, disi, subIndex, ordinalMap);
            } else {
              accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
            }
          } else {
            SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
            if (sub == null) {
              sub = DocValues.emptySorted();
            }
            accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
          }
        }
      }

      if (startTermIndex == -1) {
        missingCount = counts[0];
      }

      // IDEA: we could also maintain a count of "other"... everything that fell outside
      // of the top 'N'

      int off = offset;
      int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

      if (sort.equals(FacetParams.FACET_SORT_COUNT)
          || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
        maxsize = Math.min(maxsize, nTerms);
        LongPriorityQueue queue =
            new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

        int min = mincount - 1; // the smallest value in the top 'N' values
        for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
          int c = counts[i];
          if (contains != null) {
            final BytesRef term = si.lookupOrd(startTermIndex + i);
            if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
              continue;
            }
          }
          if (c > min) {
            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
            // index order, so we already know that the keys are ordered.  This can be very
            // important if a lot of the counts are repeated (like zero counts would be).

            // smaller term numbers sort higher, so subtract the term number instead
            long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
            boolean displaced = queue.insert(pair);
            if (displaced) min = (int) (queue.top() >>> 32);
          }
        }

        // if we are deep paging, we don't have to order the highest "offset" counts.
        int collectCount = Math.max(0, queue.size() - off);
        assert collectCount <= lim;

        // the start and end indexes of our list "sorted" (starting with the highest value)
        int sortedIdxStart = queue.size() - (collectCount - 1);
        int sortedIdxEnd = queue.size() + 1;
        final long[] sorted = queue.sort(collectCount);

        for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
          long pair = sorted[i];
          int c = (int) (pair >>> 32);
          int tnum = Integer.MAX_VALUE - (int) pair;
          final BytesRef term = si.lookupOrd(startTermIndex + tnum);
          ft.indexedToReadable(term, charsRef);
          res.add(charsRef.toString(), c);
        }

      } else {
        // add results in index order
        int i = (startTermIndex == -1) ? 1 : 0;
        if (mincount <= 0 && contains == null) {
          // if mincount<=0 and we're not examining the values for contains, then
          // we won't discard any terms and we know exactly where to start.
          i += off;
          off = 0;
        }

        for (; i < nTerms; i++) {
          int c = counts[i];
          if (c < mincount) continue;
          BytesRef term = null;
          if (contains != null) {
            term = si.lookupOrd(startTermIndex + i);
            if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
              continue;
            }
          }
          if (--off >= 0) continue;
          if (--lim < 0) break;
          if (term == null) {
            term = si.lookupOrd(startTermIndex + i);
          }
          ft.indexedToReadable(term, charsRef);
          res.add(charsRef.toString(), c);
        }
      }
    }

    return finalize(res, searcher, schemaField, docs, missingCount, missing);
  }