public Fragmenter getFragmenter(String fieldName, SolrParams params) {
  numRequests++;
  if (defaults != null) {
    params = new DefaultSolrParams(params, defaults);
  }

  int fragsize =
      params.getFieldInt(
          fieldName, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
  int increment =
      params.getFieldInt(
          fieldName, HighlightParams.INCREMENT, LuceneRegexFragmenter.DEFAULT_INCREMENT_GAP);
  float slop =
      params.getFieldFloat(fieldName, HighlightParams.SLOP, LuceneRegexFragmenter.DEFAULT_SLOP);
  int maxchars =
      params.getFieldInt(
          fieldName, HighlightParams.MAX_RE_CHARS, LuceneRegexFragmenter.DEFAULT_MAX_ANALYZED_CHARS);
  String rawpat =
      params.getFieldParam(
          fieldName, HighlightParams.PATTERN, LuceneRegexFragmenter.DEFAULT_PATTERN_RAW);

  // identity comparison is intentional: only recompile when a non-default pattern was supplied
  Pattern p = rawpat == defaultPatternRaw ? defaultPattern : Pattern.compile(rawpat);

  if (fragsize <= 0) {
    return new NullFragmenter();
  }

  return new LuceneRegexFragmenter(fragsize, increment, slop, maxchars, p);
}
private void doHighlightingByFastVectorHighlighter(
    FastVectorHighlighter highlighter,
    FieldQuery fieldQuery,
    SolrQueryRequest req,
    NamedList docSummaries,
    int docId,
    Document doc,
    String fieldName)
    throws IOException {
  SolrParams params = req.getParams();
  SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params);
  String[] snippets =
      highlighter.getBestFragments(
          fieldQuery,
          req.getSearcher().getIndexReader(),
          docId,
          fieldName,
          params.getFieldInt(fieldName, HighlightParams.FRAGSIZE, 100),
          params.getFieldInt(fieldName, HighlightParams.SNIPPETS, 1),
          getFragListBuilder(fieldName, params),
          getFragmentsBuilder(fieldName, params),
          solrFb.getPreTags(params, fieldName),
          solrFb.getPostTags(params, fieldName),
          getEncoder(fieldName, params));
  if (snippets != null && snippets.length > 0) docSummaries.add(fieldName, snippets);
  else alternateField(docSummaries, params, doc, fieldName);
}
private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) {

  field = fieldName;
  parentValue = parent;

  // facet params
  SolrParams parameters = rb.req.getParams();
  facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
  facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
  facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
  String defaultSort =
      (facetFieldLimit > 0) ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
  facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);

  valueCollection =
      new PivotFacetFieldValueCollection(
          facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort);

  if ((facetFieldLimit < 0)
      ||
      // TODO: possible refinement issue if limit=0 & mincount=0 & missing=true
      // (ie: we only want the missing count for this field)
      (facetFieldLimit <= 0 && facetFieldMinimumCount == 0)
      || (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0)) {
    // in any of these cases, there's no need to refine this level of the pivot
    needRefinementAtThisLevel = false;
  }
}
private void alternateField(
    NamedList docSummaries, SolrParams params, Document doc, String fieldName) {
  String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
  if (alternateField != null && alternateField.length() > 0) {
    String[] altTexts = doc.getValues(alternateField);
    if (altTexts != null && altTexts.length > 0) {
      int alternateFieldLen =
          params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
      if (alternateFieldLen <= 0) {
        docSummaries.add(fieldName, altTexts);
      } else {
        List<String> altList = new ArrayList<String>();
        int len = 0;
        for (String altText : altTexts) {
          altList.add(
              len + altText.length() > alternateFieldLen
                  ? new String(altText.substring(0, alternateFieldLen - len))
                  : altText);
          len += altText.length();
          if (len >= alternateFieldLen) break;
        }
        docSummaries.add(fieldName, altList);
      }
    }
  }
}
private void alternateField(
    NamedList docSummaries, SolrParams params, Document doc, String fieldName) {
  String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
  if (alternateField != null && alternateField.length() > 0) {
    IndexableField[] docFields = doc.getFields(alternateField);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
      if (field.binaryValue() == null) listFields.add(field.stringValue());
    }

    String[] altTexts = listFields.toArray(new String[listFields.size()]);

    if (altTexts != null && altTexts.length > 0) {
      Encoder encoder = getEncoder(fieldName, params);
      int alternateFieldLen =
          params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
      List<String> altList = new ArrayList<String>();
      int len = 0;
      for (String altText : altTexts) {
        if (alternateFieldLen <= 0) {
          altList.add(encoder.encodeText(altText));
        } else {
          altList.add(
              len + altText.length() > alternateFieldLen
                  ? encoder.encodeText(new String(altText.substring(0, alternateFieldLen - len)))
                  : encoder.encodeText(altText));
          len += altText.length();
          if (len >= alternateFieldLen) break;
        }
      }
      docSummaries.add(fieldName, altList);
    }
  }
}
public Fragmenter getFragmenter(String fieldName, SolrParams params) {
  numRequests++;
  params = SolrParams.wrapDefaults(params, defaults);

  int fragsize = params.getFieldInt(fieldName, HighlightParams.FRAGSIZE, 100);
  return (fragsize <= 0) ? new NullFragmenter() : new LuceneGapFragmenter(fragsize);
}
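// ---------------------------------------------------------------------------
// Illustration (not from the Solr source): every snippet in this listing reads
// its options through the per-field parameter convention -- getFieldInt() /
// getFieldParam() first look for "f.<fieldName>.<param>", then fall back to the
// plain "<param>", and finally to the default supplied in code, while
// SolrParams.wrapDefaults() layers request params over configured defaults.
// The sketch below is a minimal, hypothetical demo of that resolution order;
// the class name and the field names "title" and "body" are made up.
// ---------------------------------------------------------------------------
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;

public class FieldParamFallbackDemo {
  public static void main(String[] args) {
    Map<String, String> defaultMap = new HashMap<>();
    defaultMap.put(HighlightParams.FRAGSIZE, "70"); // configured default (e.g. from solrconfig.xml)
    SolrParams defaults = new MapSolrParams(defaultMap);

    Map<String, String> requestMap = new HashMap<>();
    requestMap.put("f.title." + HighlightParams.FRAGSIZE, "50"); // per-field override for "title"
    SolrParams request = new MapSolrParams(requestMap);

    // layer the request over the configured defaults, as getFragmenter() does above
    SolrParams params = SolrParams.wrapDefaults(request, defaults);

    System.out.println(params.getFieldInt("title", HighlightParams.FRAGSIZE, 100)); // 50 (per-field)
    System.out.println(params.getFieldInt("body", HighlightParams.FRAGSIZE, 100));  // 70 (defaults)
    System.out.println(
        new MapSolrParams(new HashMap<String, String>())
            .getFieldInt("body", HighlightParams.FRAGSIZE, 100)); // 100 (code default)
  }
}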
private void doHighlightingByHighlighter(
    Query query,
    SolrQueryRequest req,
    NamedList docSummaries,
    int docId,
    Document doc,
    String fieldName)
    throws IOException {
  SolrParams params = req.getParams();
  String[] docTexts = doc.getValues(fieldName);
  // according to the Document javadoc, doc.getValues() never returns null;
  // check for empty instead of null
  if (docTexts.length == 0) return;

  SolrIndexSearcher searcher = req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  TokenStream tstream = null;
  int numFragments = getMaxSnippets(fieldName, params);
  boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

  String[] summaries = null;
  List<TextFragment> frags = new ArrayList<TextFragment>();

  TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
  try {
    TokenStream tvStream =
        TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
    if (tvStream != null) {
      tots = new TermOffsetsTokenStream(tvStream);
    }
  } catch (IllegalArgumentException e) {
    // No problem. But we can't use the TermOffsets optimization.
  }

  for (int j = 0; j < docTexts.length; j++) {
    if (tots != null) {
      // if we're using the TermOffsets optimization, then get the next
      // field value's TokenStream (i.e. get field j's TokenStream) from tots:
      tstream = tots.getMultiValuedTokenStream(docTexts[j].length());
    } else {
      // fall back to the analyzer
      tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
    }

    Highlighter highlighter;
    if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
      // TODO: this is not always necessary - eventually we would like to avoid this wrap
      // when it is not needed.
      tstream = new CachingTokenFilter(tstream);

      // get highlighter
      highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

      // after highlighter initialization, reset tstream since construction of the highlighter
      // already used it
      tstream.reset();
    } else {
      // use "the old way"
      highlighter = getHighlighter(query, fieldName, req);
    }

    int maxCharsToAnalyze =
        params.getFieldInt(
            fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
    if (maxCharsToAnalyze < 0) {
      highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
    } else {
      highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
    }

    try {
      TextFragment[] bestTextFragments =
          highlighter.getBestTextFragments(
              tstream, docTexts[j], mergeContiguousFragments, numFragments);
      for (int k = 0; k < bestTextFragments.length; k++) {
        if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
          frags.add(bestTextFragments[k]);
        }
      }
    } catch (InvalidTokenOffsetsException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    }
  }

  // sort such that the fragments with the highest score come first
  Collections.sort(
      frags,
      new Comparator<TextFragment>() {
        public int compare(TextFragment arg0, TextFragment arg1) {
          return Math.round(arg1.getScore() - arg0.getScore());
        }
      });

  // convert fragments back into text
  // TODO: we can include score and position information in output as snippet attributes
  if (frags.size() > 0) {
    ArrayList<String> fragTexts = new ArrayList<String>();
    for (TextFragment fragment : frags) {
      if ((fragment != null) && (fragment.getScore() > 0)) {
        fragTexts.add(fragment.toString());
      }
      if (fragTexts.size() >= numFragments) break;
    }
    summaries = fragTexts.toArray(new String[0]);
    if (summaries.length > 0) docSummaries.add(fieldName, summaries);
  }

  // no summaries were made, copy text from the alternate field
  if (summaries == null || summaries.length == 0) {
    alternateField(docSummaries, params, doc, fieldName);
  }
}
/**
 * Return the max number of snippets for this field. If this has not been configured for this
 * field, fall back to the configured default or the solr default.
 *
 * @param fieldName The name of the field
 * @param params The params controlling Highlighting
 */
protected int getMaxSnippets(String fieldName, SolrParams params) {
  return params.getFieldInt(fieldName, HighlightParams.SNIPPETS, 1);
}
/**
 * Returns a list of terms in the specified field along with the corresponding count of documents
 * in the set that match that constraint. This method uses the FilterCache to get the intersection
 * count between <code>docs</code> and the DocSet for each term in the filter.
 *
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Integer> getFacetTermEnumCounts(
    SolrIndexSearcher searcher,
    DocSet docs,
    String field,
    int offset,
    int limit,
    int mincount,
    boolean missing,
    String sort,
    String prefix,
    String contains,
    boolean ignoreCase,
    SolrParams params)
    throws IOException {

  /* :TODO: potential optimization...
   * cache the Terms with the highest docFreq and try them first
   * don't enum if we get our max from them
   */

  // Minimum term docFreq in order to use the filterCache for that term.
  int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

  // make sure we have a set that is fast for random access, if we will use it for that
  DocSet fastForRandomSet = docs;
  if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
    SortedIntDocSet sset = (SortedIntDocSet) docs;
    fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
  }

  IndexSchema schema = searcher.getSchema();
  LeafReader r = searcher.getLeafReader();
  FieldType ft = schema.getFieldType(field);

  boolean sortByCount = sort.equals("count") || sort.equals("true");
  final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
  final BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
      sortByCount ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize) : null;
  final NamedList<Integer> res = new NamedList<>();

  int min = mincount - 1; // the smallest value in the top 'N' values
  int off = offset;
  int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

  BytesRef prefixTermBytes = null;
  if (prefix != null) {
    String indexedPrefix = ft.toInternal(prefix);
    prefixTermBytes = new BytesRef(indexedPrefix);
  }

  Fields fields = r.fields();
  Terms terms = fields == null ? null : fields.terms(field);
  TermsEnum termsEnum = null;
  SolrIndexSearcher.DocsEnumState deState = null;
  BytesRef term = null;
  if (terms != null) {
    termsEnum = terms.iterator();

    // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
    // facet.offset when sorting by index order.

    if (prefixTermBytes != null) {
      if (termsEnum.seekCeil(prefixTermBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }
  }

  PostingsEnum postingsEnum = null;
  CharsRefBuilder charsRef = new CharsRefBuilder();

  if (docs.size() >= mincount) {
    while (term != null) {

      if (prefixTermBytes != null && !StringHelper.startsWith(term, prefixTermBytes)) break;

      if (contains == null || contains(term.utf8ToString(), contains, ignoreCase)) {
        int df = termsEnum.docFreq();

        // If we are sorting, we can use df>min (rather than >=) since we
        // are going in index order. For certain term distributions this can
        // make a large difference (for example, many terms with df=1).
        if (df > 0 && df > min) {
          int c;

          if (df >= minDfFilterCache) {
            // use the filter cache
            if (deState == null) {
              deState = new SolrIndexSearcher.DocsEnumState();
              deState.fieldName = field;
              deState.liveDocs = r.getLiveDocs();
              deState.termsEnum = termsEnum;
              deState.postingsEnum = postingsEnum;
            }

            c = searcher.numDocs(docs, deState);

            postingsEnum = deState.postingsEnum;
          } else {
            // iterate over TermDocs to calculate the intersection

            // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it
            // matter for this?
            // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class
            // impl)
            // TODO: would passing deleted docs lead to better efficiency over checking the
            // fastForRandomSet?
            postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
            c = 0;

            if (postingsEnum instanceof MultiPostingsEnum) {
              MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
              int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
              for (int subindex = 0; subindex < numSubs; subindex++) {
                MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
                if (sub.postingsEnum == null) continue;
                int base = sub.slice.start;
                int docid;
                while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                  if (fastForRandomSet.exists(docid + base)) c++;
                }
              }
            } else {
              int docid;
              while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (fastForRandomSet.exists(docid)) c++;
              }
            }
          }

          if (sortByCount) {
            if (c > min) {
              BytesRef termCopy = BytesRef.deepCopyOf(term);
              queue.add(new CountPair<>(termCopy, c));
              if (queue.size() >= maxsize) min = queue.last().val;
            }
          } else {
            if (c >= mincount && --off < 0) {
              if (--lim < 0) break;
              ft.indexedToReadable(term, charsRef);
              res.add(charsRef.toString(), c);
            }
          }
        }
      }
      term = termsEnum.next();
    }
  }

  if (sortByCount) {
    for (CountPair<BytesRef, Integer> p : queue) {
      if (--off >= 0) continue;
      if (--lim < 0) break;
      ft.indexedToReadable(p.key, charsRef);
      res.add(charsRef.toString(), p.val);
    }
  }

  if (missing) {
    res.add(null, getFieldMissingCount(searcher, docs, field));
  }

  return res;
}
/**
 * Term counts for use in field faceting that respects the specified mincount - if mincount is
 * null, the "zeros" param is consulted for the appropriate backcompat default
 *
 * @see FacetParams#FACET_ZEROS
 */
private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed)
    throws IOException {
  final SolrParams params = parsed.params;
  final DocSet docs = parsed.docs;
  final int threads = parsed.threads;
  int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
  int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
  if (limit == 0) return new NamedList<>();
  if (mincount == null) {
    Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
    // mincount = (zeros!=null && zeros) ? 0 : 1;
    mincount = (zeros != null && !zeros) ? 1 : 0; // current default is to include zeros.
  }
  boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
  // default to sorting if there is a limit.
  String sort =
      params.getFieldParam(
          field,
          FacetParams.FACET_SORT,
          limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
  String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
  String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
  boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);

  NamedList<Integer> counts;
  SchemaField sf = searcher.getSchema().getField(field);
  FieldType ft = sf.getType();

  // determine what type of faceting method to use
  final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
  FacetMethod method = null;
  if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
    method = FacetMethod.ENUM;
  } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
    method = FacetMethod.FCS;
  } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
    method = FacetMethod.FC;
  }

  if (method == FacetMethod.ENUM && TrieField.getMainValuePrefix(ft) != null) {
    // enum can't deal with trie fields that index several terms per value
    method = sf.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
  }

  if (method == null && ft instanceof BoolField) {
    // Always use filters for booleans... we know the number of values is very small.
    method = FacetMethod.ENUM;
  }

  final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

  if (ft.getNumericType() != null && !sf.multiValued()) {
    // the per-segment approach is optimal for numeric field types since there
    // are no global ords to merge and no need to create an expensive
    // top-level reader
    method = FacetMethod.FCS;
  }

  if (method == null) {
    // TODO: default to per-segment or not?
    method = FacetMethod.FC;
  }

  if (method == FacetMethod.FCS && multiToken) {
    // only fc knows how to deal with multi-token fields
    method = FacetMethod.FC;
  }

  if (method == FacetMethod.ENUM && sf.hasDocValues()) {
    // only fc can handle docvalues types
    method = FacetMethod.FC;
  }

  if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
    counts =
        getGroupedCounts(
            searcher,
            docs,
            field,
            multiToken,
            offset,
            limit,
            mincount,
            missing,
            sort,
            prefix,
            contains,
            ignoreCase);
  } else {
    assert method != null;
    switch (method) {
      case ENUM:
        assert TrieField.getMainValuePrefix(ft) == null;
        counts =
            getFacetTermEnumCounts(
                searcher,
                docs,
                field,
                offset,
                limit,
                mincount,
                missing,
                sort,
                prefix,
                contains,
                ignoreCase,
                params);
        break;
      case FCS:
        assert !multiToken;
        if (ft.getNumericType() != null && !sf.multiValued()) {
          // force numeric faceting
          if (prefix != null && !prefix.isEmpty()) {
            throw new SolrException(
                ErrorCode.BAD_REQUEST,
                FacetParams.FACET_PREFIX + " is not supported on numeric types");
          }
          if (contains != null && !contains.isEmpty()) {
            throw new SolrException(
                ErrorCode.BAD_REQUEST,
                FacetParams.FACET_CONTAINS + " is not supported on numeric types");
          }
          counts =
              NumericFacets.getCounts(
                  searcher, docs, field, offset, limit, mincount, missing, sort);
        } else {
          PerSegmentSingleValuedFaceting ps =
              new PerSegmentSingleValuedFaceting(
                  searcher,
                  docs,
                  field,
                  offset,
                  limit,
                  mincount,
                  missing,
                  sort,
                  prefix,
                  contains,
                  ignoreCase);
          Executor executor = threads == 0 ? directExecutor : facetExecutor;
          ps.setNumThreads(threads);
          counts = ps.getFacetCounts(executor);
        }
        break;
      case FC:
        counts =
            DocValuesFacets.getCounts(
                searcher,
                docs,
                field,
                offset,
                limit,
                mincount,
                missing,
                sort,
                prefix,
                contains,
                ignoreCase);
        break;
      default:
        throw new AssertionError();
    }
  }

  return counts;
}
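// ---------------------------------------------------------------------------
// Illustration (not from the Solr source): the mincount/zeros backcompat rule
// applied at the top of getTermCounts() above. An explicit facet.mincount is
// passed in as a non-null Integer and wins; when it is null, facet.zeros=false
// maps to mincount=1 and anything else (true or absent) maps to mincount=0.
// The class name and the field names "cat" and "name" below are made up.
// ---------------------------------------------------------------------------
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;

public class MincountBackcompatDemo {
  static int resolveMincount(Integer mincount, SolrParams params, String field) {
    if (mincount == null) {
      Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
      // current default is to include zeros
      mincount = (zeros != null && !zeros) ? 1 : 0;
    }
    return mincount;
  }

  public static void main(String[] args) {
    Map<String, String> map = new HashMap<>();
    map.put("f.cat." + FacetParams.FACET_ZEROS, "false"); // per-field backcompat param
    SolrParams params = new MapSolrParams(map);

    System.out.println(resolveMincount(null, params, "cat"));  // 1 (zeros disabled for "cat")
    System.out.println(resolveMincount(null, params, "name")); // 0 (default includes zeros)
    System.out.println(resolveMincount(5, params, "cat"));     // 5 (explicit mincount wins)
  }
}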
private void doHighlightingByHighlighter(
    Query query,
    SolrQueryRequest req,
    NamedList docSummaries,
    int docId,
    Document doc,
    String fieldName)
    throws IOException {
  final SolrIndexSearcher searcher = req.getSearcher();
  final IndexSchema schema = searcher.getSchema();

  // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
  // so we disable them until fixed (see LUCENE-3080)!
  // BEGIN: Hack
  final SchemaField schemaField = schema.getFieldOrNull(fieldName);
  if (schemaField != null
      && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
          || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;
  // END: Hack

  SolrParams params = req.getParams();

  IndexableField[] docFields = doc.getFields(fieldName);
  List<String> listFields = new ArrayList<String>();
  for (IndexableField field : docFields) {
    listFields.add(field.stringValue());
  }

  // preserve order of values in a multiValued list
  boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);

  String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]);

  // according to the Document javadoc, doc.getValues() never returns null;
  // check for empty instead of null
  if (docTexts.length == 0) return;

  TokenStream tstream = null;
  int numFragments = getMaxSnippets(fieldName, params);
  boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

  String[] summaries = null;
  List<TextFragment> frags = new ArrayList<TextFragment>();

  TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
  TokenStream tvStream =
      TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName);
  if (tvStream != null) {
    tots = new TermOffsetsTokenStream(tvStream);
  }

  for (int j = 0; j < docTexts.length; j++) {
    if (tots != null) {
      // if we're using the TermOffsets optimization, then get the next
      // field value's TokenStream (i.e. get field j's TokenStream) from tots:
      tstream = tots.getMultiValuedTokenStream(docTexts[j].length());
    } else {
      // fall back to the analyzer
      tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
    }

    int maxCharsToAnalyze =
        params.getFieldInt(
            fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

    Highlighter highlighter;
    if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
      if (maxCharsToAnalyze < 0) {
        tstream = new CachingTokenFilter(tstream);
      } else {
        tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
      }

      // get highlighter
      highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

      // after highlighter initialization, reset tstream since construction of the highlighter
      // already used it
      tstream.reset();
    } else {
      // use "the old way"
      highlighter = getHighlighter(query, fieldName, req);
    }

    if (maxCharsToAnalyze < 0) {
      highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
    } else {
      highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
    }

    try {
      TextFragment[] bestTextFragments =
          highlighter.getBestTextFragments(
              tstream, docTexts[j], mergeContiguousFragments, numFragments);
      for (int k = 0; k < bestTextFragments.length; k++) {
        if (preserveMulti) {
          if (bestTextFragments[k] != null) {
            frags.add(bestTextFragments[k]);
          }
        } else {
          if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
            frags.add(bestTextFragments[k]);
          }
        }
      }
    } catch (InvalidTokenOffsetsException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    }
  }

  // sort such that the fragments with the highest score come first
  if (!preserveMulti) {
    Collections.sort(
        frags,
        new Comparator<TextFragment>() {
          @Override
          public int compare(TextFragment arg0, TextFragment arg1) {
            return Math.round(arg1.getScore() - arg0.getScore());
          }
        });
  }

  // convert fragments back into text
  // TODO: we can include score and position information in output as snippet attributes
  if (frags.size() > 0) {
    ArrayList<String> fragTexts = new ArrayList<String>();
    for (TextFragment fragment : frags) {
      if (preserveMulti) {
        if (fragment != null) {
          fragTexts.add(fragment.toString());
        }
      } else {
        if ((fragment != null) && (fragment.getScore() > 0)) {
          fragTexts.add(fragment.toString());
        }
      }
      if (fragTexts.size() >= numFragments && !preserveMulti) break;
    }
    summaries = fragTexts.toArray(new String[0]);
    if (summaries.length > 0) docSummaries.add(fieldName, summaries);
  }

  // no summaries were made, copy text from the alternate field
  if (summaries == null || summaries.length == 0) {
    alternateField(docSummaries, params, doc, fieldName);
  }
}
/** Called by {@link org.apache.solr.request.SimpleFacets} to compute heatmap facets. */
public static NamedList<Object> getHeatmapForField(
    String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params, DocSet docSet)
    throws IOException {
  // get the strategy from the field type
  final SchemaField schemaField = rb.req.getSchema().getField(fieldName);
  final FieldType type = schemaField.getType();

  final PrefixTreeStrategy strategy;
  final DistanceUnits distanceUnits;
  // note: the two instanceof conditions are not ideal, versus one. If we start needing to add
  // more then refactor.
  if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
    AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
    strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
    distanceUnits = rptType.getDistanceUnits();
  } else if (type instanceof RptWithGeometrySpatialField) {
    RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
    strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
    distanceUnits = rptSdvType.getDistanceUnits();
  } else {
    // FYI we support the term query one too but few people use that one
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "heatmap field needs to be of type "
            + SpatialRecursivePrefixTreeFieldType.class
            + " or "
            + RptWithGeometrySpatialField.class);
  }

  final SpatialContext ctx = strategy.getSpatialContext();

  // get the bbox (query Rectangle)
  String geomStr = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM);
  final Shape boundsShape =
      geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);

  // get the grid level (possibly indirectly via distErr or distErrPct)
  final int gridLevel;
  Integer gridLevelObj = params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL);
  final int maxGridLevel = strategy.getGrid().getMaxLevels();
  if (gridLevelObj != null) {
    gridLevel = gridLevelObj;
    if (gridLevel <= 0 || gridLevel > maxGridLevel) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          FacetParams.FACET_HEATMAP_LEVEL + " should be > 0 and <= " + maxGridLevel);
    }
  } else {
    // SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr &
    // distErrPct. Arguably that should be refactored to feel less weird than using it like this.
    SpatialArgs spatialArgs =
        new SpatialArgs(
            SpatialOperation.Intersects /*ignored*/,
            boundsShape == null ? ctx.getWorldBounds() : boundsShape);
    final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
    if (distErrObj != null) {
      // convert distErr units based on configured units
      spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
    }
    spatialArgs.setDistErrPct(
        params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
    double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
    if (distErr <= 0) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          FacetParams.FACET_HEATMAP_DIST_ERR_PCT
              + " or "
              + FacetParams.FACET_HEATMAP_DIST_ERR
              + " should be > 0 or instead provide "
              + FacetParams.FACET_HEATMAP_LEVEL
              + "="
              + maxGridLevel
              + " if you insist on maximum detail");
    }
    // The SPT (grid) can lookup a grid level satisfying an error distance constraint
    gridLevel = strategy.getGrid().getLevelForDistance(distErr);
  }

  // Compute!
  final HeatmapFacetCounter.Heatmap heatmap;
  try {
    heatmap =
        HeatmapFacetCounter.calcFacets(
            strategy,
            rb.req.getSearcher().getTopReaderContext(),
            docSet.getTopFilter(),
            boundsShape,
            gridLevel,
            params.getFieldInt(
                fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS, 100_000) // will throw if exceeded
            );
  } catch (IllegalArgumentException e) { // e.g. too many cells
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
  }

  // Populate response
  NamedList<Object> result = new NamedList<>();
  result.add("gridLevel", gridLevel);
  result.add("columns", heatmap.columns);
  result.add("rows", heatmap.rows);
  result.add("minX", heatmap.region.getMinX());
  result.add("maxX", heatmap.region.getMaxX());
  result.add("minY", heatmap.region.getMinY());
  result.add("maxY", heatmap.region.getMaxY());

  boolean hasNonZero = false;
  for (int count : heatmap.counts) {
    if (count > 0) {
      hasNonZero = true;
      break;
    }
  }
  formatCountsAndAddToNL(
      fieldKey,
      rb,
      params,
      heatmap.columns,
      heatmap.rows,
      hasNonZero ? heatmap.counts : null,
      result);

  return result;
}