private void alternateField(
    NamedList docSummaries, SolrParams params, Document doc, String fieldName) {
  String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
  if (alternateField != null && alternateField.length() > 0) {
    String[] altTexts = doc.getValues(alternateField);
    if (altTexts != null && altTexts.length > 0) {
      int alternateFieldLen =
          params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
      if (alternateFieldLen <= 0) {
        docSummaries.add(fieldName, altTexts);
      } else {
        List<String> altList = new ArrayList<String>();
        int len = 0;
        for (String altText : altTexts) {
          altList.add(
              len + altText.length() > alternateFieldLen
                  ? new String(altText.substring(0, alternateFieldLen - len))
                  : altText);
          len += altText.length();
          if (len >= alternateFieldLen) break;
        }
        docSummaries.add(fieldName, altList);
      }
    }
  }
}
public Fragmenter getFragmenter(String fieldName, SolrParams params) {
  numRequests++;
  if (defaults != null) {
    params = new DefaultSolrParams(params, defaults);
  }

  int fragsize =
      params.getFieldInt(
          fieldName, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
  int increment =
      params.getFieldInt(
          fieldName, HighlightParams.INCREMENT, LuceneRegexFragmenter.DEFAULT_INCREMENT_GAP);
  float slop =
      params.getFieldFloat(fieldName, HighlightParams.SLOP, LuceneRegexFragmenter.DEFAULT_SLOP);
  int maxchars =
      params.getFieldInt(
          fieldName,
          HighlightParams.MAX_RE_CHARS,
          LuceneRegexFragmenter.DEFAULT_MAX_ANALYZED_CHARS);
  String rawpat =
      params.getFieldParam(
          fieldName, HighlightParams.PATTERN, LuceneRegexFragmenter.DEFAULT_PATTERN_RAW);

  // The identity comparison (==) looks deliberate: getFieldParam returns the default object
  // itself when the param is absent, so the pre-compiled default pattern can be reused
  // instead of recompiling.
  Pattern p = rawpat == defaultPatternRaw ? defaultPattern : Pattern.compile(rawpat);

  if (fragsize <= 0) {
    return new NullFragmenter();
  }

  return new LuceneRegexFragmenter(fragsize, increment, slop, maxchars, p);
}
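/*
 * Illustrative usage sketch (not part of the original class): how the per-field
 * highlighting params above would be fed in from a request. ModifiableSolrParams
 * (org.apache.solr.common.params) is the standard mutable SolrParams; the field
 * name "content" and the regex fragmenter instance are assumptions for the example.
 */
static Fragmenter exampleRegexFragmenter(RegexFragmenter regexFragmenter) {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set(HighlightParams.FRAGSIZE, 70); // a value <= 0 would select NullFragmenter above
  params.set(HighlightParams.SLOP, "0.6"); // how far a fragment may deviate from fragsize
  return regexFragmenter.getFragmenter("content", params);
}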
private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) {

  field = fieldName;
  parentValue = parent;

  // facet params
  SolrParams parameters = rb.req.getParams();
  facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
  facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
  facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
  String defaultSort =
      (facetFieldLimit > 0) ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
  facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);

  valueCollection =
      new PivotFacetFieldValueCollection(
          facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort);

  if ((facetFieldLimit < 0)
      ||
      // TODO: possible refinement issue if limit=0 & mincount=0 & missing=true
      // (ie: we only want the missing count for this field)
      (facetFieldLimit <= 0 && facetFieldMinimumCount == 0)
      || (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0)) {
    // in any of these cases, there's no need to refine this level of the pivot
    needRefinementAtThisLevel = false;
  }
}
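/*
 * Illustrative params (sketch, not from the original source): with facet.limit
 * forced negative for the "cat" level, the constructor above disables refinement
 * at that level. The field and param values here are invented for the example.
 */
static void examplePivotParams(ModifiableSolrParams params) {
  params.set(FacetParams.FACET_PIVOT, "cat,brand");
  params.set("f.cat." + FacetParams.FACET_LIMIT, -1); // limit < 0 => needRefinementAtThisLevel = false
  params.set(FacetParams.FACET_PIVOT_MINCOUNT, 1);
}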
private void alternateField(
    NamedList docSummaries, SolrParams params, Document doc, String fieldName) {
  String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
  if (alternateField != null && alternateField.length() > 0) {
    IndexableField[] docFields = doc.getFields(alternateField);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
      // only stored text values can serve as a highlighting fallback; skip binary fields
      if (field.binaryValue() == null) listFields.add(field.stringValue());
    }
    String[] altTexts = listFields.toArray(new String[listFields.size()]);

    if (altTexts != null && altTexts.length > 0) {
      Encoder encoder = getEncoder(fieldName, params);
      int alternateFieldLen =
          params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
      List<String> altList = new ArrayList<String>();
      int len = 0;
      for (String altText : altTexts) {
        if (alternateFieldLen <= 0) {
          // no length cap: encode and emit every value
          altList.add(encoder.encodeText(altText));
        } else {
          altList.add(
              len + altText.length() > alternateFieldLen
                  ? encoder.encodeText(new String(altText.substring(0, alternateFieldLen - len)))
                  : encoder.encodeText(altText));
          len += altText.length();
          if (len >= alternateFieldLen) break;
        }
      }
      docSummaries.add(fieldName, altList);
    }
  }
}
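/*
 * Illustrative params (sketch, not from the original source): a field whose
 * highlighting produced no snippets falls back to at most 100 characters of the
 * stored "summary" field, run through the configured encoder. Field names are
 * invented; the "f.<field>." prefix is how getFieldParam resolves per-field
 * overrides.
 */
static void exampleAlternateFieldParams(ModifiableSolrParams params) {
  params.set("f.content." + HighlightParams.ALTERNATE_FIELD, "summary");
  params.set("f.content." + HighlightParams.ALTERNATE_FIELD_LENGTH, 100);
}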
private static void formatCountsAndAddToNL(
    String fieldKey,
    ResponseBuilder rb,
    SolrParams params,
    int columns,
    int rows,
    int[] counts,
    NamedList<Object> result) {
  final String format =
      params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_FORMAT, FORMAT_INTS2D);
  final Object countsVal;
  switch (format) {
    case FORMAT_INTS2D:
      // A List of List of Integers. Good for small heatmaps and ease of consumption
      countsVal = counts != null ? asInts2D(columns, rows, counts) : null;
      break;
    case FORMAT_PNG:
      // A PNG graphic; compressed. Good for large & dense heatmaps; hard to consume.
      countsVal = counts != null ? asPngBytes(columns, rows, counts, rb) : null;
      break;
      // TODO case skipList: // A sequence of values; negative values are actually how many 0's
      // to insert. Good for small or large but sparse heatmaps.
      // TODO auto choose png or skipList; use skipList when < ~25% full or <= ~512 cells
      // remember to augment error list below when we add more formats.
    default:
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "format should be " + FORMAT_INTS2D + " or " + FORMAT_PNG);
  }
  result.add("counts_" + format, countsVal);
}
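/*
 * For orientation, a minimal sketch of what an ints2D conversion could look like,
 * assuming a row-major flat counts array. This is NOT the actual asInts2D: the
 * real implementation may use a different cell ordering and may null out all-zero
 * rows to keep responses compact.
 */
static List<List<Integer>> asInts2DSketch(int columns, int rows, int[] counts) {
  List<List<Integer>> grid = new ArrayList<>(rows);
  for (int r = 0; r < rows; r++) {
    List<Integer> row = new ArrayList<>(columns);
    for (int c = 0; c < columns; c++) {
      row.add(counts[r * columns + c]); // row-major indexing is an assumption here
    }
    grid.add(row);
  }
  return grid;
}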
/**
 * Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a
 * fragmenter has not been configured for this field, fall back to the configured default or the
 * solr default ({@link org.apache.solr.highlight.GapFragmenter}).
 *
 * @param fieldName The name of the field
 * @param params The params controlling Highlighting
 * @return An appropriate {@link org.apache.lucene.search.highlight.Fragmenter}.
 */
protected Fragmenter getFragmenter(String fieldName, SolrParams params) {
  String fmt = params.getFieldParam(fieldName, HighlightParams.FRAGMENTER);
  // an absent param leaves fmt == null, so for the documented fallback to work the map
  // must resolve the null key to the configured (or solr) default fragmenter
  SolrFragmenter frag = fragmenters.get(fmt);
  if (frag == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: " + fmt);
  }
  return frag.getFragmenter(fieldName, params);
}
/**
 * Return an {@link org.apache.lucene.search.highlight.Encoder} appropriate for this field. If an
 * encoder has not been configured for this field, fall back to the configured default or the solr
 * default ({@link org.apache.lucene.search.highlight.DefaultEncoder}).
 *
 * @param fieldName The name of the field
 * @param params The params controlling Highlighting
 * @return An appropriate {@link org.apache.lucene.search.highlight.Encoder}.
 */
protected Encoder getEncoder(String fieldName, SolrParams params) {
  String str = params.getFieldParam(fieldName, HighlightParams.ENCODER);
  SolrEncoder encoder = encoders.get(str);
  if (encoder == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown encoder: " + str);
  }
  return encoder.getEncoder(fieldName, params);
}
/**
 * Return a {@link org.apache.lucene.search.highlight.Formatter} appropriate for this field. If a
 * formatter has not been configured for this field, fall back to the configured default or the
 * solr default ({@link org.apache.lucene.search.highlight.SimpleHTMLFormatter}).
 *
 * @param fieldName The name of the field
 * @param params The params controlling Highlighting
 * @return An appropriate {@link org.apache.lucene.search.highlight.Formatter}.
 */
protected Formatter getFormatter(String fieldName, SolrParams params) {
  String str = params.getFieldParam(fieldName, HighlightParams.FORMATTER);
  SolrFormatter formatter = formatters.get(str);
  if (formatter == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: " + str);
  }
  return formatter.getFormatter(fieldName, params);
}
private SolrFragmentsBuilder getSolrFragmentsBuilder(String fieldName, SolrParams params) {
  String fb = params.getFieldParam(fieldName, HighlightParams.FRAGMENTS_BUILDER);
  SolrFragmentsBuilder solrFb = fragmentsBuilders.get(fb);
  if (solrFb == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb);
  }
  return solrFb;
}
protected FragListBuilder getFragListBuilder(String fieldName, SolrParams params) {
  String flb = params.getFieldParam(fieldName, HighlightParams.FRAG_LIST_BUILDER);
  SolrFragListBuilder solrFlb = fragListBuilders.get(flb);
  if (solrFlb == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST, "Unknown fragListBuilder: " + flb);
  }
  return solrFlb.getFragListBuilder(params);
}
private BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params) {
  String bs = params.getFieldParam(fieldName, HighlightParams.BOUNDARY_SCANNER);
  SolrBoundaryScanner solrBs = boundaryScanners.get(bs);
  if (solrBs == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST, "Unknown boundaryScanner: " + bs);
  }
  return solrBs.getBoundaryScanner(fieldName, params);
}
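/*
 * Illustrative per-field plugin selection (sketch; the plugin names "regex" and
 * "html" are assumptions and must match names registered in solrconfig.xml): the
 * lookups above all key off a plugin-name param, so a request can pick, per
 * field, any registered fragmenter, formatter, encoder, fragListBuilder, or
 * boundaryScanner.
 */
static void examplePluginSelectionParams(ModifiableSolrParams params) {
  params.set("f.content." + HighlightParams.FRAGMENTER, "regex"); // assumes a fragmenter named "regex"
  params.set("f.title." + HighlightParams.FORMATTER, "html"); // assumes a formatter named "html"
}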
@Override
protected BoundaryScanner get(String fieldName, SolrParams params) {
  // construct Locale
  String language = params.getFieldParam(fieldName, HighlightParams.BS_LANGUAGE);
  String country = params.getFieldParam(fieldName, HighlightParams.BS_COUNTRY);
  if (country != null && language == null) {
    throw new SolrException(
        ErrorCode.BAD_REQUEST,
        HighlightParams.BS_LANGUAGE
            + " parameter cannot be null when you specify "
            + HighlightParams.BS_COUNTRY);
  }
  Locale locale;
  if (language != null) {
    locale = country == null ? new Locale(language) : new Locale(language, country);
  } else {
    locale = Locale.ROOT;
  }

  // construct BreakIterator
  String type =
      params.getFieldParam(fieldName, HighlightParams.BS_TYPE, "WORD").toLowerCase(Locale.ROOT);
  BreakIterator bi;
  if (type.equals("character")) {
    bi = BreakIterator.getCharacterInstance(locale);
  } else if (type.equals("word")) {
    bi = BreakIterator.getWordInstance(locale);
  } else if (type.equals("line")) {
    bi = BreakIterator.getLineInstance(locale);
  } else if (type.equals("sentence")) {
    bi = BreakIterator.getSentenceInstance(locale);
  } else {
    throw new SolrException(
        ErrorCode.BAD_REQUEST, type + " is invalid for parameter " + HighlightParams.BS_TYPE);
  }

  return new org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner(bi);
}
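/*
 * Illustrative params (sketch; field name invented): a sentence-level
 * BreakIterator localized to en_US. BS_TYPE is lowercased above, so case does
 * not matter, and BS_COUNTRY without BS_LANGUAGE is rejected.
 */
static void exampleBreakIteratorParams(ModifiableSolrParams params) {
  params.set("f.content." + HighlightParams.BS_TYPE, "SENTENCE");
  params.set("f.content." + HighlightParams.BS_LANGUAGE, "en");
  params.set("f.content." + HighlightParams.BS_COUNTRY, "US");
}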
/** create the FieldAdders that control how each field is indexed */
void prepareFields() {
  // Possible future optimization: for really rapid incremental indexing
  // from a POST, one could cache all of this setup info based on the params.
  // The link from FieldAdder to this would need to be severed for that to happen.

  adders = new CSVLoaderBase.FieldAdder[fieldnames.length];
  String skipStr = params.get(SKIP);
  List<String> skipFields = skipStr == null ? null : StrUtils.splitSmart(skipStr, ',');

  CSVLoaderBase.FieldAdder adder = new CSVLoaderBase.FieldAdder();
  CSVLoaderBase.FieldAdder adderKeepEmpty = new CSVLoaderBase.FieldAdderEmpty();

  for (int i = 0; i < fieldnames.length; i++) {
    String fname = fieldnames[i];
    // to skip a field, leave the entries in fields and addrs null
    if (fname.length() == 0 || (skipFields != null && skipFields.contains(fname))) continue;

    boolean keepEmpty = params.getFieldBool(fname, EMPTY, false);
    adders[i] = keepEmpty ? adderKeepEmpty : adder;

    // Order that operations are applied: split -> trim -> map -> add
    // so create in reverse order.
    // Creation of FieldAdders could be optimized and shared among fields

    String[] fmap = params.getFieldParams(fname, MAP);
    if (fmap != null) {
      for (String mapRule : fmap) {
        String[] mapArgs = colonSplit.split(mapRule, -1);
        if (mapArgs.length != 2)
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST,
              "Map rules must be of the form 'from:to', got '" + mapRule + "'");
        adders[i] = new CSVLoaderBase.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
      }
    }

    if (params.getFieldBool(fname, TRIM, false)) {
      adders[i] = new CSVLoaderBase.FieldTrimmer(adders[i]);
    }

    if (params.getFieldBool(fname, SPLIT, false)) {
      String sepStr = params.getFieldParam(fname, SEPARATOR);
      char fsep = sepStr == null || sepStr.length() == 0 ? ',' : sepStr.charAt(0);
      String encStr = params.getFieldParam(fname, ENCAPSULATOR);
      char fenc = encStr == null || encStr.length() == 0 ? (char) -2 : encStr.charAt(0);
      String escStr = params.getFieldParam(fname, ESCAPE);
      char fesc =
          escStr == null || escStr.length() == 0 ? CSVStrategy.ESCAPE_DISABLED : escStr.charAt(0);

      CSVStrategy fstrat =
          new CSVStrategy(
              fsep, fenc, CSVStrategy.COMMENTS_DISABLED, fesc, false, false, false, false);
      adders[i] = new CSVLoaderBase.FieldSplitter(fstrat, adders[i]);
    }
  }

  // look for any literal fields - literal.foo=xyzzy
  Iterator<String> paramNames = params.getParameterNamesIterator();
  while (paramNames.hasNext()) {
    String pname = paramNames.next();
    if (!pname.startsWith(LITERALS_PREFIX)) continue;
    String name = pname.substring(LITERALS_PREFIX.length());
    literals.put(name, params.get(pname));
  }
}
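/*
 * Illustrative request params this method acts on (sketch; field names invented,
 * param keys expressed via the class constants rather than guessed strings):
 * split the "tags" column on ';', trim "title", and stamp every document with a
 * literal field.
 */
static void exampleCsvFieldParams(ModifiableSolrParams params) {
  params.set("f.tags." + SPLIT, true);
  params.set("f.tags." + SEPARATOR, ";");
  params.set("f.title." + TRIM, true);
  params.set(LITERALS_PREFIX + "source", "csv-import"); // e.g. literal.source=csv-import
}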
/**
 * Term counts for use in field faceting that respects the specified mincount - if mincount is
 * null, the "zeros" param is consulted for the appropriate backcompat default
 *
 * @see FacetParams#FACET_ZEROS
 */
private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed)
    throws IOException {
  final SolrParams params = parsed.params;
  final DocSet docs = parsed.docs;
  final int threads = parsed.threads;
  int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
  int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
  if (limit == 0) return new NamedList<>();
  if (mincount == null) {
    Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
    // mincount = (zeros!=null && zeros) ? 0 : 1;
    mincount = (zeros != null && !zeros) ? 1 : 0; // current default is to include zeros.
  }
  boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
  // default to sorting if there is a limit.
  String sort =
      params.getFieldParam(
          field,
          FacetParams.FACET_SORT,
          limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
  String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
  String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
  boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);

  NamedList<Integer> counts;
  SchemaField sf = searcher.getSchema().getField(field);
  FieldType ft = sf.getType();

  // determine what type of faceting method to use
  final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
  FacetMethod method = null;
  if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
    method = FacetMethod.ENUM;
  } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
    method = FacetMethod.FCS;
  } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
    method = FacetMethod.FC;
  }

  if (method == FacetMethod.ENUM && TrieField.getMainValuePrefix(ft) != null) {
    // enum can't deal with trie fields that index several terms per value
    method = sf.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
  }

  if (method == null && ft instanceof BoolField) {
    // Always use filters for booleans... we know the number of values is very small.
    method = FacetMethod.ENUM;
  }

  final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

  if (ft.getNumericType() != null && !sf.multiValued()) {
    // the per-segment approach is optimal for numeric field types since there
    // are no global ords to merge and no need to create an expensive
    // top-level reader
    method = FacetMethod.FCS;
  }

  if (method == null) {
    // TODO: default to per-segment or not?
    method = FacetMethod.FC;
  }

  if (method == FacetMethod.FCS && multiToken) {
    // only fc knows how to deal with multi-token fields
    method = FacetMethod.FC;
  }

  if (method == FacetMethod.ENUM && sf.hasDocValues()) {
    // only fc can handle docvalues types
    method = FacetMethod.FC;
  }

  if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
    counts =
        getGroupedCounts(
            searcher,
            docs,
            field,
            multiToken,
            offset,
            limit,
            mincount,
            missing,
            sort,
            prefix,
            contains,
            ignoreCase);
  } else {
    assert method != null;
    switch (method) {
      case ENUM:
        assert TrieField.getMainValuePrefix(ft) == null;
        counts =
            getFacetTermEnumCounts(
                searcher,
                docs,
                field,
                offset,
                limit,
                mincount,
                missing,
                sort,
                prefix,
                contains,
                ignoreCase,
                params);
        break;
      case FCS:
        assert !multiToken;
        if (ft.getNumericType() != null && !sf.multiValued()) {
          // force numeric faceting
          if (prefix != null && !prefix.isEmpty()) {
            throw new SolrException(
                ErrorCode.BAD_REQUEST,
                FacetParams.FACET_PREFIX + " is not supported on numeric types");
          }
          if (contains != null && !contains.isEmpty()) {
            throw new SolrException(
                ErrorCode.BAD_REQUEST,
                FacetParams.FACET_CONTAINS + " is not supported on numeric types");
          }
          counts =
              NumericFacets.getCounts(
                  searcher, docs, field, offset, limit, mincount, missing, sort);
        } else {
          PerSegmentSingleValuedFaceting ps =
              new PerSegmentSingleValuedFaceting(
                  searcher,
                  docs,
                  field,
                  offset,
                  limit,
                  mincount,
                  missing,
                  sort,
                  prefix,
                  contains,
                  ignoreCase);
          Executor executor = threads == 0 ? directExecutor : facetExecutor;
          ps.setNumThreads(threads);
          counts = ps.getFacetCounts(executor);
        }
        break;
      case FC:
        counts =
            DocValuesFacets.getCounts(
                searcher,
                docs,
                field,
                offset,
                limit,
                mincount,
                missing,
                sort,
                prefix,
                contains,
                ignoreCase);
        break;
      default:
        throw new AssertionError();
    }
  }

  return counts;
}
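/*
 * Illustrative per-field override (sketch; field name invented): request the enum
 * method explicitly. As the logic above shows, the request is advisory - for
 * example, a docValues field is still rewritten to FC.
 */
static void exampleFacetMethodParams(ModifiableSolrParams params) {
  params.set("f.category." + FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_enum);
  params.set("f.category." + FacetParams.FACET_LIMIT, 50);
  params.set("f.category." + FacetParams.FACET_MINCOUNT, 1);
}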
/** Called by {@link org.apache.solr.request.SimpleFacets} to compute heatmap facets. */
public static NamedList<Object> getHeatmapForField(
    String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params, DocSet docSet)
    throws IOException {
  // get the strategy from the field type
  final SchemaField schemaField = rb.req.getSchema().getField(fieldName);
  final FieldType type = schemaField.getType();

  final PrefixTreeStrategy strategy;
  final DistanceUnits distanceUnits;
  // note: the two instanceof conditions are not ideal, versus one. If we start needing to add
  // more then refactor.
  if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
    AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
    strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
    distanceUnits = rptType.getDistanceUnits();
  } else if (type instanceof RptWithGeometrySpatialField) {
    RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
    strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
    distanceUnits = rptSdvType.getDistanceUnits();
  } else {
    // FYI we support the term query one too but few people use that one
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "heatmap field needs to be of type "
            + SpatialRecursivePrefixTreeFieldType.class
            + " or "
            + RptWithGeometrySpatialField.class);
  }

  final SpatialContext ctx = strategy.getSpatialContext();

  // get the bbox (query Rectangle)
  String geomStr = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM);
  final Shape boundsShape =
      geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);

  // get the grid level (possibly indirectly via distErr or distErrPct)
  final int gridLevel;
  Integer gridLevelObj = params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL);
  final int maxGridLevel = strategy.getGrid().getMaxLevels();
  if (gridLevelObj != null) {
    gridLevel = gridLevelObj;
    if (gridLevel <= 0 || gridLevel > maxGridLevel) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          FacetParams.FACET_HEATMAP_LEVEL + " should be > 0 and <= " + maxGridLevel);
    }
  } else {
    // SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr &
    // distErrPct. Arguably that should be refactored to feel less weird than using it like this.
    SpatialArgs spatialArgs =
        new SpatialArgs(
            SpatialOperation.Intersects /*ignored*/,
            boundsShape == null ? ctx.getWorldBounds() : boundsShape);
    final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
    if (distErrObj != null) {
      // convert distErr units based on configured units
      spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
    }
    spatialArgs.setDistErrPct(
        params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
    double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
    if (distErr <= 0) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          FacetParams.FACET_HEATMAP_DIST_ERR_PCT
              + " or "
              + FacetParams.FACET_HEATMAP_DIST_ERR
              + " should be > 0 or instead provide "
              + FacetParams.FACET_HEATMAP_LEVEL
              + "="
              + maxGridLevel
              + " if you insist on maximum detail");
    }
    // The SPT (grid) can lookup a grid level satisfying an error distance constraint
    gridLevel = strategy.getGrid().getLevelForDistance(distErr);
  }

  // Compute!
  final HeatmapFacetCounter.Heatmap heatmap;
  try {
    heatmap =
        HeatmapFacetCounter.calcFacets(
            strategy,
            rb.req.getSearcher().getTopReaderContext(),
            docSet.getTopFilter(),
            boundsShape,
            gridLevel,
            params.getFieldInt(
                fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS, 100_000)); // will throw if exceeded
  } catch (IllegalArgumentException e) { // e.g. too many cells
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
  }

  // Populate response
  NamedList<Object> result = new NamedList<>();
  result.add("gridLevel", gridLevel);
  result.add("columns", heatmap.columns);
  result.add("rows", heatmap.rows);
  result.add("minX", heatmap.region.getMinX());
  result.add("maxX", heatmap.region.getMaxX());
  result.add("minY", heatmap.region.getMinY());
  result.add("maxY", heatmap.region.getMaxY());

  boolean hasNonZero = false;
  for (int count : heatmap.counts) {
    if (count > 0) {
      hasNonZero = true;
      break;
    }
  }
  formatCountsAndAddToNL(
      fieldKey,
      rb,
      params,
      heatmap.columns,
      heatmap.rows,
      hasNonZero ? heatmap.counts : null,
      result);

  return result;
}
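/*
 * Illustrative request params (sketch; field name invented): a whole-world
 * heatmap at an explicit grid level, returned as PNG. The rectangle syntax is
 * the ["minX minY" TO "maxX maxY"] form accepted by the geom parser.
 */
static void exampleHeatmapParams(ModifiableSolrParams params) {
  params.set(FacetParams.FACET, true);
  params.set(FacetParams.FACET_HEATMAP, "geo");
  params.set("f.geo." + FacetParams.FACET_HEATMAP_GEOM, "[\"-180 -90\" TO \"180 90\"]");
  params.set("f.geo." + FacetParams.FACET_HEATMAP_LEVEL, 4);
  params.set("f.geo." + FacetParams.FACET_HEATMAP_FORMAT, FORMAT_PNG);
}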