protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList) throws SyntaxError, IOException { Map<?, ?> tagMap = (Map<?, ?>) req.getContext().get("tags"); // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler if (tagMap == null || rb == null) { return baseDocSet; } IdentityHashMap<Query, Boolean> excludeSet = new IdentityHashMap<>(); for (String excludeTag : excludeTagList) { Object olst = tagMap.get(excludeTag); // tagMap has entries of List<String,List<QParser>>, but subject to change in the future if (!(olst instanceof Collection)) continue; for (Object o : (Collection<?>) olst) { if (!(o instanceof QParser)) continue; QParser qp = (QParser) o; excludeSet.put(qp.getQuery(), Boolean.TRUE); } } if (excludeSet.size() == 0) return baseDocSet; List<Query> qlist = new ArrayList<>(); // add the base query if (!excludeSet.containsKey(rb.getQuery())) { qlist.add(rb.getQuery()); } // add the filters if (rb.getFilters() != null) { for (Query q : rb.getFilters()) { if (!excludeSet.containsKey(q)) { qlist.add(q); } } } // get the new base docset for this facet DocSet base = searcher.getDocSet(qlist); if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) { Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false); grouping.setWithinGroupSort(rb.getGroupingSpec().getSortWithinGroup()); if (rb.getGroupingSpec().getFields().length > 0) { grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req); } else if (rb.getGroupingSpec().getFunctions().length > 0) { grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req); } else { return base; } AbstractAllGroupHeadsCollector allGroupHeadsCollector = grouping.getCommands().get(0).createAllGroupCollector(); searcher.search(base.getTopFilter(), allGroupHeadsCollector); return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc())); } else { return base; } }
/** * Returns a grouped facet count for the facet query * * @see FacetParams#FACET_QUERY */ public int getGroupedFacetQueryCount(Query facetQuery, DocSet docSet) throws IOException { // It is okay to retrieve group.field from global because it is never a local param String groupField = global.get(GroupParams.GROUP_FIELD); if (groupField == null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter"); } TermAllGroupsCollector collector = new TermAllGroupsCollector(groupField); Filter mainQueryFilter = docSet.getTopFilter(); // This returns a filter that only matches documents matching with q // param and fq params Query filteredFacetQuery = new BooleanQuery.Builder() .add(facetQuery, Occur.MUST) .add(mainQueryFilter, Occur.FILTER) .build(); searcher.search(filteredFacetQuery, collector); return collector.getGroupCount(); }
public NamedList<Integer> getGroupedCounts( SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase) throws IOException { GroupingSpecification groupingSpecification = rb.getGroupingSpec(); final String groupField = groupingSpecification != null ? groupingSpecification.getFields()[0] : null; if (groupField == null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter"); } BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null; final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector( groupField, field, multiToken, prefixBytesRef, 128); SchemaField sf = searcher.getSchema().getFieldOrNull(groupField); if (sf != null && sf.hasDocValues() == false && sf.multiValued() == false && sf.getType().getNumericType() != null) { // it's a single-valued numeric field: we must currently create insanity :( // there isn't a GroupedFacetCollector that works on numerics right now... searcher.search( base.getTopFilter(), new FilterCollector(collector) { @Override public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { LeafReader insane = Insanity.wrapInsanity(context.reader(), groupField); return in.getLeafCollector(insane.getContext()); } }); } else { searcher.search(base.getTopFilter(), collector); } boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY); TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults( limit < 0 ? Integer.MAX_VALUE : (offset + limit), mincount, orderByCount); CharsRefBuilder charsRef = new CharsRefBuilder(); FieldType facetFieldType = searcher.getSchema().getFieldType(field); NamedList<Integer> facetCounts = new NamedList<>(); List<TermGroupFacetCollector.FacetEntry> scopedEntries = result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit); for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) { // :TODO:can we do contains earlier than this to make it more efficient? if (contains != null && !contains(facetEntry.getValue().utf8ToString(), contains, ignoreCase)) { continue; } facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef); facetCounts.add(charsRef.toString(), facetEntry.getCount()); } if (missing) { facetCounts.add(null, result.getTotalMissingCount()); } return facetCounts; }
/** Actually run the query */ @Override public void process(ResponseBuilder rb) throws IOException { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; SolrParams params = req.getParams(); if (!params.getBool(COMPONENT_NAME, true)) { return; } SolrIndexSearcher searcher = req.getSearcher(); if (rb.getQueryCommand().getOffset() < 0) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative"); } // -1 as flag if not set. long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1); // Optional: This could also be implemented by the top-level searcher sending // a filter that lists the ids... that would be transparent to // the request handler, but would be more expensive (and would preserve score // too if desired). String ids = params.get(ShardParams.IDS); if (ids != null) { SchemaField idField = req.getSchema().getUniqueKeyField(); List<String> idArr = StrUtils.splitSmart(ids, ",", true); int[] luceneIds = new int[idArr.size()]; int docs = 0; for (int i = 0; i < idArr.size(); i++) { int id = req.getSearcher() .getFirstMatch( new Term(idField.getName(), idField.getType().toInternal(idArr.get(i)))); if (id >= 0) luceneIds[docs++] = id; } DocListAndSet res = new DocListAndSet(); res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0); if (rb.isNeedDocSet()) { List<Query> queries = new ArrayList<Query>(); queries.add(rb.getQuery()); List<Query> filters = rb.getFilters(); if (filters != null) queries.addAll(filters); res.docSet = searcher.getDocSet(queries); } rb.setResults(res); rsp.add("response", rb.getResults().docList); return; } SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand(); cmd.setTimeAllowed(timeAllowed); SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult(); // // grouping / field collapsing // boolean doGroup = params.getBool(GroupParams.GROUP, false); if (doGroup) { try { cmd.groupCommands = new ArrayList<Grouping.Command>(); String[] fields = params.getParams(GroupParams.GROUP_FIELD); String[] funcs = params.getParams(GroupParams.GROUP_FUNC); String[] queries = params.getParams(GroupParams.GROUP_QUERY); String groupSortStr = params.get(GroupParams.GROUP_SORT); Sort groupSort = groupSortStr != null ? QueryParsing.parseSort(groupSortStr, req.getSchema()) : null; int limitDefault = cmd.getLen(); // this is normally from "rows" int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1); // temporary: implement all group-by-field as group-by-func if (funcs == null) { funcs = fields; } else if (fields != null) { // catenate functions and fields String[] both = new String[fields.length + funcs.length]; System.arraycopy(fields, 0, both, 0, fields.length); System.arraycopy(funcs, 0, both, fields.length, funcs.length); funcs = both; } if (funcs != null) { for (String groupByStr : funcs) { QParser parser = QParser.getParser(groupByStr, "func", rb.req); Query q = parser.getQuery(); Grouping.CommandFunc gc = new Grouping.CommandFunc(); gc.groupSort = groupSort; if (q instanceof FunctionQuery) { gc.groupBy = ((FunctionQuery) q).getValueSource(); } else { gc.groupBy = new QueryValueSource(q, 0.0f); } gc.key = groupByStr; gc.groupLimit = limitDefault; gc.docsPerGroup = docsPerGroupDefault; cmd.groupCommands.add(gc); } } if (cmd.groupCommands.size() == 0) cmd.groupCommands = null; if (cmd.groupCommands != null) { if (rb.doHighlights || rb.isDebug()) { // we need a single list of the returned docs cmd.setFlags(SolrIndexSearcher.GET_DOCLIST); } searcher.search(result, cmd); rb.setResult(result); rsp.add("grouped", result.groupedResults); // TODO: get "hits" a different way to log return; } } catch (ParseException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } } // normal search result searcher.search(result, cmd); rb.setResult(result); rsp.add("response", rb.getResults().docList); rsp.getToLog().add("hits", rb.getResults().docList.matches()); doFieldSortValues(rb, searcher); doPrefetch(rb); }
@SuppressWarnings("unchecked") private static SimpleOrderedMap<Object> getIndexedFieldsInfo( final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms) throws Exception { IndexReader reader = searcher.getReader(); IndexSchema schema = searcher.getSchema(); // Walk the term enum and keep a priority queue for each map in our set Map<String, TopTermQueue> ttinfo = null; if (numTerms > 0) { ttinfo = getTopTerms(reader, fields, numTerms, null); } SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>(); Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); for (String fieldName : fieldNames) { if (fields != null && !fields.contains(fieldName)) { continue; // if a field is specified, only them } SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>(); SchemaField sfield = schema.getFieldOrNull(fieldName); FieldType ftype = (sfield == null) ? null : sfield.getType(); f.add("type", (ftype == null) ? null : ftype.getTypeName()); f.add("schema", getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { f.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); } // If numTerms==0, the call is just asking for a quick field list if (ttinfo != null && sfield != null && sfield.indexed()) { Query q = new TermRangeQuery(fieldName, null, null, false, false); TopDocs top = searcher.search(q, 1); if (top.totalHits > 0) { // Find a document with this field try { Document doc = searcher.doc(top.scoreDocs[0].doc); Fieldable fld = doc.getFieldable(fieldName); if (fld != null) { f.add("index", getFieldFlags(fld)); } else { // it is a non-stored field... f.add("index", "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: " + fieldName); } } f.add("docs", top.totalHits); TopTermQueue topTerms = ttinfo.get(fieldName); if (topTerms != null) { f.add("distinct", topTerms.distinctTerms); // Include top terms f.add("topTerms", topTerms.toNamedList(searcher.getSchema())); // Add a histogram f.add("histogram", topTerms.histogram.toNamedList()); } } // Add the field finfo.add(fieldName, f); } return finfo; }