private void handleFilterExclusions() throws IOException {
    List<String> excludeTags = freq.domain.excludeTags;

    if (excludeTags == null || excludeTags.size() == 0) {
      return;
    }

    // TODO: somehow remove responsebuilder dependency
    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
    Map tagMap = (Map) rb.req.getContext().get("tags");
    if (tagMap == null) {
      // no filters were tagged
      return;
    }

    IdentityHashMap<Query, Boolean> excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTags) {
      Object olst = tagMap.get(excludeTag);
      // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
      if (!(olst instanceof Collection)) continue;
      for (Object o : (Collection<?>) olst) {
        if (!(o instanceof QParser)) continue;
        QParser qp = (QParser) o;
        try {
          excludeSet.put(qp.getQuery(), Boolean.TRUE);
        } catch (SyntaxError syntaxError) {
          // This should not happen since we should only be retrieving a previously parsed query
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
        }
      }
    }
    if (excludeSet.size() == 0) return;

    List<Query> qlist = new ArrayList<>();

    // add the base query
    if (!excludeSet.containsKey(rb.getQuery())) {
      qlist.add(rb.getQuery());
    }

    // add the filters
    if (rb.getFilters() != null) {
      for (Query q : rb.getFilters()) {
        if (!excludeSet.containsKey(q)) {
          qlist.add(q);
        }
      }
    }

    // now walk back up the context tree
    // TODO: we lose parent exclusions...
    for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
      if (curr.filter != null) {
        qlist.add(curr.filter);
      }
    }

    // recompute the base domain
    fcontext.base = fcontext.searcher.getDocSet(qlist);
  }
  protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList)
      throws SyntaxError, IOException {
    Map<?, ?> tagMap = (Map<?, ?>) req.getContext().get("tags");
    // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
    if (tagMap == null || rb == null) {
      return baseDocSet;
    }

    IdentityHashMap<Query, Boolean> excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTagList) {
      Object olst = tagMap.get(excludeTag);
      // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
      if (!(olst instanceof Collection)) continue;
      for (Object o : (Collection<?>) olst) {
        if (!(o instanceof QParser)) continue;
        QParser qp = (QParser) o;
        excludeSet.put(qp.getQuery(), Boolean.TRUE);
      }
    }
    if (excludeSet.size() == 0) return baseDocSet;

    List<Query> qlist = new ArrayList<>();

    // add the base query
    if (!excludeSet.containsKey(rb.getQuery())) {
      qlist.add(rb.getQuery());
    }

    // add the filters
    if (rb.getFilters() != null) {
      for (Query q : rb.getFilters()) {
        if (!excludeSet.containsKey(q)) {
          qlist.add(q);
        }
      }
    }

    // get the new base docset for this facet
    DocSet base = searcher.getDocSet(qlist);
    if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) {
      Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false);
      grouping.setWithinGroupSort(rb.getGroupingSpec().getSortWithinGroup());
      if (rb.getGroupingSpec().getFields().length > 0) {
        grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
      } else if (rb.getGroupingSpec().getFunctions().length > 0) {
        grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
      } else {
        return base;
      }
      AbstractAllGroupHeadsCollector allGroupHeadsCollector =
          grouping.getCommands().get(0).createAllGroupCollector();
      searcher.search(base.getTopFilter(), allGroupHeadsCollector);
      return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc()));
    } else {
      return base;
    }
  }
 @Override
 public void finishStage(ResponseBuilder rb) {
   SolrParams params = rb.req.getParams();
   if (!params.getBool(COMPONENT_NAME, false)
       || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
     return;
   }
   if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
     SearchClusteringEngine engine = getSearchClusteringEngine(rb);
     if (engine != null) {
       SolrDocumentList solrDocList = (SolrDocumentList) rb.rsp.getValues().get("response");
       // TODO: Currently, docIds is set to null in distributed environment.
       // This causes CarrotParams.PRODUCE_SUMMARY doesn't work.
       // To work CarrotParams.PRODUCE_SUMMARY under distributed mode, we can choose either one of:
       // (a) In each shard, ClusteringComponent produces summary and finishStage()
       //     merges these summaries.
       // (b) Adding doHighlighting(SolrDocumentList, ...) method to SolrHighlighter and
       //     making SolrHighlighter uses "external text" rather than stored values to produce
       // snippets.
       Map<SolrDocument, Integer> docIds = null;
       Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
       rb.rsp.add("clusters", clusters);
     } else {
       String name = getClusteringEngineName(rb);
       log.warn("No engine for: " + name);
     }
   }
 }
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }
    String name = getClusteringEngineName(rb);
    boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
    if (useResults == true) {
      SearchClusteringEngine engine = getSearchClusteringEngine(rb);
      if (engine != null) {
        DocListAndSet results = rb.getResults();
        Map<SolrDocument, Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
        SolrDocumentList solrDocList =
            SolrPluginUtils.docListToSolrDocumentList(
                results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
        Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
        rb.rsp.add("clusters", clusters);
      } else {
        log.warn("No engine for: " + name);
      }
    }
    boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
    if (useCollection == true) {
      DocumentClusteringEngine engine = documentClusteringEngines.get(name);
      if (engine != null) {
        boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
        NamedList<?> nl = null;

        // TODO: This likely needs to be made into a background task that runs in an executor
        if (useDocSet == true) {
          nl = engine.cluster(rb.getResults().docSet, params);
        } else {
          nl = engine.cluster(params);
        }
        rb.rsp.add("clusters", nl);
      } else {
        log.warn("No engine for " + name);
      }
    }
  }
Example #5
0
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }
    String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
    boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
    if (useResults == true) {
      SearchClusteringEngine engine = searchClusteringEngines.get(name);
      if (engine != null) {
        DocListAndSet results = rb.getResults();
        Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
        rb.rsp.add("clusters", clusters);
      } else {
        log.warn("No engine for: " + name);
      }
    }
    boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
    if (useCollection == true) {
      DocumentClusteringEngine engine = documentClusteringEngines.get(name);
      if (engine != null) {
        boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
        NamedList nl = null;

        // TODO: This likely needs to be made into a background task that runs in an executor
        if (useDocSet == true) {
          nl = engine.cluster(rb.getResults().docSet, params);
        } else {
          nl = engine.cluster(params);
        }
        rb.rsp.add("clusters", nl);
      } else {
        log.warn("No engine for " + name);
      }
    }
  }