コード例 #1
0
 /**
  * @param set
  * @param restrictionSet
  * @return intersection of set (ConciseSet) and restrictionSet (if restrictionSet non-null &
  *     non-empty); otherwise return set
  */
 private static ExtendedSet<Integer> intersect(
     final ExtendedSet<Integer> set, final Set<Integer> restrictionSet) {
   if (restrictionSet != null && !restrictionSet.isEmpty()) {
     int prevSize = set.size();
     FastSet intersection = new FastSet(set);
     intersection.retainAll(restrictionSet);
     log.debug(
         prevSize != 0
             ? ("Size saving by retainAll = "
                     + (((prevSize - intersection.size()) * 100) / prevSize))
                 + "%"
             : "");
     return intersection;
   }
   return set;
 }
コード例 #2
0
  /**
   * The core scoring method for statistics queries
   *
   * @param statisticsQuery query to be peformed on statisticsStorage
   * @param statisticsStorage core data for Statistics qeries
   * @param scoringExps an out parameter.
   *     <p>- If null, experiment counts result of statisticsQuery should be returned. if - If
   *     non-null, it serves as a flag that an optimised statisticsQuery should be performed to just
   *     collect Experiments for which non-zero counts exist for Statistics query. A typical call
   *     scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check
   *     if the efv Attribute itself is a scoring one 2. map this Attribute and Experimeants in
   *     scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex
   *     (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return Multiset of aggregated experiment counts, where the set of scores genes is intersected
   *     across statisticsQuery.getConditions(), and union-ed across attributes within each
   *     condition in statisticsQuery.getConditions().
   */
  public static Multiset<Integer> scoreQuery(
      StatisticsQueryCondition statisticsQuery,
      final StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery
    // !gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only
    boolean gatherScoringExpsOnly = scoringExps != null;
    Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions =
        statisticsQuery.getConditions();

    Multiset<Integer> results = null;

    if (andStatisticsQueryConditions.isEmpty()) { // End of recursion
      Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet();

      Set<EfAttribute> attributes = statisticsQuery.getAttributes();
      if (attributes.isEmpty()) {

        // No attributes were provided - we have to use pre-computed scores across all attributes
        Multiset<Integer> scoresAcrossAllEfos =
            statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType());
        results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet);
      } else {
        results = HashMultiset.create();
        setQueryExperiments(statisticsQuery, statisticsStorage);

        // For each experiment in the query, traverse through all attributes and add all gene
        // indexes into one ConciseSet. This way a gene can score
        // only once for a single experiment - across all OR attributes in this query. Once all
        // attributes have been traversed for a single experiment,
        // add ConciseSet to Multiset results
        for (ExperimentInfo exp : statisticsQuery.getExperiments()) {
          FastSet statsForExperiment = new FastSet();
          for (EfAttribute attr : attributes) {
            Map<ExperimentInfo, ConciseSet> expsToStats =
                getStatisticsForAttribute(
                    statisticsQuery.getStatisticsType(), attr, statisticsStorage);
            if (expsToStats != null) {
              if (expsToStats.isEmpty()) {
                log.debug(
                    "Failed to retrieve stats for stat: "
                        + statisticsQuery.getStatisticsType()
                        + " and attr: "
                        + attr);
              } else {
                if (expsToStats.get(exp) != null) {
                  if (!gatherScoringExpsOnly) {
                    statsForExperiment.addAll(
                        intersect(expsToStats.get(exp), bioEntityIdRestrictionSet));
                  } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) {
                    // exp contains at least one non-zero score for at least one gene index in
                    // bioEntityIdRestrictionSet -> add it to scoringExps
                    scoringExps.add(exp);
                  }
                } else {
                  log.debug(
                      "Failed to retrieve stats for stat: "
                          + statisticsQuery.getStatisticsType()
                          + " exp: "
                          + exp.getAccession()
                          + " and attr: "
                          + attr);
                }
              }
            }
          }
          if (!gatherScoringExpsOnly) {
            results.addAll(statsForExperiment);
          }
        }
      }
    } else {
      // run over all AND conditions, do "OR" inside (cf. scoreOrStatisticsQueryConditions()) ,
      // "AND"'ing over the whole thing
      for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions :
          andStatisticsQueryConditions) {

        // Pass gene restriction set down to orConditions
        orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet());
        // process OR conditions
        Multiset<Integer> condGenes =
            getScoresForOrConditions(orConditions, statisticsStorage, scoringExps);

        if (results == null) results = condGenes;
        else {
          Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator();

          while (resultGenes.hasNext()) {
            Multiset.Entry<Integer> entry = resultGenes.next();
            if (!condGenes.contains(
                entry.getElement())) // AND operation between different top query conditions
            resultGenes.remove();
            else
              // for all gene ids belonging to intersection of all conditions seen so far, we
              // accumulate experiment counts
              results.setCount(
                  entry.getElement(), entry.getCount() + condGenes.count(entry.getElement()));
          }
        }
      }
    }

    if (results == null) {
      results = HashMultiset.create();
    }
    return results;
  }