Пример #1
0
 /**
  * @param scores
  * @param restrictionSet
  * @return intersection of set (Multiset<Integer>) and restrictionSet (if restrictionSet non-null
  *     & non-empty); otherwise return set
  */
 public static Multiset<Integer> intersect(
     final Multiset<Integer> scores, final Set<Integer> restrictionSet) {
   if (restrictionSet != null && !restrictionSet.isEmpty()) {
     int prevSize = scores.size();
     Multiset<Integer> intersection = HashMultiset.create(scores);
     intersection.retainAll(restrictionSet);
     log.debug(
         prevSize != 0
             ? ("Size saving by retainAll = "
                     + (((prevSize - intersection.size()) * 100) / prevSize))
                 + "%"
             : "");
     return intersection;
   }
   return scores;
 }
Пример #2
0
  /**
   * @param orConditions StatisticsQueryOrConditions<StatisticsQueryCondition>
   * @param statisticsStorage
   * @param scoringExps Set of experiments that have at least one non-zero score for
   *     statisticsQuery. This is used retrieving efos to be displayed in heatmap when no query efvs
   *     exist (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return Multiset<Integer> containing experiment counts corresponding to all attributes in each
   *     StatisticsQueryCondition in orConditions
   */
  private static Multiset<Integer> getScoresForOrConditions(
      final StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions,
      StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    Multiset<Integer> scores = HashMultiset.create();
    for (StatisticsQueryCondition orCondition : orConditions.getConditions()) {
      orCondition.setBioEntityIdRestrictionSet(orConditions.getBioEntityIdRestrictionSet());
      scores.addAll(scoreQuery(orCondition, statisticsStorage, scoringExps));
    }

    // Now apply orConditions' min experiments restriction to scores
    Multiset<Integer> qualifyingScores = HashMultiset.create();
    for (Multiset.Entry<Integer> entry : scores.entrySet()) {
      if (entry.getCount() >= orConditions.getMinExperiments()) {
        qualifyingScores.setCount(entry.getElement(), entry.getCount());
      }
    }

    return qualifyingScores;
  }
Пример #3
0
 /**
  * @param statsQuery StatisticsQueryCondition
  * @param statisticsStorage
  * @param scoringExps Set of experiments that have at least one non-zero score for
  *     statisticsQuery. This is used retrieving efos to be displayed in heatmap when no query efvs
  *     exist (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
  * @return experiment counts corresponding for statsQuery
  */
 public static Multiset<Integer> getExperimentCounts(
     StatisticsQueryCondition statsQuery,
     StatisticsStorage statisticsStorage,
     Set<ExperimentInfo> scoringExps) {
   long start = System.currentTimeMillis();
   Multiset<Integer> counts =
       StatisticsQueryUtils.scoreQuery(statsQuery, statisticsStorage, scoringExps);
   long dur = System.currentTimeMillis() - start;
   int numOfGenesWithCounts = counts.elementSet().size();
   if (numOfGenesWithCounts > 0) {
     log.debug(
         "StatisticsQuery: "
             + statsQuery.prettyPrint()
             + " ==> result set size: "
             + numOfGenesWithCounts
             + " (duration: "
             + dur
             + " ms)");
   }
   return counts;
 }
Пример #4
0
  /**
   * The core scoring method for statistics queries
   *
   * @param statisticsQuery query to be peformed on statisticsStorage
   * @param statisticsStorage core data for Statistics qeries
   * @param scoringExps an out parameter.
   *     <p>- If null, experiment counts result of statisticsQuery should be returned. if - If
   *     non-null, it serves as a flag that an optimised statisticsQuery should be performed to just
   *     collect Experiments for which non-zero counts exist for Statistics query. A typical call
   *     scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check
   *     if the efv Attribute itself is a scoring one 2. map this Attribute and Experimeants in
   *     scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex
   *     (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return Multiset of aggregated experiment counts, where the set of scores genes is intersected
   *     across statisticsQuery.getConditions(), and union-ed across attributes within each
   *     condition in statisticsQuery.getConditions().
   */
  public static Multiset<Integer> scoreQuery(
      StatisticsQueryCondition statisticsQuery,
      final StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery
    // !gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only
    boolean gatherScoringExpsOnly = scoringExps != null;
    Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions =
        statisticsQuery.getConditions();

    Multiset<Integer> results = null;

    if (andStatisticsQueryConditions.isEmpty()) { // End of recursion
      Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet();

      Set<EfAttribute> attributes = statisticsQuery.getAttributes();
      if (attributes.isEmpty()) {

        // No attributes were provided - we have to use pre-computed scores across all attributes
        Multiset<Integer> scoresAcrossAllEfos =
            statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType());
        results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet);
      } else {
        results = HashMultiset.create();
        setQueryExperiments(statisticsQuery, statisticsStorage);

        // For each experiment in the query, traverse through all attributes and add all gene
        // indexes into one ConciseSet. This way a gene can score
        // only once for a single experiment - across all OR attributes in this query. Once all
        // attributes have been traversed for a single experiment,
        // add ConciseSet to Multiset results
        for (ExperimentInfo exp : statisticsQuery.getExperiments()) {
          FastSet statsForExperiment = new FastSet();
          for (EfAttribute attr : attributes) {
            Map<ExperimentInfo, ConciseSet> expsToStats =
                getStatisticsForAttribute(
                    statisticsQuery.getStatisticsType(), attr, statisticsStorage);
            if (expsToStats != null) {
              if (expsToStats.isEmpty()) {
                log.debug(
                    "Failed to retrieve stats for stat: "
                        + statisticsQuery.getStatisticsType()
                        + " and attr: "
                        + attr);
              } else {
                if (expsToStats.get(exp) != null) {
                  if (!gatherScoringExpsOnly) {
                    statsForExperiment.addAll(
                        intersect(expsToStats.get(exp), bioEntityIdRestrictionSet));
                  } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) {
                    // exp contains at least one non-zero score for at least one gene index in
                    // bioEntityIdRestrictionSet -> add it to scoringExps
                    scoringExps.add(exp);
                  }
                } else {
                  log.debug(
                      "Failed to retrieve stats for stat: "
                          + statisticsQuery.getStatisticsType()
                          + " exp: "
                          + exp.getAccession()
                          + " and attr: "
                          + attr);
                }
              }
            }
          }
          if (!gatherScoringExpsOnly) {
            results.addAll(statsForExperiment);
          }
        }
      }
    } else {
      // run over all AND conditions, do "OR" inside (cf. scoreOrStatisticsQueryConditions()) ,
      // "AND"'ing over the whole thing
      for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions :
          andStatisticsQueryConditions) {

        // Pass gene restriction set down to orConditions
        orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet());
        // process OR conditions
        Multiset<Integer> condGenes =
            getScoresForOrConditions(orConditions, statisticsStorage, scoringExps);

        if (results == null) results = condGenes;
        else {
          Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator();

          while (resultGenes.hasNext()) {
            Multiset.Entry<Integer> entry = resultGenes.next();
            if (!condGenes.contains(
                entry.getElement())) // AND operation between different top query conditions
            resultGenes.remove();
            else
              // for all gene ids belonging to intersection of all conditions seen so far, we
              // accumulate experiment counts
              results.setCount(
                  entry.getElement(), entry.getCount() + condGenes.count(entry.getElement()));
          }
        }
      }
    }

    if (results == null) {
      results = HashMultiset.create();
    }
    return results;
  }