Пример #1
0
 /**
  * Scores a query that ORs together every efo term known to {@code statisticsStorage}.
  *
  * <p>Each term returned by {@code statisticsStorage.getEfos()} is wrapped in an
  * {@code EfoAttribute}. The OR clause built from those attributes (with a minimum experiment
  * count of 1) is AND-ed into a fresh {@code StatisticsQueryCondition} for {@code statType},
  * which is then evaluated by {@code getExperimentCounts} with {@code null} scoring experiments.
  *
  * @param statType statistics type the query is built for
  * @param statisticsStorage supplies the efo terms; also handed on to query construction and
  *     scoring
  * @return {@code Multiset<Integer>} containing experiment counts across all efo attributes
  */
 public static Multiset<Integer> getScoresAcrossAllEfos(
     final StatisticsType statType, final StatisticsStorage statisticsStorage) {
   // Wrap every known efo term so it can take part in an OR query
   final List<Attribute> allEfoAttributes = new ArrayList<Attribute>();
   for (final String efoTerm : statisticsStorage.getEfos()) {
     final Attribute efoAttribute = new EfoAttribute(efoTerm);
     allEfoAttributes.add(efoAttribute);
   }

   final StatisticsQueryCondition allEfosQuery = new StatisticsQueryCondition(statType);
   allEfosQuery.and(getStatisticsOrQuery(allEfoAttributes, statType, 1, statisticsStorage));

   // null scoringExps: no scoring-experiment set is supplied to the scorer
   return getExperimentCounts(allEfosQuery, statisticsStorage, null);
 }
Пример #2
0
 /**
  * Makes sure {@code statisticsQuery} carries an explicit set of experiments.
  *
  * <p>If the query already names at least one experiment it is left untouched. Otherwise the
  * experiments for which stats exist are gathered across all of the query's attributes (a
  * superset over all attributes) and injected into the query.
  *
  * <p>Note: the experiments are added directly to the set returned by
  * {@code statisticsQuery.getExperiments()}, and that same set is then passed to
  * {@code statisticsQuery.inExperiments(...)}.
  *
  * @param statisticsQuery query whose experiments are filled in when none were specified
  * @param statisticsStorage storage consulted for per-attribute statistics
  */
 private static void setQueryExperiments(
     StatisticsQueryCondition statisticsQuery, StatisticsStorage statisticsStorage) {
   Set<ExperimentInfo> queryExperiments = statisticsQuery.getExperiments();
   if (!queryExperiments.isEmpty()) {
     // Experiment conditions were specified explicitly - nothing to assemble
     return;
   }

   // No experiment conditions were specified - assemble a superset of all experiments for which
   // stats exist across all attributes
   for (EfAttribute attribute : statisticsQuery.getAttributes()) {
     Map<ExperimentInfo, ConciseSet> statsByExperiment =
         getStatisticsForAttribute(
             statisticsQuery.getStatisticsType(), attribute, statisticsStorage);
     if (statsByExperiment == null) {
       // No stats available for this attribute
       continue;
     }
     queryExperiments.addAll(statsByExperiment.keySet());
   }
   statisticsQuery.inExperiments(queryExperiments);
 }
Пример #3
0
  /**
   * Scores every condition of an OR clause, pools the resulting counts and then drops all
   * elements whose pooled count is below the clause's minimum experiment count.
   *
   * <p>Before a condition is scored, the clause's bioentity id restriction set is copied onto it.
   *
   * @param orConditions the OR clause whose conditions are to be scored
   * @param statisticsStorage storage passed through to {@code scoreQuery}
   * @param scoringExps Set of experiments that have at least one non-zero score for the query;
   *     passed through to {@code scoreQuery} unchanged. This is used when retrieving efos to be
   *     displayed in the heatmap when no query efvs exist (c.f.
   *     atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return {@code Multiset<Integer>} containing the experiment counts pooled over all conditions
   *     in {@code orConditions}, restricted to elements meeting the minimum experiment count
   */
  private static Multiset<Integer> getScoresForOrConditions(
      final StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions,
      StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // Score each OR-ed condition on its own and pool the counts
    Multiset<Integer> pooledScores = HashMultiset.create();
    for (StatisticsQueryCondition condition : orConditions.getConditions()) {
      condition.setBioEntityIdRestrictionSet(orConditions.getBioEntityIdRestrictionSet());
      Multiset<Integer> conditionScores = scoreQuery(condition, statisticsStorage, scoringExps);
      pooledScores.addAll(conditionScores);
    }

    // Apply the clause's min experiments restriction to the pooled counts
    Multiset<Integer> result = HashMultiset.create();
    for (Multiset.Entry<Integer> scored : pooledScores.entrySet()) {
      int experimentCount = scored.getCount();
      if (experimentCount < orConditions.getMinExperiments()) {
        continue;
      }
      result.setCount(scored.getElement(), experimentCount);
    }
    return result;
  }
Пример #4
0
  /**
   * Builds an OR clause out of {@code orAttributes}, with one {@code StatisticsQueryCondition}
   * per experiment.
   *
   * <p>Every attribute first contributes its experiment-to-attributes mappings to a shared,
   * insertion-ordered map. Each map entry then becomes one condition holding all attributes
   * mapped to that experiment, so that a given experiment is counted only once for an OR
   * collection of attributes.
   *
   * @param orAttributes attributes to be OR-ed together
   * @param statType statistics type of every generated condition
   * @param minExperiments minimum experiment count restriction for this clause
   * @param statisticsStorage handed to each attribute so that it can resolve its
   *     experiment-to-attribute mappings
   * @return StatisticsQueryOrConditions representing orAttributes
   */
  public static StatisticsQueryOrConditions<StatisticsQueryCondition> getStatisticsOrQuery(
      final List<Attribute> orAttributes,
      final StatisticsType statType,
      int minExperiments,
      final StatisticsStorage statisticsStorage) {

    // A LinkedHashMap keeps the order in which experiments are processed stable in
    // multi-Attribute, multi-Experiment bit index queries; that order is needed to retrieve the
    // sorted lists of experiments plotted on the gene page.
    Map<ExperimentInfo, Set<EfAttribute>> attrsByExperiment =
        new LinkedHashMap<ExperimentInfo, Set<EfAttribute>>();
    for (Attribute orAttribute : orAttributes) {
      orAttribute.getAttributeToExperimentMappings(statisticsStorage, attrsByExperiment);
    }

    StatisticsQueryOrConditions<StatisticsQueryCondition> orQuery =
        new StatisticsQueryOrConditions<StatisticsQueryCondition>();
    orQuery.setMinExperiments(minExperiments);

    // Group attributes (potentially coming from different efo terms) that share an experiment
    // into a single condition for that experiment.
    for (Map.Entry<ExperimentInfo, Set<EfAttribute>> mapping : attrsByExperiment.entrySet()) {
      ExperimentInfo experiment = mapping.getKey();
      StatisticsQueryCondition experimentCondition = new StatisticsQueryCondition(statType);

      // Entries keyed by the ALL_EXPERIMENTS marker (efv Attributes) span all experiments, so no
      // experiment restriction is added for them. The marker is compared by identity.
      if (experiment != EfAttribute.ALL_EXPERIMENTS) {
        experimentCondition.inExperiments(Collections.singletonList(experiment));
      }

      for (EfAttribute mappedAttribute : mapping.getValue()) {
        experimentCondition.inAttribute(mappedAttribute);
      }
      orQuery.orCondition(experimentCondition);
    }

    return orQuery;
  }
Пример #5
0
 /**
  * Scores {@code statsQuery} via {@code StatisticsQueryUtils.scoreQuery} and, when debug logging
  * is enabled and the result is non-empty, logs the query, the number of distinct elements in
  * the result and the time the scoring took.
  *
  * @param statsQuery StatisticsQueryCondition to be scored
  * @param statisticsStorage storage the query is evaluated against
  * @param scoringExps Set of experiments that have at least one non-zero score for the query;
  *     passed through to {@code scoreQuery} unchanged. This is used when retrieving efos to be
  *     displayed in the heatmap when no query efvs exist (c.f.
  *     atlasStatisticsQueryService.getScoringAttributesForGenes())
  * @return experiment counts corresponding to statsQuery
  */
 public static Multiset<Integer> getExperimentCounts(
     StatisticsQueryCondition statsQuery,
     StatisticsStorage statisticsStorage,
     Set<ExperimentInfo> scoringExps) {
   // nanoTime() is monotonic, so the measured duration is not distorted by wall-clock changes
   final long startNanos = System.nanoTime();
   Multiset<Integer> counts =
       StatisticsQueryUtils.scoreQuery(statsQuery, statisticsStorage, scoringExps);
   final long dur = (System.nanoTime() - startNanos) / 1000000L;

   // Check the log level first, so that prettyPrint() and the string concatenation are only
   // paid for when the message is actually going to be written
   if (log.isDebugEnabled()) {
     int numOfGenesWithCounts = counts.elementSet().size();
     if (numOfGenesWithCounts > 0) {
       log.debug(
           "StatisticsQuery: "
               + statsQuery.prettyPrint()
               + " ==> result set size: "
               + numOfGenesWithCounts
               + " (duration: "
               + dur
               + " ms)");
     }
   }
   return counts;
 }
Пример #6
0
  /**
   * The core scoring method for statistics queries.
   *
   * <p>A query without nested AND conditions is scored directly (end of recursion):
   *
   * <ul>
   *   <li>with no attributes, the pre-computed scores across all efos are taken from
   *       {@code statisticsStorage} and intersected with the query's bioentity id restriction
   *       set;
   *   <li>otherwise, for every experiment of the query the stats of all attributes are merged
   *       into one set per experiment before being added to the result.
   * </ul>
   *
   * <p>A query with nested conditions is scored by evaluating each OR clause via
   * {@code getScoresForOrConditions} and AND-ing the clause results together: only elements
   * present in every clause survive, and their counts are summed.
   *
   * @param statisticsQuery query to be performed on statisticsStorage
   * @param statisticsStorage core data for Statistics queries
   * @param scoringExps an out parameter.
   *     <p>- If null, experiment counts result of statisticsQuery should be returned.
   *     <p>- If non-null, it serves as a flag that an optimised statisticsQuery should be
   *     performed to just collect Experiments for which non-zero counts exist for Statistics
   *     query. A typical call scenario in this case is just one efv per statisticsQuery, in which
   *     we can both: 1. check if the efv Attribute itself is a scoring one 2. map this Attribute
   *     and Experiments in scoringExps to efo terms - via the reverse mapping efv-experiment->
   *     efo term in EfoIndex (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return Multiset of aggregated experiment counts, where the set of scored genes is
   *     intersected across statisticsQuery.getConditions(), and union-ed across attributes within
   *     each condition in statisticsQuery.getConditions(). Never null.
   */
  public static Multiset<Integer> scoreQuery(
      StatisticsQueryCondition statisticsQuery,
      final StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only
    // !gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery
    boolean gatherScoringExpsOnly = scoringExps != null;
    Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions =
        statisticsQuery.getConditions();

    Multiset<Integer> results = null;

    if (andStatisticsQueryConditions.isEmpty()) { // End of recursion
      Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet();

      Set<EfAttribute> attributes = statisticsQuery.getAttributes();
      if (attributes.isEmpty()) {

        // No attributes were provided - we have to use pre-computed scores across all attributes
        Multiset<Integer> scoresAcrossAllEfos =
            statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType());
        results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet);
      } else {
        results = HashMultiset.create();
        setQueryExperiments(statisticsQuery, statisticsStorage);

        // For each experiment in the query, traverse through all attributes and add all gene
        // indexes into one set. This way a gene can score only once for a single experiment -
        // across all OR attributes in this query. Once all attributes have been traversed for a
        // single experiment, add that set to Multiset results
        for (ExperimentInfo exp : statisticsQuery.getExperiments()) {
          FastSet statsForExperiment = new FastSet();
          for (EfAttribute attr : attributes) {
            Map<ExperimentInfo, ConciseSet> expsToStats =
                getStatisticsForAttribute(
                    statisticsQuery.getStatisticsType(), attr, statisticsStorage);
            if (expsToStats == null) {
              // Nothing known for this attribute - skipped without logging
              continue;
            }
            if (expsToStats.isEmpty()) {
              log.debug(
                  "Failed to retrieve stats for stat: "
                      + statisticsQuery.getStatisticsType()
                      + " and attr: "
                      + attr);
              continue;
            }

            // Look the experiment up once and reuse the result below
            ConciseSet statsForExp = expsToStats.get(exp);
            if (statsForExp == null) {
              log.debug(
                  "Failed to retrieve stats for stat: "
                      + statisticsQuery.getStatisticsType()
                      + " exp: "
                      + exp.getAccession()
                      + " and attr: "
                      + attr);
              continue;
            }

            if (!gatherScoringExpsOnly) {
              statsForExperiment.addAll(intersect(statsForExp, bioEntityIdRestrictionSet));
            } else if (containsAtLeastOne(statsForExp, bioEntityIdRestrictionSet)) {
              // exp contains at least one non-zero score for at least one gene index in
              // bioEntityIdRestrictionSet -> add it to scoringExps
              scoringExps.add(exp);
            }
          }
          if (!gatherScoringExpsOnly) {
            results.addAll(statsForExperiment);
          }
        }
      }
    } else {
      // run over all AND conditions, do "OR" inside (cf. getScoresForOrConditions()),
      // "AND"'ing over the whole thing
      for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions :
          andStatisticsQueryConditions) {

        // Pass gene restriction set down to orConditions
        orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet());
        // process OR conditions
        Multiset<Integer> condGenes =
            getScoresForOrConditions(orConditions, statisticsStorage, scoringExps);

        if (results == null) {
          results = condGenes;
        } else {
          // AND operation between different top query conditions. The intersection is built in a
          // fresh multiset rather than by changing counts of 'results' while its entry set is
          // being iterated, which would rely on implementation-specific iterator behaviour.
          Multiset<Integer> intersection = HashMultiset.create();
          for (Multiset.Entry<Integer> entry : results.entrySet()) {
            int condCount = condGenes.count(entry.getElement());
            if (condCount > 0) {
              // for all gene ids belonging to intersection of all conditions seen so far, we
              // accumulate experiment counts
              intersection.add(entry.getElement(), entry.getCount() + condCount);
            }
          }
          results = intersection;
        }
      }
    }

    if (results == null) {
      results = HashMultiset.create();
    }
    return results;
  }