Ejemplo n.º 1
0
 /**
  * @param statType
  * @param statisticsStorage
  * @return Multiset<Integer> containing experiment counts across all efo attributes
  */
 public static Multiset<Integer> getScoresAcrossAllEfos(
     final StatisticsType statType, final StatisticsStorage statisticsStorage) {
   List<Attribute> efoAttrs = new ArrayList<Attribute>();
   for (String efo : statisticsStorage.getEfos()) {
     efoAttrs.add(new EfoAttribute(efo));
   }
   StatisticsQueryCondition statsQuery = new StatisticsQueryCondition(statType);
   statsQuery.and(getStatisticsOrQuery(efoAttrs, statType, 1, statisticsStorage));
   return getExperimentCounts(statsQuery, statisticsStorage, null);
 }
Ejemplo n.º 2
0
 /**
  * @param statType
  * @param attribute
  * @param statisticsStorage
  * @return Map: experiment -> bit stats corresponding to statType and attr
  */
 private static Map<ExperimentInfo, ConciseSet> getStatisticsForAttribute(
     final StatisticsType statType,
     final EfAttribute attribute,
     final StatisticsStorage statisticsStorage) {
   Map<ExperimentInfo, ConciseSet> expToBits =
       statisticsStorage.getStatisticsForAttribute(attribute, statType);
   if (expToBits != null) {
     return expToBits;
   }
   return emptyMap();
 }
Ejemplo n.º 3
0
  /**
   * The core scoring method for statistics queries
   *
   * @param statisticsQuery query to be peformed on statisticsStorage
   * @param statisticsStorage core data for Statistics qeries
   * @param scoringExps an out parameter.
   *     <p>- If null, experiment counts result of statisticsQuery should be returned. if - If
   *     non-null, it serves as a flag that an optimised statisticsQuery should be performed to just
   *     collect Experiments for which non-zero counts exist for Statistics query. A typical call
   *     scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check
   *     if the efv Attribute itself is a scoring one 2. map this Attribute and Experimeants in
   *     scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex
   *     (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return Multiset of aggregated experiment counts, where the set of scores genes is intersected
   *     across statisticsQuery.getConditions(), and union-ed across attributes within each
   *     condition in statisticsQuery.getConditions().
   */
  public static Multiset<Integer> scoreQuery(
      StatisticsQueryCondition statisticsQuery,
      final StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery
    // !gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only
    boolean gatherScoringExpsOnly = scoringExps != null;
    Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions =
        statisticsQuery.getConditions();

    Multiset<Integer> results = null;

    if (andStatisticsQueryConditions.isEmpty()) { // End of recursion
      Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet();

      Set<EfAttribute> attributes = statisticsQuery.getAttributes();
      if (attributes.isEmpty()) {

        // No attributes were provided - we have to use pre-computed scores across all attributes
        Multiset<Integer> scoresAcrossAllEfos =
            statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType());
        results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet);
      } else {
        results = HashMultiset.create();
        setQueryExperiments(statisticsQuery, statisticsStorage);

        // For each experiment in the query, traverse through all attributes and add all gene
        // indexes into one ConciseSet. This way a gene can score
        // only once for a single experiment - across all OR attributes in this query. Once all
        // attributes have been traversed for a single experiment,
        // add ConciseSet to Multiset results
        for (ExperimentInfo exp : statisticsQuery.getExperiments()) {
          FastSet statsForExperiment = new FastSet();
          for (EfAttribute attr : attributes) {
            Map<ExperimentInfo, ConciseSet> expsToStats =
                getStatisticsForAttribute(
                    statisticsQuery.getStatisticsType(), attr, statisticsStorage);
            if (expsToStats != null) {
              if (expsToStats.isEmpty()) {
                log.debug(
                    "Failed to retrieve stats for stat: "
                        + statisticsQuery.getStatisticsType()
                        + " and attr: "
                        + attr);
              } else {
                if (expsToStats.get(exp) != null) {
                  if (!gatherScoringExpsOnly) {
                    statsForExperiment.addAll(
                        intersect(expsToStats.get(exp), bioEntityIdRestrictionSet));
                  } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) {
                    // exp contains at least one non-zero score for at least one gene index in
                    // bioEntityIdRestrictionSet -> add it to scoringExps
                    scoringExps.add(exp);
                  }
                } else {
                  log.debug(
                      "Failed to retrieve stats for stat: "
                          + statisticsQuery.getStatisticsType()
                          + " exp: "
                          + exp.getAccession()
                          + " and attr: "
                          + attr);
                }
              }
            }
          }
          if (!gatherScoringExpsOnly) {
            results.addAll(statsForExperiment);
          }
        }
      }
    } else {
      // run over all AND conditions, do "OR" inside (cf. scoreOrStatisticsQueryConditions()) ,
      // "AND"'ing over the whole thing
      for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions :
          andStatisticsQueryConditions) {

        // Pass gene restriction set down to orConditions
        orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet());
        // process OR conditions
        Multiset<Integer> condGenes =
            getScoresForOrConditions(orConditions, statisticsStorage, scoringExps);

        if (results == null) results = condGenes;
        else {
          Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator();

          while (resultGenes.hasNext()) {
            Multiset.Entry<Integer> entry = resultGenes.next();
            if (!condGenes.contains(
                entry.getElement())) // AND operation between different top query conditions
            resultGenes.remove();
            else
              // for all gene ids belonging to intersection of all conditions seen so far, we
              // accumulate experiment counts
              results.setCount(
                  entry.getElement(), entry.getCount() + condGenes.count(entry.getElement()));
          }
        }
      }
    }

    if (results == null) {
      results = HashMultiset.create();
    }
    return results;
  }