/** * If no experiments were specified, inject into statisticsQuery a superset of all experiments for * which stats exist across all attributes * * @param statisticsQuery * @param statisticsStorage */ private static void setQueryExperiments( StatisticsQueryCondition statisticsQuery, StatisticsStorage statisticsStorage) { Set<ExperimentInfo> exps = statisticsQuery.getExperiments(); if (exps .isEmpty()) { // No experiments conditions were specified - assemble a superset of all // experiments for which stats exist across all attributes for (EfAttribute attr : statisticsQuery.getAttributes()) { Map<ExperimentInfo, ConciseSet> expsToStats = getStatisticsForAttribute(statisticsQuery.getStatisticsType(), attr, statisticsStorage); if (expsToStats != null) exps.addAll(expsToStats.keySet()); } statisticsQuery.inExperiments(exps); } }
/** * The core scoring method for statistics queries * * @param statisticsQuery query to be peformed on statisticsStorage * @param statisticsStorage core data for Statistics qeries * @param scoringExps an out parameter. * <p>- If null, experiment counts result of statisticsQuery should be returned. if - If * non-null, it serves as a flag that an optimised statisticsQuery should be performed to just * collect Experiments for which non-zero counts exist for Statistics query. A typical call * scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check * if the efv Attribute itself is a scoring one 2. map this Attribute and Experimeants in * scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex * (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes()) * @return Multiset of aggregated experiment counts, where the set of scores genes is intersected * across statisticsQuery.getConditions(), and union-ed across attributes within each * condition in statisticsQuery.getConditions(). */ public static Multiset<Integer> scoreQuery( StatisticsQueryCondition statisticsQuery, final StatisticsStorage statisticsStorage, Set<ExperimentInfo> scoringExps) { // gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery // !gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only boolean gatherScoringExpsOnly = scoringExps != null; Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions = statisticsQuery.getConditions(); Multiset<Integer> results = null; if (andStatisticsQueryConditions.isEmpty()) { // End of recursion Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet(); Set<EfAttribute> attributes = statisticsQuery.getAttributes(); if (attributes.isEmpty()) { // No attributes were provided - we have to use pre-computed scores across all attributes Multiset<Integer> scoresAcrossAllEfos = statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType()); results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet); } else { results = HashMultiset.create(); setQueryExperiments(statisticsQuery, statisticsStorage); // For each experiment in the query, traverse through all attributes and add all gene // indexes into one ConciseSet. This way a gene can score // only once for a single experiment - across all OR attributes in this query. Once all // attributes have been traversed for a single experiment, // add ConciseSet to Multiset results for (ExperimentInfo exp : statisticsQuery.getExperiments()) { FastSet statsForExperiment = new FastSet(); for (EfAttribute attr : attributes) { Map<ExperimentInfo, ConciseSet> expsToStats = getStatisticsForAttribute( statisticsQuery.getStatisticsType(), attr, statisticsStorage); if (expsToStats != null) { if (expsToStats.isEmpty()) { log.debug( "Failed to retrieve stats for stat: " + statisticsQuery.getStatisticsType() + " and attr: " + attr); } else { if (expsToStats.get(exp) != null) { if (!gatherScoringExpsOnly) { statsForExperiment.addAll( intersect(expsToStats.get(exp), bioEntityIdRestrictionSet)); } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) { // exp contains at least one non-zero score for at least one gene index in // bioEntityIdRestrictionSet -> add it to scoringExps scoringExps.add(exp); } } else { log.debug( "Failed to retrieve stats for stat: " + statisticsQuery.getStatisticsType() + " exp: " + exp.getAccession() + " and attr: " + attr); } } } } if (!gatherScoringExpsOnly) { results.addAll(statsForExperiment); } } } } else { // run over all AND conditions, do "OR" inside (cf. scoreOrStatisticsQueryConditions()) , // "AND"'ing over the whole thing for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions : andStatisticsQueryConditions) { // Pass gene restriction set down to orConditions orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet()); // process OR conditions Multiset<Integer> condGenes = getScoresForOrConditions(orConditions, statisticsStorage, scoringExps); if (results == null) results = condGenes; else { Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator(); while (resultGenes.hasNext()) { Multiset.Entry<Integer> entry = resultGenes.next(); if (!condGenes.contains( entry.getElement())) // AND operation between different top query conditions resultGenes.remove(); else // for all gene ids belonging to intersection of all conditions seen so far, we // accumulate experiment counts results.setCount( entry.getElement(), entry.getCount() + condGenes.count(entry.getElement())); } } } } if (results == null) { results = HashMultiset.create(); } return results; }