/** * @param orConditions StatisticsQueryOrConditions<StatisticsQueryCondition> * @param statisticsStorage * @param scoringExps Set of experiments that have at least one non-zero score for * statisticsQuery. This is used retrieving efos to be displayed in heatmap when no query efvs * exist (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes()) * @return Multiset<Integer> containing experiment counts corresponding to all attributes in each * StatisticsQueryCondition in orConditions */ private static Multiset<Integer> getScoresForOrConditions( final StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions, StatisticsStorage statisticsStorage, Set<ExperimentInfo> scoringExps) { Multiset<Integer> scores = HashMultiset.create(); for (StatisticsQueryCondition orCondition : orConditions.getConditions()) { orCondition.setBioEntityIdRestrictionSet(orConditions.getBioEntityIdRestrictionSet()); scores.addAll(scoreQuery(orCondition, statisticsStorage, scoringExps)); } // Now apply orConditions' min experiments restriction to scores Multiset<Integer> qualifyingScores = HashMultiset.create(); for (Multiset.Entry<Integer> entry : scores.entrySet()) { if (entry.getCount() >= orConditions.getMinExperiments()) { qualifyingScores.setCount(entry.getElement(), entry.getCount()); } } return qualifyingScores; }
/** * @param orAttributes * @param statType * @param minExperiments minimum experiment count restriction for this clause * @param statisticsStorage - used to retrieve orAttributes, needed finding experiment counts in * bit index * @return StatisticsQueryOrConditions representing orAttributes */ public static StatisticsQueryOrConditions<StatisticsQueryCondition> getStatisticsOrQuery( final List<Attribute> orAttributes, final StatisticsType statType, int minExperiments, final StatisticsStorage statisticsStorage) { StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions = new StatisticsQueryOrConditions<StatisticsQueryCondition>(); orConditions.setMinExperiments(minExperiments); // LinkedHashMap used to maintain ordering of processing of experiments in multi-Attribute, // multi-Experiment bit index queries to // retrieve sorted lists of experiments to be plotted on the gene page. Map<ExperimentInfo, Set<EfAttribute>> allExpsToAttrs = new LinkedHashMap<ExperimentInfo, Set<EfAttribute>>(); for (Attribute attr : orAttributes) { attr.getAttributeToExperimentMappings(statisticsStorage, allExpsToAttrs); } // Now process allExpsToAttrs - for all efo terms in orAttributes, grouping into one // StatisticsQueryCondition // attributes from potentially different efoTerms for one experiment. This has the effect of // counting a given // experiment only once for an OR collection of Attributes. for (Map.Entry<ExperimentInfo, Set<EfAttribute>> expToAttr : allExpsToAttrs.entrySet()) { StatisticsQueryCondition cond = new StatisticsQueryCondition(statType); if (expToAttr.getKey() != EfAttribute.ALL_EXPERIMENTS) // For efv Attributes we span all experiments cond.inExperiments(Collections.singletonList(expToAttr.getKey())); for (EfAttribute attr : expToAttr.getValue()) { cond.inAttribute(attr); } orConditions.orCondition(cond); } return orConditions; }
/** * The core scoring method for statistics queries * * @param statisticsQuery query to be peformed on statisticsStorage * @param statisticsStorage core data for Statistics qeries * @param scoringExps an out parameter. * <p>- If null, experiment counts result of statisticsQuery should be returned. if - If * non-null, it serves as a flag that an optimised statisticsQuery should be performed to just * collect Experiments for which non-zero counts exist for Statistics query. A typical call * scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check * if the efv Attribute itself is a scoring one 2. map this Attribute and Experimeants in * scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex * (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes()) * @return Multiset of aggregated experiment counts, where the set of scores genes is intersected * across statisticsQuery.getConditions(), and union-ed across attributes within each * condition in statisticsQuery.getConditions(). */ public static Multiset<Integer> scoreQuery( StatisticsQueryCondition statisticsQuery, final StatisticsStorage statisticsStorage, Set<ExperimentInfo> scoringExps) { // gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery // !gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only boolean gatherScoringExpsOnly = scoringExps != null; Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions = statisticsQuery.getConditions(); Multiset<Integer> results = null; if (andStatisticsQueryConditions.isEmpty()) { // End of recursion Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet(); Set<EfAttribute> attributes = statisticsQuery.getAttributes(); if (attributes.isEmpty()) { // No attributes were provided - we have to use pre-computed scores across all attributes Multiset<Integer> scoresAcrossAllEfos = statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType()); results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet); } else { results = HashMultiset.create(); setQueryExperiments(statisticsQuery, statisticsStorage); // For each experiment in the query, traverse through all attributes and add all gene // indexes into one ConciseSet. This way a gene can score // only once for a single experiment - across all OR attributes in this query. Once all // attributes have been traversed for a single experiment, // add ConciseSet to Multiset results for (ExperimentInfo exp : statisticsQuery.getExperiments()) { FastSet statsForExperiment = new FastSet(); for (EfAttribute attr : attributes) { Map<ExperimentInfo, ConciseSet> expsToStats = getStatisticsForAttribute( statisticsQuery.getStatisticsType(), attr, statisticsStorage); if (expsToStats != null) { if (expsToStats.isEmpty()) { log.debug( "Failed to retrieve stats for stat: " + statisticsQuery.getStatisticsType() + " and attr: " + attr); } else { if (expsToStats.get(exp) != null) { if (!gatherScoringExpsOnly) { statsForExperiment.addAll( intersect(expsToStats.get(exp), bioEntityIdRestrictionSet)); } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) { // exp contains at least one non-zero score for at least one gene index in // bioEntityIdRestrictionSet -> add it to scoringExps scoringExps.add(exp); } } else { log.debug( "Failed to retrieve stats for stat: " + statisticsQuery.getStatisticsType() + " exp: " + exp.getAccession() + " and attr: " + attr); } } } } if (!gatherScoringExpsOnly) { results.addAll(statsForExperiment); } } } } else { // run over all AND conditions, do "OR" inside (cf. scoreOrStatisticsQueryConditions()) , // "AND"'ing over the whole thing for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions : andStatisticsQueryConditions) { // Pass gene restriction set down to orConditions orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet()); // process OR conditions Multiset<Integer> condGenes = getScoresForOrConditions(orConditions, statisticsStorage, scoringExps); if (results == null) results = condGenes; else { Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator(); while (resultGenes.hasNext()) { Multiset.Entry<Integer> entry = resultGenes.next(); if (!condGenes.contains( entry.getElement())) // AND operation between different top query conditions resultGenes.remove(); else // for all gene ids belonging to intersection of all conditions seen so far, we // accumulate experiment counts results.setCount( entry.getElement(), entry.getCount() + condGenes.count(entry.getElement())); } } } } if (results == null) { results = HashMultiset.create(); } return results; }