/**
 * Builds a statistics query that ORs together one {@code EfoAttribute} per efo term reported by
 * {@code statisticsStorage.getEfos()} and returns the experiment counts for that query.
 *
 * @param statType statistics type the query is created for
 * @param statisticsStorage storage supplying the efo terms and the statistics being queried
 * @return {@code Multiset<Integer>} containing experiment counts across all efo attributes
 */
public static Multiset<Integer> getScoresAcrossAllEfos(
    final StatisticsType statType, final StatisticsStorage statisticsStorage) {
  // Wrap every efo term known to the storage in an attribute object.
  final List<Attribute> allEfoAttributes = new ArrayList<Attribute>();
  for (String efoTerm : statisticsStorage.getEfos()) {
    allEfoAttributes.add(new EfoAttribute(efoTerm));
  }

  // A single AND condition holding the OR over all efo attributes.
  final StatisticsQueryCondition allEfosQuery = new StatisticsQueryCondition(statType);
  allEfosQuery.and(getStatisticsOrQuery(allEfoAttributes, statType, 1, statisticsStorage));

  return getExperimentCounts(allEfosQuery, statisticsStorage, null);
}
/**
 * Null-safe lookup of the per-experiment bit statistics stored for an attribute.
 *
 * @param statType statistics type to look up
 * @param attribute attribute whose statistics are requested
 * @param statisticsStorage storage to query
 * @return Map: experiment -> bit stats corresponding to statType and attribute; an empty map
 *     (never null) when the storage returns null for this combination
 */
private static Map<ExperimentInfo, ConciseSet> getStatisticsForAttribute(
    final StatisticsType statType,
    final EfAttribute attribute,
    final StatisticsStorage statisticsStorage) {
  final Map<ExperimentInfo, ConciseSet> statsByExperiment =
      statisticsStorage.getStatisticsForAttribute(attribute, statType);
  if (statsByExperiment == null) {
    // Nothing stored for this attribute/type pair: callers get an empty map rather than null.
    return emptyMap();
  }
  return statsByExperiment;
}
/** * The core scoring method for statistics queries * * @param statisticsQuery query to be peformed on statisticsStorage * @param statisticsStorage core data for Statistics qeries * @param scoringExps an out parameter. * <p>- If null, experiment counts result of statisticsQuery should be returned. if - If * non-null, it serves as a flag that an optimised statisticsQuery should be performed to just * collect Experiments for which non-zero counts exist for Statistics query. A typical call * scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check * if the efv Attribute itself is a scoring one 2. map this Attribute and Experimeants in * scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex * (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes()) * @return Multiset of aggregated experiment counts, where the set of scores genes is intersected * across statisticsQuery.getConditions(), and union-ed across attributes within each * condition in statisticsQuery.getConditions(). 
*/ public static Multiset<Integer> scoreQuery( StatisticsQueryCondition statisticsQuery, final StatisticsStorage statisticsStorage, Set<ExperimentInfo> scoringExps) { // gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery // !gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only boolean gatherScoringExpsOnly = scoringExps != null; Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions = statisticsQuery.getConditions(); Multiset<Integer> results = null; if (andStatisticsQueryConditions.isEmpty()) { // End of recursion Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet(); Set<EfAttribute> attributes = statisticsQuery.getAttributes(); if (attributes.isEmpty()) { // No attributes were provided - we have to use pre-computed scores across all attributes Multiset<Integer> scoresAcrossAllEfos = statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType()); results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet); } else { results = HashMultiset.create(); setQueryExperiments(statisticsQuery, statisticsStorage); // For each experiment in the query, traverse through all attributes and add all gene // indexes into one ConciseSet. This way a gene can score // only once for a single experiment - across all OR attributes in this query. 
Once all // attributes have been traversed for a single experiment, // add ConciseSet to Multiset results for (ExperimentInfo exp : statisticsQuery.getExperiments()) { FastSet statsForExperiment = new FastSet(); for (EfAttribute attr : attributes) { Map<ExperimentInfo, ConciseSet> expsToStats = getStatisticsForAttribute( statisticsQuery.getStatisticsType(), attr, statisticsStorage); if (expsToStats != null) { if (expsToStats.isEmpty()) { log.debug( "Failed to retrieve stats for stat: " + statisticsQuery.getStatisticsType() + " and attr: " + attr); } else { if (expsToStats.get(exp) != null) { if (!gatherScoringExpsOnly) { statsForExperiment.addAll( intersect(expsToStats.get(exp), bioEntityIdRestrictionSet)); } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) { // exp contains at least one non-zero score for at least one gene index in // bioEntityIdRestrictionSet -> add it to scoringExps scoringExps.add(exp); } } else { log.debug( "Failed to retrieve stats for stat: " + statisticsQuery.getStatisticsType() + " exp: " + exp.getAccession() + " and attr: " + attr); } } } } if (!gatherScoringExpsOnly) { results.addAll(statsForExperiment); } } } } else { // run over all AND conditions, do "OR" inside (cf. 
scoreOrStatisticsQueryConditions()) , // "AND"'ing over the whole thing for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions : andStatisticsQueryConditions) { // Pass gene restriction set down to orConditions orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet()); // process OR conditions Multiset<Integer> condGenes = getScoresForOrConditions(orConditions, statisticsStorage, scoringExps); if (results == null) results = condGenes; else { Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator(); while (resultGenes.hasNext()) { Multiset.Entry<Integer> entry = resultGenes.next(); if (!condGenes.contains( entry.getElement())) // AND operation between different top query conditions resultGenes.remove(); else // for all gene ids belonging to intersection of all conditions seen so far, we // accumulate experiment counts results.setCount( entry.getElement(), entry.getCount() + condGenes.count(entry.getElement())); } } } } if (results == null) { results = HashMultiset.create(); } return results; }