static List<Feature> computeGeneLevelExpression( Map<String, FeatureCount> featureCounts, Map<String, Feature> geneInfos, double numberOfReads) { List<Feature> geneFeatures = new LinkedList<Feature>(); HashMap<String, List<FeatureCount>> geneFeaturesMap = new HashMap<>(); // Organize exons by gene Collection<FeatureCount> exons = featureCounts.values(); for (FeatureCount fc : exons) { String geneId = fc.getFeature().getAttribute("gene_id"); if (geneFeaturesMap.containsKey(geneId)) { geneFeaturesMap.get(geneId).add(fc); } else { LinkedList<FeatureCount> fcs = new LinkedList<>(); fcs.add(fc); geneFeaturesMap.put(geneId, fcs); } } // compute total counts and total length for (Map.Entry<String, List<FeatureCount>> entry : geneFeaturesMap.entrySet()) { List<FeatureCount> fcsForGene = entry.getValue(); Collections.sort(fcsForGene); Feature geneInfo = geneInfos.get(entry.getKey()); if (geneInfo == null) { throw new IllegalStateException(entry.getKey() + " has no entry in geneInfos"); } if (!geneInfo.type().equals("gene")) { throw new IllegalStateException(geneInfo + " is not a gene"); } // get total length and counts double count = 0.0; int length = 0; List<String> exonIds = new LinkedList<>(); for (FeatureCount fc : fcsForGene) { count += fc.getCount(); length += fc.getFeature().location().length(); exonIds.add(fc.getId()); } // comput rpkm double rpkm = count / length / numberOfReads * Math.pow(10, 9); HashMap<String, String> info = new HashMap<>(); info.put("reads", Double.toString(count)); info.put("RPKM", Double.toString(rpkm)); info.put("length", Integer.toString(length)); info.put("exons", StringUtils.join(exonIds, ",")); Feature newGeneInfo = GTFFeatureBuilder.addAttributesToFeature(geneInfo, info); geneFeatures.add(newGeneInfo); } return geneFeatures; }
/**
 * Builds one annotated copy of every counted feature.
 *
 * <p>Each returned feature is the original feature with these attributes added:
 * <ul>
 *   <li>{@code id} - the id of the feature count</li>
 *   <li>{@code reads} - the count, formatted with {@link Double#toString(double)}</li>
 *   <li>{@code RPKM} - {@code fc.getRPKM(getTotalCount())}</li>
 *   <li>{@code tss} - the gene's {@code bioStart()} when the gene is on the
 *       {@code '+'} strand, otherwise its {@code bioEnd()}</li>
 * </ul>
 *
 * @return the annotated features, in the iteration order of {@code featureCounts.values()}
 * @throws IllegalStateException if a feature's {@code gene_id} has no entry in
 *                               {@code geneInfo}
 */
@Override
public List<Feature> getCounts() {
    List<Feature> newFeatures = new LinkedList<>();
    for (FeatureCount fc : this.featureCounts.values()) {
        Feature f = fc.getFeature();
        String geneId = f.getAttribute("gene_id");
        Feature gene = geneInfo.get(geneId);
        // Fail with a descriptive message instead of an anonymous NullPointerException
        // when the feature refers to a gene that was never registered.
        if (gene == null) {
            throw new IllegalStateException(geneId + " has no entry in geneInfo");
        }

        // Strand decides which end of the gene is reported as "tss".
        int tss = gene.location().bioStrand() == '+'
                ? gene.location().bioStart()
                : gene.location().bioEnd();

        Map<String, String> newAtts = new HashMap<>();
        newAtts.put("id", fc.getId());
        newAtts.put("reads", Double.toString(fc.getCount()));
        // NOTE(review): getTotalCount() is evaluated once per feature; consider hoisting
        // it out of the loop if it turns out not to be constant-time - confirm first.
        newAtts.put("RPKM", Double.toString(fc.getRPKM(this.getTotalCount())));
        newAtts.put("tss", Integer.toString(tss));

        Feature newF = GTFFeatureBuilder.addAttributesToFeature(f, newAtts);
        newFeatures.add(newF);
    }
    return newFeatures;
}