@PostConstruct private void initDatabase() throws Exception { if (ifCreateDropTables.equals("create-drop")) { loadPeptideAnnotations(); loadReplicateAnnotations(); } referenceP100Profile = repositoryService.getReferenceProfileVector(AssayType.P100); referenceGCPProfile = repositoryService.getReferenceProfileVector(AssayType.GCP); referenceP100GeneNames = repositoryService.getReferenceGeneNames(AssayType.P100); referenceGCPGeneNames = repositoryService.getReferenceGeneNames(AssayType.GCP); if (ifCreateDropTables.equals("create-drop")) { // loadPeptideAnnotations(); // loadReplicateAnnotations(); loadDataPoints(); // normalize(); buildProfiles(); computeCorrelations(); mergeProfiles(referenceP100Profile.size(), referenceGCPProfile.size()); } }
/**
 * Builds one {@link Profile} per GCT file / replicate annotation combination and saves it.
 *
 * <p>For every combination returned by the repository service, the peak areas of that
 * combination are placed into a vector ordered like the reference profile of the file's assay
 * type. The vector is then passed to {@code UtilsStatistics.imputeProfileVector} together with
 * a flag array of the same length (presumably this fills the positions that received no peak
 * area and flags them; confirm against that helper) before the profile is persisted.
 *
 * @throws IllegalStateException if a peak area refers to a peptide id that is not contained in
 *     the reference profile of its assay type
 */
private void buildProfiles() {
  log.info("Filling repository with profiles");

  Set<GctReplicate> gctReplicatePairs = repositoryService.getGctReplicatesCombinations();
  // Placeholder clustering order: starts at the number of combinations and counts down.
  int dummyClusteringOrder = gctReplicatePairs.size();

  for (GctReplicate gctReplicate : gctReplicatePairs) {
    GctFile gctFile = gctReplicate.getGctFile();
    ReplicateAnnotation replicateAnnotation = gctReplicate.getReplicateAnnotation();
    List<PeakArea> peakAreas =
        peakAreaRepository.findByGctFileAndReplicateAnnotation(gctFile, replicateAnnotation);

    AssayType assayType = gctFile.getAssayType();
    List<String> referenceProfile = getReferenceProfile(assayType);
    List<String> referenceGeneNames = getReferenceGeneNames(assayType);

    Double[] profileVector = new Double[referenceProfile.size()];
    boolean[] imputeVector = new boolean[referenceProfile.size()];

    for (PeakArea peakArea : peakAreas) {
      int index = referenceProfile.indexOf(peakArea.getPeptideAnnotation().getPeptideId());
      if (index < 0) {
        // indexOf signals "not found" with -1; fail with context instead of a bare
        // ArrayIndexOutOfBoundsException on the array write below.
        throw new IllegalStateException(
            "Peptide '"
                + peakArea.getPeptideAnnotation().getPeptideId()
                + "' is missing from the "
                + assayType
                + " reference profile");
      }
      profileVector[index] = peakArea.getValue();
    }

    UtilsStatistics.imputeProfileVector(profileVector, imputeVector);

    Profile profile =
        new Profile(
            replicateAnnotation,
            gctFile,
            ArrayUtils.toPrimitive(profileVector),
            imputeVector,
            referenceProfile,
            referenceGeneNames,
            dummyClusteringOrder--);
    profileRepository.save(profile);
  }
}
/**
 * Groups the GCP and P100 profiles by cell id + perturbation name and saves one
 * {@link MergedProfile} per group.
 *
 * <p>Profiles are fetched through {@code findByAssayTypeInOrderByConcatDesc}, and consecutive
 * profiles sharing the same {@code cellId + pertiname} key are treated as one group. This relies
 * on the repository returning profiles with equal keys next to each other. Every profile ends
 * up in exactly one group, including the first profile of each group and the whole last group.
 *
 * @param p100Length length of the P100 reference profile, forwarded to the merge helper
 * @param gcpLength length of the GCP reference profile, forwarded to the merge helper
 */
private void mergeProfiles(int p100Length, int gcpLength) {
  log.info("Merging profiles");

  ArrayList<AssayType> dummyAssay = new ArrayList<>();
  dummyAssay.add(AssayType.GCP);
  dummyAssay.add(AssayType.P100);
  List<Profile> profiles = profileRepository.findByAssayTypeInOrderByConcatDesc(dummyAssay);

  String prevConcat = null;
  List<Profile> bunchOfProfiles = new ArrayList<>();

  for (Profile profile : profiles) {
    String curConcat =
        profile.getReplicateAnnotation().getCellId()
            + profile.getReplicateAnnotation().getPertiname();

    // Key changed: the previous group is complete, so merge and persist it.
    if (prevConcat != null && !curConcat.equals(prevConcat)) {
      MergedProfile mergedProfile =
          UtilsTransform.mergeProfiles(bunchOfProfiles, p100Length, gcpLength);
      mergedProfileRepository.save(mergedProfile);
      bunchOfProfiles = new ArrayList<>();
    }

    // The current profile always belongs to the (possibly just started) current group.
    bunchOfProfiles.add(profile);
    prevConcat = curConcat;
  }

  // Flush the last group; the loop only persists a group when the next one starts.
  if (!bunchOfProfiles.isEmpty()) {
    MergedProfile mergedProfile =
        UtilsTransform.mergeProfiles(bunchOfProfiles, p100Length, gcpLength);
    mergedProfileRepository.save(mergedProfile);
  }
}
/**
 * Finds, for every profile, the most Pearson-correlated other profile of the same assay type
 * and stores the result on the profile.
 *
 * <p>For each assay type, all profiles are compared pairwise on their value vectors. The
 * partner with the highest correlation (ties resolved in favour of the later profile) supplies
 * the correlated vector, the list of influential peptides and the textual correlation summary,
 * after which the profile is saved. If no partner can be determined (only one profile, or no
 * comparison yields a comparable value) the profile is paired with itself and a correlation of
 * {@code 0.0000} is reported.
 */
private void computeCorrelations() {
  log.info("Filling repository with most correlated profiles");

  // Loop-invariant helpers, created once.
  PearsonsCorrelation pearson = new PearsonsCorrelation();
  DecimalFormat df = new DecimalFormat("0.0000");
  String peptideCorrelation = " <br/><br/><b style=\"color: #23527c;\">%s</b>";

  for (AssayType assayType : AssayType.values()) {
    List<String> referenceProfile = getReferenceProfile(assayType);
    List<Profile> profiles = profileRepository.findByAssayType(assayType);

    for (Profile profileA : profiles) {
      Profile maxProfile = profileA;
      // Pearson coefficients lie in [-1, 1]; Double.MIN_VALUE is the smallest POSITIVE double
      // and would exclude every negative correlation, so start from negative infinity.
      double maxPearson = Double.NEGATIVE_INFINITY;

      for (Profile profileB : profiles) {
        if (profileA.equals(profileB)) {
          continue;
        }
        double pearsonCorrelation =
            pearson.correlation(profileA.getVector(), profileB.getVector());
        if (pearsonCorrelation >= maxPearson) {
          maxPearson = pearsonCorrelation;
          maxProfile = profileB;
        }
      }

      // No partner found: keep the self-pairing fallback and report zero.
      double reportedPearson = (maxProfile == profileA) ? 0.0 : maxPearson;

      profileA.setCorrelatedVector(maxProfile.getListWrapper());

      SortedSet<StringDouble> positivePeptides =
          UtilsStatistics.influentialPeptides(
              profileA.getVector(), maxProfile.getVector(), referenceProfile, true);
      profileA.setPositivePeptides(UtilsTransform.SortedSetToHTML(positivePeptides, false));

      profileA.setPositiveCorrelation(
          maxProfile.toString()
              + String.format(peptideCorrelation, df.format(reportedPearson)));

      profileRepository.save(profileA);
    }
  }
}
private void normalize() { List<AssayType> assayTypes = Arrays.asList(AssayType.GCP, AssayType.P100); // do loop for each assay e.g. P100, GCP for (AssayType assayType : assayTypes) { log.info("Normalize matrix of peak values for assay: {}", assayType); List<PeakArea> allPeakAreas = peakAreaRepository.findByGctFileAssayType(assayType); // inefficient, try with database count int numberOfPeptides = peptideAnnotationRepository.findByAssayType(assayType).size(); int numberOfReplicates = replicateAnnotationRepository.findByAssayType(assayType).size(); ArrayList<Integer> mapPeptideIdToRowId = new ArrayList<>(); ArrayList<Integer> mapReplicateIdToColumnId = new ArrayList<>(); // double[][] matrix = new double[replicates][peptides]; // init matrix with sizes List<List<Double>> peaksAsMatrix = new ArrayList<>(numberOfReplicates); for (int i = 0; i < numberOfReplicates; i++) { peaksAsMatrix.add(new ArrayList<>(numberOfPeptides)); } for (PeakArea peakArea : allPeakAreas) { int peptideId = Math.toIntExact(peakArea.getPeptideAnnotation().getId()); int replicateId = Math.toIntExact(peakArea.getReplicateAnnotation().getId()); Double rawValue = peakArea.getValue(); int mappedRowId; int mappedColumnId; if (!mapPeptideIdToRowId.contains(peptideId)) { mapPeptideIdToRowId.add(peptideId); } mappedRowId = mapPeptideIdToRowId.indexOf(peptideId); if (!mapReplicateIdToColumnId.contains(replicateId)) { mapReplicateIdToColumnId.add(replicateId); } mappedColumnId = mapReplicateIdToColumnId.indexOf(replicateId); peaksAsMatrix.get(mappedColumnId).set(mappedRowId, rawValue); } List<List<Double>> outputMatrix = Normalizer.quantileAndZScoreNormalize(peaksAsMatrix); // write normalized values back to DB for (int i = 0; i < numberOfReplicates; i++) { for (int j = 0; j < numberOfPeptides; j++) { Double normalizedValue = outputMatrix.get(i).get(j); Long databaseReplicateId = mapReplicateIdToColumnId.get(i).longValue(); Long databasePeptideId = mapPeptideIdToRowId.get(j).longValue(); List<PeakArea> 
peakAreas = peakAreaRepository .findByGctFileAssayTypeAndReplicateAnnotationIdAndPeptideAnnotationId( assayType, databaseReplicateId, databasePeptideId); assert peakAreas.size() == 1; PeakArea peakArea = peakAreas.get(0); peakArea.setNormalizedValue(normalizedValue); // Fill in normalized value field in DB peakAreaRepository.save(peakArea); } } log.info( "Normalized assay: {} peptides: {}, replicates: {}.", assayType, numberOfPeptides, numberOfReplicates); } }