/**
 * Builds one {@link Profile} per GCT file / replicate combination and saves it.
 *
 * <p>For every combination returned by the repository service, the matching peak areas are
 * loaded and their values are written into a vector at the position their peptide id has in the
 * reference profile of the file's assay type. The vector is then handed to
 * {@code UtilsStatistics.imputeProfileVector} together with a boolean vector of the same length
 * (presumably flagging imputed positions; confirm against that helper) before the profile is
 * persisted.
 *
 * <p>Peak areas without a peptide annotation, or whose peptide id is not part of the reference
 * profile, are logged and skipped instead of aborting the whole run.
 */
private void buildProfiles() {
  log.info("Filling repository with profiles");

  Set<GctReplicate> gctReplicatePairs = repositoryService.getGctReplicatesCombinations();
  // Placeholder clustering order: starts at the number of combinations and counts down by one
  // for every profile that is created.
  int dummyClusteringOrder = gctReplicatePairs.size();

  for (GctReplicate gctReplicate : gctReplicatePairs) {
    GctFile gctFile = gctReplicate.getGctFile();
    ReplicateAnnotation replicateAnnotation = gctReplicate.getReplicateAnnotation();
    List<PeakArea> peakAreas =
        peakAreaRepository.findByGctFileAndReplicateAnnotation(gctFile, replicateAnnotation);

    AssayType assayType = gctFile.getAssayType();
    List<String> referenceProfile = getReferenceProfile(assayType);
    List<String> referenceGeneNames = getReferenceGeneNames(assayType);

    Double[] profileVector = new Double[referenceProfile.size()];
    boolean[] imputeVector = new boolean[referenceProfile.size()];

    for (PeakArea peakArea : peakAreas) {
      PeptideAnnotation peptideAnnotation = peakArea.getPeptideAnnotation();
      if (peptideAnnotation == null) {
        log.warn("Skipping peak area without peptide annotation");
        continue;
      }

      // indexOf yields -1 for unknown peptides; using that as an array index would throw.
      int index = referenceProfile.indexOf(peptideAnnotation.getPeptideId());
      if (index < 0) {
        log.warn(
            "Skipping peak area, peptide {} is not part of the reference profile for assay: {}",
            peptideAnnotation.getPeptideId(),
            assayType);
        continue;
      }
      profileVector[index] = peakArea.getValue();
    }

    UtilsStatistics.imputeProfileVector(profileVector, imputeVector);

    Profile profile =
        new Profile(
            replicateAnnotation,
            gctFile,
            ArrayUtils.toPrimitive(profileVector),
            imputeVector,
            referenceProfile,
            referenceGeneNames,
            dummyClusteringOrder--);
    profileRepository.save(profile);
  }
}
private void normalize() { List<AssayType> assayTypes = Arrays.asList(AssayType.GCP, AssayType.P100); // do loop for each assay e.g. P100, GCP for (AssayType assayType : assayTypes) { log.info("Normalize matrix of peak values for assay: {}", assayType); List<PeakArea> allPeakAreas = peakAreaRepository.findByGctFileAssayType(assayType); // inefficient, try with database count int numberOfPeptides = peptideAnnotationRepository.findByAssayType(assayType).size(); int numberOfReplicates = replicateAnnotationRepository.findByAssayType(assayType).size(); ArrayList<Integer> mapPeptideIdToRowId = new ArrayList<>(); ArrayList<Integer> mapReplicateIdToColumnId = new ArrayList<>(); // double[][] matrix = new double[replicates][peptides]; // init matrix with sizes List<List<Double>> peaksAsMatrix = new ArrayList<>(numberOfReplicates); for (int i = 0; i < numberOfReplicates; i++) { peaksAsMatrix.add(new ArrayList<>(numberOfPeptides)); } for (PeakArea peakArea : allPeakAreas) { int peptideId = Math.toIntExact(peakArea.getPeptideAnnotation().getId()); int replicateId = Math.toIntExact(peakArea.getReplicateAnnotation().getId()); Double rawValue = peakArea.getValue(); int mappedRowId; int mappedColumnId; if (!mapPeptideIdToRowId.contains(peptideId)) { mapPeptideIdToRowId.add(peptideId); } mappedRowId = mapPeptideIdToRowId.indexOf(peptideId); if (!mapReplicateIdToColumnId.contains(replicateId)) { mapReplicateIdToColumnId.add(replicateId); } mappedColumnId = mapReplicateIdToColumnId.indexOf(replicateId); peaksAsMatrix.get(mappedColumnId).set(mappedRowId, rawValue); } List<List<Double>> outputMatrix = Normalizer.quantileAndZScoreNormalize(peaksAsMatrix); // write normalized values back to DB for (int i = 0; i < numberOfReplicates; i++) { for (int j = 0; j < numberOfPeptides; j++) { Double normalizedValue = outputMatrix.get(i).get(j); Long databaseReplicateId = mapReplicateIdToColumnId.get(i).longValue(); Long databasePeptideId = mapPeptideIdToRowId.get(j).longValue(); List<PeakArea> 
peakAreas = peakAreaRepository .findByGctFileAssayTypeAndReplicateAnnotationIdAndPeptideAnnotationId( assayType, databaseReplicateId, databasePeptideId); assert peakAreas.size() == 1; PeakArea peakArea = peakAreas.get(0); peakArea.setNormalizedValue(normalizedValue); // Fill in normalized value field in DB peakAreaRepository.save(peakArea); } } log.info( "Normalized assay: {} peptides: {}, replicates: {}.", assayType, numberOfPeptides, numberOfReplicates); } }
/**
 * Loads peak areas from every GCT file offered by Panorama and stores them.
 *
 * <p>For each download URL the file is parsed, a {@link GctFile} with its run id and run id link
 * is saved, and one {@link PeakArea} is saved per parsed peptide / replicate peak. A peak whose
 * peptide carries the annotation {@code pr_probe_suitability_manual = FALSE} is stored with a
 * {@code null} value.
 *
 * <p>Parsing is best effort: a parse failure is logged and processing continues with whatever
 * the parser collected up to that point. Peaks whose peptide or replicate annotation cannot be
 * found are logged and skipped, because other code in this class dereferences those annotations
 * on stored peak areas.
 *
 * @throws IOException declared for the Panorama calls made here (which of them throws it is not
 *     visible from this method)
 * @throws CommandException declared for the Panorama calls made here (which of them throws it
 *     is not visible from this method)
 */
private void loadDataPoints() throws IOException, CommandException {
  log.info("Loading peak areas from panorama gct files");

  List<String> gctDownloadUrls = connectPanorama.gctDownloadUrls(true);

  for (String url : gctDownloadUrls) {
    HashMap<String, List<ParseGCT.AnnotationValue>> metaPeptides = new HashMap<>();
    HashMap<String, List<ParseGCT.AnnotationValue>> metaReplicas = new HashMap<>();
    ArrayList<ParseGCT.PeptideReplicatePeak> peakValues = new ArrayList<>();

    try {
      parser.parseToRepository(url, peakValues, metaPeptides, metaReplicas);
    } catch (Exception e) {
      // Keep the previous best-effort flow, but do not hide the failure.
      log.warn("Failed to parse gct file: {}", url, e);
    }

    GctFile gctfile = new GctFile(url);
    int runId = UtilsParse.parseRunId(url);
    String runIdUrl = connectPanorama.getRunIdLink(gctfile);
    gctfile.setRunId(runId);
    gctfile.setRunIdUrl(runIdUrl);
    gctFileRepository.save(gctfile);

    List<String> probeNameIds = new ArrayList<>(metaPeptides.keySet());
    AssayType assayType = UtilsParse.parseArrayTypeFromUrl(url);
    HashMap<String, Integer> peptideIdsForChromatogramsUrl =
        connectPanorama.getPeptideIdsFromJSON(probeNameIds, assayType, runId);

    for (ParseGCT.PeptideReplicatePeak peakFromGct : peakValues) {
      String peptideId = peakFromGct.getPeptideId();
      String replicateId = peakFromGct.getReplicateId();

      PeptideAnnotation peptideAnnotation =
          peptideAnnotationRepository.findFirstByPeptideId(peptideId);
      ReplicateAnnotation replicateAnnotation =
          replicateAnnotationRepository.findFirstByReplicateId(replicateId);

      if (peptideAnnotation == null) {
        log.warn("Peptide annotation is null for peptide id: {}", peptideId);
      }
      if (replicateAnnotation == null) {
        log.warn("Replicate annotation is null for replicate id: {}", replicateId);
      }
      if (peptideAnnotation == null || replicateAnnotation == null) {
        // A peak area without both annotations cannot be used later on.
        continue;
      }

      Double peakAreaValue = peakFromGct.getPeakArea();

      // The peptide may have no annotation entries at all.
      List<ParseGCT.AnnotationValue> peptideMeta = metaPeptides.get(peptideId);
      if (peptideMeta != null) {
        for (ParseGCT.AnnotationValue annotationObject : peptideMeta) {
          String annotationName = annotationObject.getAnnotationName();
          String annotationValue = annotationObject.getAnnotationValue();
          // Constant-first equals stays safe when a name or value is missing.
          if ("pr_probe_suitability_manual".equals(annotationName)
              && "FALSE".equals(annotationValue)) {
            peakAreaValue = null;
          }
        }
      }

      String chromatogramUrl =
          connectPanorama.getChromatogramUrl(
              assayType, peptideIdsForChromatogramsUrl.get(peptideId), replicateId);

      PeakArea peakArea =
          new PeakArea(
              gctfile, peptideAnnotation, replicateAnnotation, peakAreaValue, chromatogramUrl);
      peakAreaRepository.save(peakArea);
    }
  }
}