コード例 #1
0
ファイル: DatabaseLoader.java プロジェクト: uc-bd2k/Pilincs
  /**
   * Normalizes the stored peak values of each assay (GCP and P100) and saves the result.
   *
   * <p>For every assay the peak areas are loaded from the repository, arranged into a matrix with
   * one inner list per replicate and one entry per peptide, passed to {@code
   * Normalizer.quantileAndZScoreNormalize}, and the value returned for each cell is stored on the
   * corresponding {@code PeakArea} through {@code setNormalizedValue} before it is saved.
   *
   * <p>Matrix cells for which no peak area exists stay {@code null}. Peak areas lacking a peptide
   * or replicate annotation cannot be placed in the matrix and are skipped with a warning.
   *
   * @throws IllegalStateException if the peak areas of an assay reference more distinct peptides
   *     or replicates than there are annotations for that assay
   */
  private void normalize() {
    List<AssayType> assayTypes = Arrays.asList(AssayType.GCP, AssayType.P100);

    // One independent normalization run per assay, e.g. P100, GCP.
    for (AssayType assayType : assayTypes) {
      log.info("Normalize matrix of peak values for assay: {}", assayType);

      List<PeakArea> allPeakAreas = peakAreaRepository.findByGctFileAssayType(assayType);

      // Loads every annotation only to count it; a repository count query would be cheaper.
      int numberOfPeptides = peptideAnnotationRepository.findByAssayType(assayType).size();
      int numberOfReplicates = replicateAnnotationRepository.findByAssayType(assayType).size();

      // Database id -> matrix index, assigned in order of first appearance.
      HashMap<Integer, Integer> rowIndexByPeptideId = new HashMap<>();
      HashMap<Integer, Integer> columnIndexByReplicateId = new HashMap<>();

      // Pre-fill every cell with null. An ArrayList created with an initial capacity still has
      // size 0, so List.set would throw unless the cells exist first.
      List<List<Double>> peaksAsMatrix = new ArrayList<>(numberOfReplicates);
      for (int i = 0; i < numberOfReplicates; i++) {
        List<Double> replicateValues = new ArrayList<>(numberOfPeptides);
        for (int j = 0; j < numberOfPeptides; j++) {
          replicateValues.add(null);
        }
        peaksAsMatrix.add(replicateValues);
      }

      // Peak areas that actually received a matrix cell; only these are written back.
      List<PeakArea> placedPeakAreas = new ArrayList<>(allPeakAreas.size());

      for (PeakArea peakArea : allPeakAreas) {
        if (peakArea.getPeptideAnnotation() == null
            || peakArea.getReplicateAnnotation() == null) {
          log.warn(
              "Skipping peak area without peptide or replicate annotation for assay: {}",
              assayType);
          continue;
        }

        int peptideId = Math.toIntExact(peakArea.getPeptideAnnotation().getId());
        int replicateId = Math.toIntExact(peakArea.getReplicateAnnotation().getId());

        int rowIndex = matrixIndexFor(rowIndexByPeptideId, peptideId);
        int columnIndex = matrixIndexFor(columnIndexByReplicateId, replicateId);

        if (rowIndex >= numberOfPeptides || columnIndex >= numberOfReplicates) {
          throw new IllegalStateException(
              "Peak areas of assay "
                  + assayType
                  + " reference more peptides or replicates than annotated (peptides: "
                  + numberOfPeptides
                  + ", replicates: "
                  + numberOfReplicates
                  + ")");
        }

        peaksAsMatrix.get(columnIndex).set(rowIndex, peakArea.getValue());
        placedPeakAreas.add(peakArea);
      }

      // NOTE(review): assumes the normalizer returns a matrix of the same shape as its input and
      // tolerates null cells - confirm against Normalizer.
      List<List<Double>> outputMatrix = Normalizer.quantileAndZScoreNormalize(peaksAsMatrix);

      // Write normalized values back to the DB. The already loaded entities are reused instead of
      // issuing one repository query per matrix cell.
      for (PeakArea peakArea : placedPeakAreas) {
        int peptideId = Math.toIntExact(peakArea.getPeptideAnnotation().getId());
        int replicateId = Math.toIntExact(peakArea.getReplicateAnnotation().getId());

        int rowIndex = rowIndexByPeptideId.get(peptideId);
        int columnIndex = columnIndexByReplicateId.get(replicateId);

        peakArea.setNormalizedValue(outputMatrix.get(columnIndex).get(rowIndex));
        peakAreaRepository.save(peakArea);
      }

      log.info(
          "Normalized assay: {} peptides: {}, replicates: {}.",
          assayType,
          numberOfPeptides,
          numberOfReplicates);
    }
  }

  /** Returns the matrix index assigned to {@code id}, assigning the next free one on first use. */
  private static int matrixIndexFor(HashMap<Integer, Integer> indexById, int id) {
    Integer index = indexById.get(id);
    if (index == null) {
      index = indexById.size();
      indexById.put(id, index);
    }
    return index;
  }
コード例 #2
0
ファイル: DatabaseLoader.java プロジェクト: uc-bd2k/Pilincs
  /**
   * Loads the peak areas of every Panorama gct file and saves them.
   *
   * <p>For each download URL the file is parsed, a {@code GctFile} with its run id and run id
   * link is saved, and one {@code PeakArea} is saved per parsed peak. A peak whose peptide carries
   * the annotation {@code pr_probe_suitability_manual = FALSE} is stored with a {@code null}
   * value.
   *
   * <p>A parse failure is logged and processing continues with whatever the parser had produced
   * up to that point.
   *
   * @throws IOException declared for the Panorama and parsing helpers called here
   * @throws CommandException declared for the Panorama and parsing helpers called here
   */
  private void loadDataPoints() throws IOException, CommandException {
    log.info("Loading peak areas from panorama gct files");

    List<String> gctDownloadUrls = connectPanorama.gctDownloadUrls(true);

    for (String url : gctDownloadUrls) {

      // Output containers filled by the parser.
      HashMap<String, List<ParseGCT.AnnotationValue>> metaPeptides = new HashMap<>();
      HashMap<String, List<ParseGCT.AnnotationValue>> metaReplicas = new HashMap<>();
      ArrayList<ParseGCT.PeptideReplicatePeak> peakValues = new ArrayList<>();

      try {
        parser.parseToRepository(url, peakValues, metaPeptides, metaReplicas);
      } catch (Exception e) {
        // Still best effort (the flow below is unchanged), but the failure is no longer silent.
        log.error("Failed to parse gct file: {}", url, e);
      }

      GctFile gctfile = new GctFile(url);

      int runId = UtilsParse.parseRunId(url);
      String runIdUrl = connectPanorama.getRunIdLink(gctfile);

      gctfile.setRunId(runId);
      gctfile.setRunIdUrl(runIdUrl);

      gctFileRepository.save(gctfile);

      List<String> probeNameIds = new ArrayList<>(metaPeptides.keySet());

      AssayType assayType = UtilsParse.parseArrayTypeFromUrl(url);

      HashMap<String, Integer> peptideIdsForChromatogramsUrl =
          connectPanorama.getPeptideIdsFromJSON(probeNameIds, assayType, runId);

      for (ParseGCT.PeptideReplicatePeak peakFromGct : peakValues) {

        String peptideId = peakFromGct.getPeptideId();
        String replicateId = peakFromGct.getReplicateId();

        PeptideAnnotation peptideAnnotation =
            peptideAnnotationRepository.findFirstByPeptideId(peptideId);
        ReplicateAnnotation replicateAnnotation =
            replicateAnnotationRepository.findFirstByReplicateId(replicateId);

        if (peptideAnnotation == null) {
          log.warn("Peptide annotation is null");
        }

        if (replicateAnnotation == null) {
          log.warn("Replicate annotation is null");
        }

        Double peakAreaValue = peakFromGct.getPeakArea();

        // The peptide may be missing from the parsed metadata, e.g. after a failed parse.
        List<ParseGCT.AnnotationValue> peptideMetadata = metaPeptides.get(peptideId);
        if (peptideMetadata == null) {
          log.warn("No peptide metadata found for peptide id: {}", peptideId);
        } else {
          for (ParseGCT.AnnotationValue annotationObject : peptideMetadata) {
            String annotationName = annotationObject.getAnnotationName();
            String annotationValue = annotationObject.getAnnotationValue();

            // Constant-first equals keeps a null name or value from throwing.
            if ("pr_probe_suitability_manual".equals(annotationName)
                && "FALSE".equals(annotationValue)) {
              // Probe manually marked unsuitable: keep the record but drop its value.
              peakAreaValue = null;
            }
          }
        }

        String chromatogramUrl =
            connectPanorama.getChromatogramUrl(
                assayType, peptideIdsForChromatogramsUrl.get(peptideId), replicateId);

        PeakArea peakArea =
            new PeakArea(
                gctfile, peptideAnnotation, replicateAnnotation, peakAreaValue, chromatogramUrl);

        peakAreaRepository.save(peakArea);
      }
    }
  }