Ejemplo n.º 1
0
  /**
   * Builds one {@code Profile} for every GCT file / replicate combination returned by the
   * repository service and saves it through {@code profileRepository}.
   *
   * <p>For each combination the peak areas are loaded and each value is placed into a vector at
   * the position of its peptide id within the assay's reference profile. The vector is then
   * handed to {@code UtilsStatistics.imputeProfileVector} together with a flag vector of the same
   * length before the profile is constructed.
   *
   * @throws IllegalStateException if a peak area refers to a peptide id that is not contained in
   *     the reference profile of its assay type
   */
  private void buildProfiles() {
    log.info("Filling repository with profiles");

    Set<GctReplicate> gctReplicatePairs = repositoryService.getGctReplicatesCombinations();

    // Placeholder clustering order: starts at the number of combinations and counts down by one
    // for every profile that is created.
    int dummyClusteringOrder = gctReplicatePairs.size();
    for (GctReplicate gctReplicate : gctReplicatePairs) {

      GctFile gctFile = gctReplicate.getGctFile();
      ReplicateAnnotation replicateAnnotation = gctReplicate.getReplicateAnnotation();

      List<PeakArea> peakAreas =
          peakAreaRepository.findByGctFileAndReplicateAnnotation(gctFile, replicateAnnotation);

      AssayType assayType = gctFile.getAssayType();

      List<String> referenceProfile = getReferenceProfile(assayType);
      List<String> referenceGeneNames = getReferenceGeneNames(assayType);

      // Positions without a matching peak area stay null in profileVector.
      // NOTE(review): presumably imputeProfileVector fills those gaps and records them in
      // imputeVector - confirm against UtilsStatistics.
      Double[] profileVector = new Double[referenceProfile.size()];
      boolean[] imputeVector = new boolean[referenceProfile.size()];

      for (PeakArea peakArea : peakAreas) {
        int index = referenceProfile.indexOf(peakArea.getPeptideAnnotation().getPeptideId());
        if (index < 0) {
          // indexOf returns -1 for an unknown id; using it as an array index would fail with an
          // uninformative ArrayIndexOutOfBoundsException.
          throw new IllegalStateException(
              "Peptide id "
                  + peakArea.getPeptideAnnotation().getPeptideId()
                  + " is not part of the reference profile for assay "
                  + assayType);
        }
        profileVector[index] = peakArea.getValue();
      }

      UtilsStatistics.imputeProfileVector(profileVector, imputeVector);

      Profile profile =
          new Profile(
              replicateAnnotation,
              gctFile,
              ArrayUtils.toPrimitive(profileVector),
              imputeVector,
              referenceProfile,
              referenceGeneNames,
              dummyClusteringOrder--);

      profileRepository.save(profile);
    }
  }
Ejemplo n.º 2
0
  /**
   * Normalizes the stored peak values of the GCP and P100 assays and writes the result back.
   *
   * <p>For each assay all peak areas are arranged into a matrix with one row list per replicate
   * and one entry per peptide, the matrix is passed to
   * {@code Normalizer.quantileAndZScoreNormalize}, and every resulting value is stored in the
   * {@code normalizedValue} field of the matching peak area, which is then saved.
   *
   * <p>Database ids are translated to matrix positions in order of first appearance: the position
   * of an id inside {@code mapReplicateIdToColumnId} / {@code mapPeptideIdToRowId} is its index in
   * the matrix.
   *
   * @throws IllegalStateException if the peak areas reference more distinct replicates or
   *     peptides than there are annotations for the assay, or if a (replicate, peptide) pair does
   *     not resolve to exactly one peak area when writing back
   */
  private void normalize() {
    List<AssayType> assayTypes = Arrays.asList(AssayType.GCP, AssayType.P100);

    // do loop for each assay e.g. P100, GCP
    for (AssayType assayType : assayTypes) {
      log.info("Normalize matrix of peak values for assay: {}", assayType);

      List<PeakArea> allPeakAreas = peakAreaRepository.findByGctFileAssayType(assayType);

      // inefficient, try with database count
      int numberOfPeptides = peptideAnnotationRepository.findByAssayType(assayType).size();
      int numberOfReplicates = replicateAnnotationRepository.findByAssayType(assayType).size();

      List<Integer> mapPeptideIdToRowId = new ArrayList<>();
      List<Integer> mapReplicateIdToColumnId = new ArrayList<>();

      // Init matrix with sizes. The ArrayList constructor argument only reserves capacity, so
      // each row is filled with null placeholders; otherwise set(...) below would throw.
      // NOTE(review): cells without a peak area stay null - confirm the Normalizer accepts that.
      List<List<Double>> peaksAsMatrix = new ArrayList<>(numberOfReplicates);
      for (int i = 0; i < numberOfReplicates; i++) {
        List<Double> replicateValues = new ArrayList<>(numberOfPeptides);
        for (int j = 0; j < numberOfPeptides; j++) {
          replicateValues.add(null);
        }
        peaksAsMatrix.add(replicateValues);
      }

      for (PeakArea peakArea : allPeakAreas) {
        int peptideId = Math.toIntExact(peakArea.getPeptideAnnotation().getId());
        int replicateId = Math.toIntExact(peakArea.getReplicateAnnotation().getId());

        Double rawValue = peakArea.getValue();

        // Look the id up once; register it at the end of the mapping when it is new.
        int mappedRowId = mapPeptideIdToRowId.indexOf(peptideId);
        if (mappedRowId < 0) {
          mappedRowId = mapPeptideIdToRowId.size();
          mapPeptideIdToRowId.add(peptideId);
        }

        int mappedColumnId = mapReplicateIdToColumnId.indexOf(replicateId);
        if (mappedColumnId < 0) {
          mappedColumnId = mapReplicateIdToColumnId.size();
          mapReplicateIdToColumnId.add(replicateId);
        }

        if (mappedColumnId >= numberOfReplicates || mappedRowId >= numberOfPeptides) {
          throw new IllegalStateException(
              "Peak areas of assay "
                  + assayType
                  + " reference more replicates or peptides than annotated (replicate id "
                  + replicateId
                  + ", peptide id "
                  + peptideId
                  + ")");
        }

        peaksAsMatrix.get(mappedColumnId).set(mappedRowId, rawValue);
      }

      List<List<Double>> outputMatrix = Normalizer.quantileAndZScoreNormalize(peaksAsMatrix);

      // Write normalized values back to DB. Only positions that were mapped to a database id can
      // be written, so the loops are bounded by the mappings rather than the annotation counts.
      for (int i = 0; i < mapReplicateIdToColumnId.size(); i++) {
        for (int j = 0; j < mapPeptideIdToRowId.size(); j++) {
          Double normalizedValue = outputMatrix.get(i).get(j);

          Long databaseReplicateId = mapReplicateIdToColumnId.get(i).longValue();
          Long databasePeptideId = mapPeptideIdToRowId.get(j).longValue();

          List<PeakArea> peakAreas =
              peakAreaRepository
                  .findByGctFileAssayTypeAndReplicateAnnotationIdAndPeptideAnnotationId(
                      assayType, databaseReplicateId, databasePeptideId);

          // Explicit check instead of `assert`, which is skipped unless the JVM runs with -ea.
          if (peakAreas.size() != 1) {
            throw new IllegalStateException(
                "Expected exactly one peak area for assay "
                    + assayType
                    + ", replicate id "
                    + databaseReplicateId
                    + ", peptide id "
                    + databasePeptideId
                    + " but found "
                    + peakAreas.size());
          }

          PeakArea peakArea = peakAreas.get(0);
          peakArea.setNormalizedValue(normalizedValue);

          // Fill in normalized value field in DB
          peakAreaRepository.save(peakArea);
        }
      }

      log.info(
          "Normalized assay: {} peptides: {}, replicates: {}.",
          assayType,
          numberOfPeptides,
          numberOfReplicates);
    }
  }