Exemplo n.º 1
0
  /**
   * Fills the database and the in-memory reference data after the bean has been constructed.
   *
   * <p>When {@code ifCreateDropTables} equals {@code "create-drop"}, peptide and replicate
   * annotations are loaded before the reference profiles and gene names are read from
   * {@code repositoryService}; afterwards data points are loaded, profiles are built,
   * correlations are computed and profiles are merged. For any other value (including an unset
   * one) only the reference profiles and gene names are read.
   *
   * @throws Exception propagated from the steps invoked here
   */
  @PostConstruct
  private void initDatabase() throws Exception {

    // Constant-first comparison: an unset flag means "do not rebuild" rather than an NPE.
    final boolean rebuildData = "create-drop".equals(ifCreateDropTables);

    if (rebuildData) {
      // Annotations are loaded ahead of the reference lookups below.
      loadPeptideAnnotations();
      loadReplicateAnnotations();
    }

    referenceP100Profile = repositoryService.getReferenceProfileVector(AssayType.P100);
    referenceGCPProfile = repositoryService.getReferenceProfileVector(AssayType.GCP);

    referenceP100GeneNames = repositoryService.getReferenceGeneNames(AssayType.P100);
    referenceGCPGeneNames = repositoryService.getReferenceGeneNames(AssayType.GCP);

    if (rebuildData) {
      loadDataPoints();
      // The normalize() step is currently not part of this pipeline.

      buildProfiles();
      computeCorrelations();
      mergeProfiles(referenceP100Profile.size(), referenceGCPProfile.size());
    }
  }
Exemplo n.º 2
0
  /**
   * Builds one {@link Profile} per GCT-file/replicate combination and saves it.
   *
   * <p>For every combination returned by {@code repositoryService} the matching peak areas are
   * placed into a vector at the position of their peptide id within the reference profile of the
   * file's assay type. The vector is then passed to
   * {@code UtilsStatistics.imputeProfileVector} together with a same-sized boolean array
   * (presumably filling the gaps and flagging them; confirm against {@code UtilsStatistics}) and
   * stored through {@code profileRepository}.
   *
   * @throws IllegalStateException if a peak area refers to a peptide id that does not occur in the
   *     reference profile of its assay type
   */
  private void buildProfiles() {
    log.info("Filling repository with profiles");

    Set<GctReplicate> gctReplicatePairs = repositoryService.getGctReplicatesCombinations();

    // Placeholder clustering order: counts down from the number of combinations.
    int dummyClusteringOrder = gctReplicatePairs.size();
    for (GctReplicate gctReplicate : gctReplicatePairs) {

      GctFile gctFile = gctReplicate.getGctFile();
      ReplicateAnnotation replicateAnnotation = gctReplicate.getReplicateAnnotation();

      List<PeakArea> peakAreas =
          peakAreaRepository.findByGctFileAndReplicateAnnotation(gctFile, replicateAnnotation);

      AssayType assayType = gctFile.getAssayType();

      List<String> referenceProfile = getReferenceProfile(assayType);
      List<String> referenceGeneNames = getReferenceGeneNames(assayType);

      // Entries without a peak area stay null until imputation.
      Double[] profileVector = new Double[referenceProfile.size()];
      boolean[] imputeVector = new boolean[referenceProfile.size()];

      for (PeakArea peakArea : peakAreas) {
        int index = referenceProfile.indexOf(peakArea.getPeptideAnnotation().getPeptideId());
        if (index < 0) {
          // indexOf signals "unknown peptide" with -1; fail with context instead of an
          // ArrayIndexOutOfBoundsException on the write below.
          throw new IllegalStateException(
              "Peptide "
                  + peakArea.getPeptideAnnotation().getPeptideId()
                  + " is not part of the reference profile for assay "
                  + assayType);
        }
        profileVector[index] = peakArea.getValue();
      }

      UtilsStatistics.imputeProfileVector(profileVector, imputeVector);

      Profile profile =
          new Profile(
              replicateAnnotation,
              gctFile,
              ArrayUtils.toPrimitive(profileVector),
              imputeVector,
              referenceProfile,
              referenceGeneNames,
              dummyClusteringOrder--);

      profileRepository.save(profile);
    }
  }
Exemplo n.º 3
0
  /**
   * Merges GCP and P100 profiles that share the same cell id and perturbation name and saves one
   * {@link MergedProfile} per group.
   *
   * <p>Profiles are read in the order delivered by
   * {@code profileRepository.findByAssayTypeInOrderByConcatDesc} and consecutive profiles with the
   * same {@code cellId + pertiname} key form one group. This relies on the repository ordering
   * keeping equal keys adjacent (presumably what the "Concat" ordering provides; confirm against
   * the repository definition).
   *
   * @param p100Length length of the P100 reference profile, passed through to
   *     {@code UtilsTransform.mergeProfiles}
   * @param gcpLength length of the GCP reference profile, passed through to
   *     {@code UtilsTransform.mergeProfiles}
   */
  private void mergeProfiles(int p100Length, int gcpLength) {

    log.info("Merging profiles");

    List<AssayType> assayTypes = new ArrayList<>();
    assayTypes.add(AssayType.GCP);
    assayTypes.add(AssayType.P100);

    List<Profile> profiles = profileRepository.findByAssayTypeInOrderByConcatDesc(assayTypes);

    String groupKey = null;
    List<Profile> group = new ArrayList<>();

    for (Profile profile : profiles) {
      String profileKey =
          profile.getReplicateAnnotation().getCellId()
              + profile.getReplicateAnnotation().getPertiname();

      // A key change closes the running group; the current profile then opens the next one.
      if (groupKey != null && !profileKey.equals(groupKey)) {
        MergedProfile mergedProfile = UtilsTransform.mergeProfiles(group, p100Length, gcpLength);
        mergedProfileRepository.save(mergedProfile);
        group = new ArrayList<>();
      }

      groupKey = profileKey;
      group.add(profile);
    }

    // The last group has no following key change, so it is flushed here.
    if (!group.isEmpty()) {
      MergedProfile mergedProfile = UtilsTransform.mergeProfiles(group, p100Length, gcpLength);
      mergedProfileRepository.save(mergedProfile);
    }
  }
Exemplo n.º 4
0
  /**
   * For every profile, finds the profile of the same assay type with the highest Pearson
   * correlation of their vectors and stores that partner's data on the profile.
   *
   * <p>Per profile this sets the correlated vector, the HTML list of influential peptides returned
   * by {@code UtilsStatistics.influentialPeptides}, and a text made of the partner's
   * {@code toString()} followed by the formatted correlation, then saves the profile. Profiles
   * equal to the one being processed are not considered as partners. When no partner is found the
   * profile is paired with itself and a correlation of 0 is reported.
   */
  private void computeCorrelations() {
    log.info("Filling repository with most correlated profiles");

    // Loop-invariant helpers; created once instead of once per comparison.
    PearsonsCorrelation pearson = new PearsonsCorrelation();
    DecimalFormat df = new DecimalFormat("0.0000");
    String peptideCorrelation = " <br/><br/><b style=\"color: #23527c;\">%s</b>";

    for (AssayType assayType : AssayType.values()) {

      List<String> referenceProfile = getReferenceProfile(assayType);
      List<Profile> profiles = profileRepository.findByAssayType(assayType);

      for (Profile profileA : profiles) {

        double[] vectorA = profileA.getVector();

        // Start below every possible correlation. Double.MIN_VALUE is the smallest POSITIVE
        // double and would silently reject all negative correlations.
        double maxPearson = Double.NEGATIVE_INFINITY;
        Profile maxProfile = profileA;

        for (Profile profileB : profiles) {
          if (profileA.equals(profileB)) {
            continue;
          }

          double pearsonCorrelation = pearson.correlation(vectorA, profileB.getVector());

          // ">=" lets a later profile win ties; a NaN correlation never passes this test.
          if (pearsonCorrelation >= maxPearson) {
            maxPearson = pearsonCorrelation;
            maxProfile = profileB;
          }
        }

        // maxProfile is only reassigned when a partner was accepted above.
        double reportedPearson = (maxProfile == profileA) ? 0.0 : maxPearson;

        profileA.setCorrelatedVector(maxProfile.getListWrapper());

        SortedSet<StringDouble> positivePeptides =
            UtilsStatistics.influentialPeptides(
                vectorA, maxProfile.getVector(), referenceProfile, true);

        profileA.setPositivePeptides(UtilsTransform.SortedSetToHTML(positivePeptides, false));

        profileA.setPositiveCorrelation(
            maxProfile.toString() + String.format(peptideCorrelation, df.format(reportedPearson)));

        profileRepository.save(profileA);
      }
    }
  }
Exemplo n.º 5
0
  /**
   * Normalizes the peak values of the GCP and P100 assays and writes the result back.
   *
   * <p>For each assay the peak areas are arranged as a replicate-by-peptide matrix, handed to
   * {@code Normalizer.quantileAndZScoreNormalize}, and every resulting value is stored in the
   * {@code normalizedValue} field of the matching {@link PeakArea}.
   *
   * @throws IllegalStateException if a replicate/peptide pair does not resolve to exactly one peak
   *     area during write-back
   */
  private void normalize() {
    List<AssayType> assayTypes = Arrays.asList(AssayType.GCP, AssayType.P100);

    // do loop for each assay e.g. P100, GCP
    for (AssayType assayType : assayTypes) {
      log.info("Normalize matrix of peak values for assay: {}", assayType);

      List<PeakArea> allPeakAreas = peakAreaRepository.findByGctFileAssayType(assayType);

      // inefficient, try with database count
      int numberOfPeptides = peptideAnnotationRepository.findByAssayType(assayType).size();
      int numberOfReplicates = replicateAnnotationRepository.findByAssayType(assayType).size();

      // Position in each list is the matrix index assigned to the database id stored there.
      List<Integer> mapPeptideIdToRowId = new ArrayList<>();
      List<Integer> mapReplicateIdToColumnId = new ArrayList<>();

      // The ArrayList(int) constructor only reserves capacity, so each row is filled with null
      // placeholders to make set(index, value) valid below.
      // NOTE(review): how Normalizer treats null cells is not visible here; confirm.
      List<List<Double>> peaksAsMatrix = new ArrayList<>(numberOfReplicates);
      for (int i = 0; i < numberOfReplicates; i++) {
        List<Double> replicateValues = new ArrayList<>(numberOfPeptides);
        for (int j = 0; j < numberOfPeptides; j++) {
          replicateValues.add(null);
        }
        peaksAsMatrix.add(replicateValues);
      }

      for (PeakArea peakArea : allPeakAreas) {
        int peptideId = Math.toIntExact(peakArea.getPeptideAnnotation().getId());
        int replicateId = Math.toIntExact(peakArea.getReplicateAnnotation().getId());

        // Ids get the next free index the first time they are seen.
        int mappedRowId = mapPeptideIdToRowId.indexOf(peptideId);
        if (mappedRowId < 0) {
          mappedRowId = mapPeptideIdToRowId.size();
          mapPeptideIdToRowId.add(peptideId);
        }

        int mappedColumnId = mapReplicateIdToColumnId.indexOf(replicateId);
        if (mappedColumnId < 0) {
          mappedColumnId = mapReplicateIdToColumnId.size();
          mapReplicateIdToColumnId.add(replicateId);
        }

        peaksAsMatrix.get(mappedColumnId).set(mappedRowId, peakArea.getValue());
      }

      List<List<Double>> outputMatrix = Normalizer.quantileAndZScoreNormalize(peaksAsMatrix);

      // write normalized values back to DB; only indices that were actually mapped to a database
      // id are visited
      for (int i = 0; i < mapReplicateIdToColumnId.size(); i++) {
        for (int j = 0; j < mapPeptideIdToRowId.size(); j++) {
          Double normalizedValue = outputMatrix.get(i).get(j);

          Long databaseReplicateId = mapReplicateIdToColumnId.get(i).longValue();
          Long databasePeptideId = mapPeptideIdToRowId.get(j).longValue();

          List<PeakArea> peakAreas =
              peakAreaRepository
                  .findByGctFileAssayTypeAndReplicateAnnotationIdAndPeptideAnnotationId(
                      assayType, databaseReplicateId, databasePeptideId);

          // Enforced explicitly because a plain assert is skipped unless the JVM runs with -ea.
          if (peakAreas.size() != 1) {
            throw new IllegalStateException(
                "Expected exactly one peak area for assay "
                    + assayType
                    + ", replicate "
                    + databaseReplicateId
                    + ", peptide "
                    + databasePeptideId
                    + " but found "
                    + peakAreas.size());
          }

          PeakArea peakArea = peakAreas.get(0);
          peakArea.setNormalizedValue(normalizedValue);

          // Fill in normalized value field in DB
          peakAreaRepository.save(peakArea);
        }
      }

      log.info(
          "Normalized assay: {} peptides: {}, replicates: {}.",
          assayType,
          numberOfPeptides,
          numberOfReplicates);
    }
  }