Пример #1
0
  /**
   * Builds the ProteoAnnotator-specific columns for one protein ambiguity group.
   *
   * <p>Added by Fawaz Ghali 13/05/2014 for the exportProteoAnnotator option.
   *
   * <p>Five fields are joined by {@code sep}, in this order: the value of cvParam MS:1002475, the
   * value of cvParam MS:1002474, the value of the userParam named "nonAPeptide", every DBSequence
   * reference of the group that starts with "dbseq_generic|A_" (each followed by ';'), and the
   * value of cvParam MS:1002373. A field stays empty when its source is not present on the group.
   *
   * @param pag the protein ambiguity group to read the parameters and hypotheses from
   * @return the five fields joined by the column separator, without a trailing separator
   */
  private String proteoAnnotatorLineToString(ProteinAmbiguityGroup pag) {
    // User parameters: only "nonAPeptide" is exported; the last occurrence wins.
    String peptideNonA = "";
    for (UserParam param : pag.getUserParam()) {
      if (param.getName().equals("nonAPeptide")) {
        peptideNonA = param.getValue();
      }
    }

    // CV parameters: pick out the three accessions that feed the export.
    String nonACount = "";
    String nonAScore = "";
    String groupQValue = "";
    for (CvParam param : pag.getCvParam()) {
      switch (param.getAccession()) {
        case "MS:1002474":
          nonAScore = param.getValue();
          break;
        case "MS:1002475":
          nonACount = param.getValue();
          break;
        case "MS:1002373":
          groupQValue = param.getValue();
          break;
        default:
          break;
      }
    }

    // Collect the sequence references carrying the "A_" prefix, each terminated by ';'.
    StringBuilder genesA = new StringBuilder();
    for (ProteinDetectionHypothesis hypothesis : pag.getProteinDetectionHypothesis()) {
      if (hypothesis.getDBSequenceRef().startsWith("dbseq_generic|A_")) {
        genesA.append(hypothesis.getDBSequenceRef()).append(";");
      }
    }

    return nonACount
        + sep
        + nonAScore
        + sep
        + peptideNonA
        + sep
        + genesA.toString()
        + sep
        + groupQValue;
  }
Пример #2
0
  /**
   * Finds the protein detection hypothesis of a group that carries a given CV term.
   *
   * <p>Every hypothesis of the group is examined. When several of them carry the term, the one
   * that comes last in the group's list is returned.
   *
   * @param pag the protein ambiguity group whose hypotheses are searched
   * @param cvAccForRep the CV accession that marks the representative hypothesis
   * @return the matching hypothesis, or {@code null} when no hypothesis carries the accession
   */
  private ProteinDetectionHypothesis getRepresentativePDH(
      ProteinAmbiguityGroup pag, String cvAccForRep) {
    ProteinDetectionHypothesis representative = null;

    candidates:
    for (ProteinDetectionHypothesis candidate : pag.getProteinDetectionHypothesis()) {
      for (CvParam param : candidate.getCvParam()) {
        if (param.getAccession().equals(cvAccForRep)) {
          // Remember this hypothesis and move on to the next one; a later match replaces it.
          representative = candidate;
          continue candidates;
        }
      }
    }

    return representative;
  }
Пример #3
0
  /**
   * Formats the spectrum-level columns for one spectrum identification result.
   *
   * <p>The columns, joined by {@code sep}, are: the location of the referenced spectra data, the
   * quoted spectrum ID, the quoted spectrum title (cvParam MS:1000796, empty when absent) and the
   * retention time in seconds (cvParam MS:1001114 or MS:1000016). The retention time is -1.0 when
   * no such cvParam is present or when its unit is neither UO:0000010 nor UO:0000031.
   *
   * @param sir the spectrum identification result to format
   * @return the formatted columns, without a trailing separator
   */
  private String sirToString(SpectrumIdentificationResult sir) {
    SpectraData spectraData = spectraDataIdHashMap.get(sir.getSpectraDataRef());
    String sirString = spectraData.getLocation() + sep + "\"" + sir.getSpectrumID() + "\"";

    double rtInSeconds = -1.0;
    String spectrumTitle = "";

    // <cvParam accession="MS:1001114" name="retention time(s)"  cvRef="PSI-MS" value="3488.676"
    // unitAccession="UO:0000010" unitName="second" unitCvRef="UO" />
    //  <cvParam accession="MS:1000796" name="spectrum title"  cvRef="PSI-MS"
    // value="mam_050108o_CPTAC_study6_6E004.6805.6805.1" />
    //
    for (CvParam cvParam : sir.getCvParam()) {
      String accession = cvParam.getAccession();

      // Updated by FG: checking for old CV param 1114 or newer correct CV term 16.
      if ("MS:1001114".equals(accession) || "MS:1000016".equals(accession)) {
        // Constant-first comparison: a retention time without a unit accession falls through to
        // the "not recognised" branch instead of throwing.
        String unitAccession = cvParam.getUnitAccession();
        if ("UO:0000010".equals(unitAccession)) {
          rtInSeconds = Double.parseDouble(cvParam.getValue());
        } else if ("UO:0000031".equals(unitAccession)) {
          // Convert minutes to seconds: one minute is 60 seconds, so multiply.
          rtInSeconds = Double.parseDouble(cvParam.getValue()) * 60;
        } else {
          System.out.println("Error parsing RT - unit not recognised");
        }
      }

      if ("MS:1000796".equals(accession)) {
        spectrumTitle = cvParam.getValue();
      }
    }

    sirString += sep + "\"" + spectrumTitle + "\"" + sep + rtInSeconds;

    return sirString;
  }
Пример #4
0
  /**
   * Formats the protein-level columns for one protein detection hypothesis.
   *
   * <p>The columns are, each followed by {@code sep}: the quoted accession of the referenced
   * DBSequence, the pass-threshold flag, the quoted protein description (cvParam MS:1001088 of the
   * DBSequence with inner double quotes removed; empty when absent), the quoted group-membership
   * term (name, plus ":value" when a value is present; empty when absent), and then one cell per
   * entry of {@code columnToProtScoreMap}, filled from the hypothesis' cvParams and userParams by
   * name and left empty when the hypothesis has no parameter of that name.
   *
   * <p>When the DBSequence reference does not resolve in {@code dbSequenceIdHashMap}, the
   * accession and description cells are left empty instead of failing.
   *
   * @param pdh the protein detection hypothesis to format
   * @return the formatted columns, ending with a separator
   */
  private String pdhToString(ProteinDetectionHypothesis pdh) {
    // Resolve the sequence once and null-check it before any use.
    DBSequence dbSeq = dbSequenceIdHashMap.get(pdh.getDBSequenceRef());
    String accession = (dbSeq != null) ? dbSeq.getAccession() : "";

    StringBuilder pdhString = new StringBuilder();
    pdhString
        .append("\"")
        .append(accession)
        .append("\"")
        .append(sep)
        .append(pdh.isPassThreshold())
        .append(sep);

    String protDesc = "";
    if (dbSeq != null) {
      for (CvParam cvParam : dbSeq.getCvParam()) {
        if (cvParam.getAccession().equals("MS:1001088")) { // Protein description
          // Remove internal double quotes so the quoted cell stays well-formed.
          String description = cvParam.getValue().replace("\"", "");
          protDesc = "\"" + description + "\"";
        }
      }
    }

    String protGroupMembership = "";
    Map<String, String> mapNameToValue = new HashMap<>();

    for (CvParam cvParam : pdh.getCvParam()) {
      switch (cvParam.getAccession()) {
        case "MS:1001591":
        case "MS:1001592":
        case "MS:1001593":
        case "MS:1001594":
        case "MS:1001595":
        case "MS:1001596":
        case "MS:1001597":
        case "MS:1001598":
        case "MS:1001599":
          // These terms go to the group-membership column rather than to a score column.
          protGroupMembership = "\"" + cvParam.getName();
          if (cvParam.getValue() != null) {
            protGroupMembership += ":" + cvParam.getValue();
          }
          protGroupMembership += "\"";
          break;
        default:
          mapNameToValue.put(cvParam.getName(), cvParam.getValue());
          break;
      }
    }

    for (UserParam userParam : pdh.getUserParam()) {
      mapNameToValue.put(userParam.getName(), userParam.getValue());
    }

    pdhString.append(protDesc).append(sep).append(protGroupMembership).append(sep);

    // Handle scores: one cell per known score column, in column order.
    for (int i = 0; i < columnToProtScoreMap.size(); i++) {
      String score = columnToProtScoreMap.get(i);
      if (mapNameToValue.containsKey(score)) {
        pdhString.append(mapNameToValue.get(score));
      }
      pdhString.append(sep);
    }

    return pdhString.toString();
  }
Пример #5
0
  /**
   * Reads the identification data through {@code unmarshaller}, fills the lookup maps and score
   * column headers, and writes the export selected by {@code exportOption} to {@code outputFile}.
   *
   * <p>Recognised options are "exportPSMs", "exportProteinGroups", "exportRepProteinPerPAGOnly",
   * "exportProteoAnnotator" and "exportProteinsOnly"; any other value prints a usage message. An
   * {@link IOException} is reported on standard output and is not rethrown.
   *
   * @param outputFile path of the file to write
   * @param exportOption name of the export to produce
   */
  private void init(String outputFile, String exportOption) {
    Writer out = null;
    try {
      out = new BufferedWriter(new FileWriter(outputFile));
      // Read all the objects we will need into hashes that are not automatically resolved by object
      // reference
      if (isVerbose) {
        System.out.print("About to iterate over PepEvid...");
      }
      Iterator<PeptideEvidence> iterPeptideEvidence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.PeptideEvidence);
      while (iterPeptideEvidence.hasNext()) {
        PeptideEvidence peptideEvidence = iterPeptideEvidence.next();
        peptideEvidenceIdHashMap.put(peptideEvidence.getId(), peptideEvidence);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Peptide");
      }
      Iterator<Peptide> iterPeptide =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.Peptide);
      while (iterPeptide.hasNext()) {
        Peptide peptide = iterPeptide.next();
        peptideIdHashMap.put(peptide.getId(), peptide);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Spectra Data");
      }
      Iterator<SpectraData> iterSpectraData =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectraData);
      while (iterSpectraData.hasNext()) {
        SpectraData spectraData = iterSpectraData.next();
        spectraDataIdHashMap.put(spectraData.getId(), spectraData);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over DBsequence");
      }
      Iterator<DBSequence> iterDBSequence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.DBSequence);
      while (iterDBSequence.hasNext()) {
        DBSequence dbSequence = iterDBSequence.next();
        dbSequenceIdHashMap.put(dbSequence.getId(), dbSequence);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over PDH");
      }
      Iterator<ProteinDetectionHypothesis> iterPDH =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinDetectionHypothesis);
      int pCounter = 0;
      while (iterPDH.hasNext()) {
        ProteinDetectionHypothesis pdh = iterPDH.next();
        pdhIdHashMap.put(pdh.getId(), pdh);

        for (CvParam cvParam : pdh.getCvParam()) {
          String accession = cvParam.getAccession();
          // These accessions are specifically handled elsewhere, so they get no score column.
          // TODO this code could be improved using an array of values...
          boolean handledSeparately =
              accession.equals("MS:1001591")
                  || accession.equals("MS:1001592")
                  || accession.equals("MS:1001593")
                  || accession.equals("MS:1001594")
                  || accession.equals("MS:1001595")
                  || accession.equals("MS:1001596")
                  || accession.equals("MS:1001597")
                  || accession.equals("MS:1001598")
                  || accession.equals("MS:1001599");
          if (!handledSeparately
              && cvParam.getValue() != null
              && !columnToProtScoreMap.containsValue(cvParam.getName())) {
            columnToProtScoreMap.put(pCounter, cvParam.getName());
            pCounter++;
          }
        }

        for (UserParam userParam : pdh.getUserParam()) {
          if (!columnToProtScoreMap.containsValue(userParam.getName())) {
            columnToProtScoreMap.put(pCounter, userParam.getName());
            pCounter++;
          }
        }
      }
      for (int i = 0; i < pCounter; i++) {
        pScoreHeader += columnToProtScoreMap.get(i) + sep;
      }
      // Now let's see what scores we have in the file
      // TODO - I'm not sure this is the fastest way to parse the files; these are unmarshalled
      // again below - inefficient?
      int counter = 0;
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over SIR");
      }
      Iterator<SpectrumIdentificationResult> iterSIR =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult);
      List<SpectrumIdentificationResult> sirList = new ArrayList<>();
      while (iterSIR.hasNext()) {
        SpectrumIdentificationResult sir = iterSIR.next();
        sirList.add(sir);

        List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem();

        for (SpectrumIdentificationItem sii : listSII) {
          siiIdHashMap.put(sii.getId(), sii);
          siiIdToSirHashMap.put(sii.getId(), sir);
          for (CvParam cvParam : sii.getCvParam()) {
            // Only cvParams that carry a value become score columns; each name is added once.
            if (cvParam.getValue() != null
                && !columnToScoreMap.containsValue(cvParam.getName())) {
              columnToScoreMap.put(counter, cvParam.getName());
              counter++;
            }
          }
        }
      }
      for (int i = 0; i < counter; i++) {
        scoreHeader += sep + columnToScoreMap.get(i);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to create output");
      }
      if (exportOption.equals("exportPSMs")) {

        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        // One output row per spectrum identification item, prefixed by its result's columns.
        for (SpectrumIdentificationResult sir : sirList) {

          String sirLine = sirToString(sir);

          List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem();

          for (SpectrumIdentificationItem sii : listSII) {
            out.write(sirLine + sep + siiToString(sii) + "\n");
          }
        }

      } else if (exportOption.equals("exportProteinGroups")) {

        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();

          String pagLine = pagToString(pag);

          // handle PDHs
          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine;
            pdhLine += pdhToString(pdh);

            for (PeptideHypothesis pepH : pdh.getPeptideHypothesis()) {

              List<SpectrumIdentificationItemRef> siiRefList =
                  pepH.getSpectrumIdentificationItemRef();
              for (SpectrumIdentificationItemRef siiRef : siiRefList) {
                SpectrumIdentificationResult sir =
                    siiIdToSirHashMap.get(siiRef.getSpectrumIdentificationItemRef());

                SpectrumIdentificationItem sii =
                    siiIdHashMap.get(siiRef.getSpectrumIdentificationItemRef());
                out.write(pdhLine + sirToString(sir) + sep + siiToString(sii) + "\n");
              }
            }
          }
        }

      } else if (exportOption.equals("exportRepProteinPerPAGOnly")) {
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);

          String pdhLine = pagLine;

          // A group without a representative hypothesis still produces a row with its own columns.
          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          out.write(pdhLine + "\n");
        }

      } else if (exportOption.equals(
          "exportProteoAnnotator")) { // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator

        out.write(pagHeader);
        out.write(pScoreHeader);
        // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator

        out.write(exportProteoAnnotatorHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);

          String pdhLine = pagLine;

          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator
          String proteoAnnotatorLine = pdhLine;
          proteoAnnotatorLine = proteoAnnotatorLine + proteoAnnotatorLineToString(pag);
          out.write(proteoAnnotatorLine + "\n");
        }
      } else if (exportOption.equals("exportProteinsOnly")) {
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          // handle PDHs
          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine;
            pdhLine += pdhToString(pdh);
            out.write(pdhLine + "\n");
          }
        }
      } else {
        System.out.println(
            "Error - correct usage MzIdentMLToCSV inputFile outputFile -exportType [exportProteinGroups|exportPSMs|exportProteinsOnly]");
      }
      // Close here so that a failure while flushing is reported before the success message.
      out.close();
      System.out.println("Output written to " + outputFile);
    } catch (IOException ex) {
      String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
      String className = this.getClass().getName();
      String message =
          "The task \""
              + methodName
              + "\" in the class \""
              + className
              + "\" was not completed because of "
              + ex.getMessage()
              + "."
              + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
      System.out.println(message);
    } finally {
      // The writer is still null when opening the output file failed; calling close() on it
      // would throw a NullPointerException from this finally block.
      if (out != null) {
        try {
          out.close();
        } catch (IOException ex) {
          String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
          String className = this.getClass().getName();
          String message =
              "The task \""
                  + methodName
                  + "\" in the class \""
                  + className
                  + "\" was not completed because of "
                  + ex.getMessage()
                  + "."
                  + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
          System.out.println(message);
        }
      }
    }
  }