Exemple #1
0
  /**
   * Renders one protein detection hypothesis as a run of CSV fields, each followed by {@code sep}.
   *
   * <p>Field order: quoted accession of the referenced DBSequence, pass-threshold flag, quoted
   * protein description (cvParam MS:1001088 of the DBSequence, with embedded double quotes
   * removed), quoted group-membership term (cvParams MS:1001591 to MS:1001599, written as
   * {@code name} or {@code name:value}), then one field per column registered in
   * {@code columnToProtScoreMap}.
   *
   * <p>If the DBSequence reference cannot be resolved in {@code dbSequenceIdHashMap}, the
   * accession is written as an empty quoted field and the description is left empty, so the
   * column layout stays intact instead of failing with a NullPointerException.
   *
   * @param pdh the hypothesis to serialise
   * @return the CSV fragment, ending with a trailing separator
   */
  private String pdhToString(ProteinDetectionHypothesis pdh) {
    // Single lookup, checked before any use: the same guard now covers accession and description.
    DBSequence dbSeq = dbSequenceIdHashMap.get(pdh.getDBSequenceRef());

    StringBuilder line = new StringBuilder();
    line.append("\"");
    if (dbSeq != null) {
      line.append(dbSeq.getAccession());
    }
    line.append("\"").append(sep).append(pdh.isPassThreshold()).append(sep);

    String protDesc = "";
    if (dbSeq != null) {
      for (CvParam cvParam : dbSeq.getCvParam()) {
        // MS:1001088 = protein description; skip it when it carries no value.
        if ("MS:1001088".equals(cvParam.getAccession()) && cvParam.getValue() != null) {
          // Strip embedded double quotes so the field stays a single quoted CSV cell.
          protDesc = "\"" + cvParam.getValue().replaceAll("\"", "") + "\"";
        }
      }
    }

    String protGroupMembership = "";
    Map<String, String> mapNameToValue = new HashMap<>();

    for (CvParam cvParam : pdh.getCvParam()) {
      String accession = cvParam.getAccession();
      boolean isGroupMembershipTerm =
          "MS:1001591".equals(accession)
              || "MS:1001592".equals(accession)
              || "MS:1001593".equals(accession)
              || "MS:1001594".equals(accession)
              || "MS:1001595".equals(accession)
              || "MS:1001596".equals(accession)
              || "MS:1001597".equals(accession)
              || "MS:1001598".equals(accession)
              || "MS:1001599".equals(accession);

      if (isGroupMembershipTerm) {
        // If several membership terms are present, the last one wins.
        protGroupMembership = "\"" + cvParam.getName();
        if (cvParam.getValue() != null) {
          protGroupMembership += ":" + cvParam.getValue();
        }
        protGroupMembership += "\"";
      } else {
        // Every other cvParam is a candidate score column, keyed by its name.
        mapNameToValue.put(cvParam.getName(), cvParam.getValue());
      }
    }

    // User params share the name -> value map; on a name clash the user param overrides.
    for (UserParam userParam : pdh.getUserParam()) {
      mapNameToValue.put(userParam.getName(), userParam.getValue());
    }

    line.append(protDesc).append(sep).append(protGroupMembership).append(sep);

    // One field per registered score column, in column order; a score that this hypothesis
    // does not carry yields an empty field so the row stays aligned with the header.
    for (int i = 0; i < columnToProtScoreMap.size(); i++) {
      String score = columnToProtScoreMap.get(i);
      if (mapNameToValue.containsKey(score)) {
        line.append(mapNameToValue.get(score));
      }
      line.append(sep);
    }

    return line.toString();
  }
Exemple #2
0
  /**
   * Renders one spectrum identification item as CSV fields joined by {@code sep}.
   *
   * <p>Field order: quoted id, rank, pass-threshold flag, calculated m/z, experimental m/z,
   * charge state, quoted peptide sequence, ';'-joined modifications (regular ones first, then
   * substitution modifications), one field per column registered in {@code columnToScoreMap},
   * a quoted ';'-joined list of protein mappings written as
   * {@code accession_start_end_pre_post}, and finally the decoy flag.
   *
   * @param sii the item to serialise
   * @return the CSV fragment, without a trailing separator
   */
  private String siiToString(SpectrumIdentificationItem sii) {
    StringBuilder row = new StringBuilder();

    row.append("\"")
        .append(sii.getId())
        .append("\"")
        .append(sep)
        .append(sii.getRank())
        .append(sep)
        .append(sii.isPassThreshold())
        .append(sep)
        .append(sii.getCalculatedMassToCharge())
        .append(sep)
        .append(sii.getExperimentalMassToCharge())
        .append(sep)
        .append(sii.getChargeState());

    // The peptide is not resolved by object reference, so fetch it from the id index.
    Peptide peptide = peptideIdHashMap.get(sii.getPeptideRef());
    row.append(sep).append("\"").append(peptide.getPeptideSequence()).append("\"");

    // Modifications column.
    row.append(sep);
    StringBuilder mods = new StringBuilder();

    if (peptide.getModification() != null) {
      boolean firstMod = true;
      for (Modification mod : peptide.getModification()) {
        if (!firstMod) {
          mods.append(";");
        }
        mods.append(modToString(mod));
        firstMod = false;
      }
    }

    if (peptide.getSubstitutionModification() != null) {
      boolean firstSubMod = true;
      for (SubstitutionModification subMod : peptide.getSubstitutionModification()) {
        // Separate from the previous substitution, or from whatever text is already present.
        if (!firstSubMod || mods.length() > 0) {
          mods.append(";");
        }
        mods.append(subModToString(subMod));
        firstSubMod = false;
      }
    }

    row.append(mods);

    // Index every parameter of the item by name for the score lookup below.
    Map<String, String> valueByName = new HashMap<>();
    for (AbstractParam param : sii.getParamGroup()) {
      valueByName.put(param.getName(), param.getValue());
    }

    // Score columns: always emit the separator, then the value if this item has one.
    for (int column = 0; column < columnToScoreMap.size(); column++) {
      String scoreName = columnToScoreMap.get(column);
      row.append(sep);
      if (valueByName.containsKey(scoreName)) {
        row.append(valueByName.get(scoreName));
      }
    }

    // Protein mappings column, wrapped in one pair of quotes.
    row.append(sep).append("\"");
    boolean decoy = false;
    boolean firstMapping = true;
    for (PeptideEvidenceRef evidenceRef : sii.getPeptideEvidenceRef()) {
      PeptideEvidence evidence = peptideEvidenceIdHashMap.get(evidenceRef.getPeptideEvidenceRef());
      DBSequence dbSeq = dbSequenceIdHashMap.get(evidence.getDBSequenceRef());

      if (!firstMapping) {
        row.append(";"); // separator between protein mappings
      }
      firstMapping = false;

      row.append(dbSeq.getAccession())
          .append("_")
          .append(evidence.getStart())
          .append("_")
          .append(evidence.getEnd())
          .append("_")
          .append(evidence.getPre())
          .append("_")
          .append(evidence.getPost());

      // A single decoy evidence marks the whole item as decoy.
      if (evidence.isIsDecoy()) {
        decoy = true;
      }
    }
    row.append("\"");

    row.append(sep).append(decoy);

    return row.toString();
  }
Exemple #3
0
  /**
   * Indexes the mzIdentML document held by {@code unmarshaller} and writes the requested CSV
   * export to {@code outputFile}.
   *
   * <p>The method first fills the id-keyed lookup maps (peptide evidence, peptides, spectra data,
   * DB sequences, protein detection hypotheses, spectrum identification items) and discovers the
   * protein-level and PSM-level score columns, assigning each new parameter name the next
   * column index. It then writes a header and the data rows for the selected export.
   *
   * <p>Recognised values of {@code exportOption}: {@code exportPSMs},
   * {@code exportProteinGroups}, {@code exportRepProteinPerPAGOnly},
   * {@code exportProteoAnnotator} and {@code exportProteinsOnly}. Any other value prints a usage
   * message and leaves the output file empty.
   *
   * <p>I/O failures are reported on standard output rather than rethrown. The writer is managed
   * by try-with-resources, so it is closed exactly once and a failure to open the file is
   * reported as the I/O error instead of causing a NullPointerException during cleanup.
   *
   * @param outputFile path of the CSV file to create or overwrite
   * @param exportOption name of the export to produce
   */
  private void init(String outputFile, String exportOption) {
    try (Writer out = new BufferedWriter(new FileWriter(outputFile))) {
      // Read all the objects we will need into hashes that are not automatically resolved by
      // object reference.
      if (isVerbose) {
        System.out.print("About to iterate over PepEvid...");
      }
      Iterator<PeptideEvidence> iterPeptideEvidence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.PeptideEvidence);
      while (iterPeptideEvidence.hasNext()) {
        PeptideEvidence peptideEvidence = iterPeptideEvidence.next();
        peptideEvidenceIdHashMap.put(peptideEvidence.getId(), peptideEvidence);
      }

      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Peptide");
      }
      Iterator<Peptide> iterPeptide =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.Peptide);
      while (iterPeptide.hasNext()) {
        Peptide peptide = iterPeptide.next();
        peptideIdHashMap.put(peptide.getId(), peptide);
      }

      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Spectra Data");
      }
      Iterator<SpectraData> iterSpectraData =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectraData);
      while (iterSpectraData.hasNext()) {
        SpectraData spectraData = iterSpectraData.next();
        spectraDataIdHashMap.put(spectraData.getId(), spectraData);
      }

      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over DBsequence");
      }
      Iterator<DBSequence> iterDBSequence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.DBSequence);
      while (iterDBSequence.hasNext()) {
        DBSequence dbSequence = iterDBSequence.next();
        dbSequenceIdHashMap.put(dbSequence.getId(), dbSequence);
      }

      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over PDH");
      }
      Iterator<ProteinDetectionHypothesis> iterPDH =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinDetectionHypothesis);
      int pCounter = 0; // next free protein-score column index
      while (iterPDH.hasNext()) {
        ProteinDetectionHypothesis pdh = iterPDH.next();
        pdhIdHashMap.put(pdh.getId(), pdh);

        for (CvParam cvParam : pdh.getCvParam()) {
          String accession = cvParam.getAccession();
          // MS:1001591 to MS:1001599 are handled specifically elsewhere, so they never
          // become score columns here.
          boolean handledSeparately =
              "MS:1001591".equals(accession)
                  || "MS:1001592".equals(accession)
                  || "MS:1001593".equals(accession)
                  || "MS:1001594".equals(accession)
                  || "MS:1001595".equals(accession)
                  || "MS:1001596".equals(accession)
                  || "MS:1001597".equals(accession)
                  || "MS:1001598".equals(accession)
                  || "MS:1001599".equals(accession);

          // Only value-carrying cvParams seen for the first time open a new column.
          if (!handledSeparately
              && cvParam.getValue() != null
              && !columnToProtScoreMap.containsValue(cvParam.getName())) {
            columnToProtScoreMap.put(pCounter, cvParam.getName());
            pCounter++;
          }
        }

        // User params open a column whether or not they carry a value.
        for (UserParam userParam : pdh.getUserParam()) {
          if (!columnToProtScoreMap.containsValue(userParam.getName())) {
            columnToProtScoreMap.put(pCounter, userParam.getName());
            pCounter++;
          }
        }
      }
      for (int i = 0; i < pCounter; i++) {
        pScoreHeader += columnToProtScoreMap.get(i) + sep;
      }

      // Now let's see what scores we have in the file.
      // TODO - I'm not sure this is the fastest way to parse the files.
      int counter = 0; // next free PSM-score column index
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over SIR");
      }
      Iterator<SpectrumIdentificationResult> iterSIR =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult);
      // Results are kept in memory so the PSM export below can reuse them.
      List<SpectrumIdentificationResult> sirList = new ArrayList<>();
      while (iterSIR.hasNext()) {
        SpectrumIdentificationResult sir = iterSIR.next();
        sirList.add(sir);

        for (SpectrumIdentificationItem sii : sir.getSpectrumIdentificationItem()) {
          siiIdHashMap.put(sii.getId(), sii);
          siiIdToSirHashMap.put(sii.getId(), sir);
          for (CvParam cvParam : sii.getCvParam()) {
            if (cvParam.getValue() != null
                && !columnToScoreMap.containsValue(cvParam.getName())) {
              columnToScoreMap.put(counter, cvParam.getName());
              counter++;
            }
          }
        }
      }
      for (int i = 0; i < counter; i++) {
        scoreHeader += sep + columnToScoreMap.get(i);
      }

      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to create output");
      }

      if (exportOption.equals("exportPSMs")) {
        // One row per spectrum identification item, prefixed with its result's fields.
        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        for (SpectrumIdentificationResult sir : sirList) {
          String sirLine = sirToString(sir);
          for (SpectrumIdentificationItem sii : sir.getSpectrumIdentificationItem()) {
            out.write(sirLine + sep + siiToString(sii) + "\n");
          }
        }

      } else if (exportOption.equals("exportProteinGroups")) {
        // One row per (group, hypothesis, referenced PSM) combination.
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine + pdhToString(pdh);

            for (PeptideHypothesis pepH : pdh.getPeptideHypothesis()) {
              for (SpectrumIdentificationItemRef siiRef :
                  pepH.getSpectrumIdentificationItemRef()) {
                String siiId = siiRef.getSpectrumIdentificationItemRef();
                SpectrumIdentificationResult sir = siiIdToSirHashMap.get(siiId);
                SpectrumIdentificationItem sii = siiIdHashMap.get(siiId);
                out.write(pdhLine + sirToString(sir) + sep + siiToString(sii) + "\n");
              }
            }
          }
        }

      } else if (exportOption.equals("exportRepProteinPerPAGOnly")) {
        // One row per group, using only its representative hypothesis when one is found.
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pdhLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);
          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          out.write(pdhLine + "\n");
        }

      } else if (exportOption.equals("exportProteoAnnotator")) {
        // Added by Fawaz Ghali 13/05/2014: representative-protein row plus the
        // ProteoAnnotator-specific fields.
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write(exportProteoAnnotatorHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pdhLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);
          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          String proteoAnnotatorLine = pdhLine + proteoAnnotatorLineToString(pag);
          out.write(proteoAnnotatorLine + "\n");
        }

      } else if (exportOption.equals("exportProteinsOnly")) {
        // One row per hypothesis of every group, without PSM details.
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            out.write(pagLine + pdhToString(pdh) + "\n");
          }
        }

      } else {
        System.out.println(
            "Error - correct usage MzIdentMLToCSV inputFile outputFile -exportType [exportProteinGroups|exportPSMs|exportProteinsOnly]");
      }

      // Push buffered rows to disk before announcing success, so a write failure is
      // reported by the catch block instead of after the success message.
      out.flush();
      System.out.println("Output written to " + outputFile);
    } catch (IOException ex) {
      // Covers failures while opening, writing, flushing and closing the file.
      String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
      String className = this.getClass().getName();
      String message =
          "The task \""
              + methodName
              + "\" in the class \""
              + className
              + "\" was not completed because of "
              + ex.getMessage()
              + "."
              + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
      System.out.println(message);
    }
  }