Пример #1
0
  /**
   * Converts an mzIdentML {@code Modification} element into a string of the form
   * {@code ModName:location}.
   *
   * <p>The name is the name of the last CV param attached to the modification. When the
   * modification has no CV params (the list is {@code null} or empty) the monoisotopic mass delta
   * is used instead, falling back to the average mass delta. The {@code :location} suffix is only
   * appended when a location is present.
   *
   * @param mod the modification to render
   * @return the textual form of the modification; empty when no name, mass delta or location is
   *     available
   */
  private String modToString(Modification mod) {
    StringBuilder modString = new StringBuilder();

    // An empty CV param list must be treated like a missing one; otherwise the mass-delta
    // fallback below is never reached and the name is silently left blank.
    if (mod.getCvParam() != null && !mod.getCvParam().isEmpty()) {
      String modName = "";
      for (CvParam cvParam : mod.getCvParam()) {
        modName = cvParam.getName(); // the last CV param wins
      }
      modString.append(modName);
    } else if (mod.getMonoisotopicMassDelta() != null) {
      modString.append(mod.getMonoisotopicMassDelta());
    } else if (mod.getAvgMassDelta() != null) {
      modString.append(mod.getAvgMassDelta());
    }

    if (mod.getLocation() != null) {
      modString.append(':').append(mod.getLocation());
    }
    return modString.toString();
  }
Пример #2
0
  /**
   * Renders the leading columns for a protein ambiguity group: the group id followed by the value
   * of its first CV param, each terminated by {@code sep}.
   *
   * <p>When the group has no CV params the second column is left empty so that the column count
   * stays constant.
   *
   * @param pag the protein ambiguity group to render
   * @return the id column and the first-CV-param column, both separator-terminated
   */
  private String pagToString(ProteinAmbiguityGroup pag) {
    String pagString = pag.getId() + sep;

    // Read the first CV param only when one exists. The previous check was inverted and called
    // get(0) on an empty list, throwing IndexOutOfBoundsException.
    if (!pag.getCvParam().isEmpty()) {
      CvParam scoreParam = pag.getCvParam().get(0);
      pagString += scoreParam.getValue() + sep;
    } else {
      pagString += sep;
    }

    return pagString;
  }
Пример #3
0
  /**
   * Finds the protein detection hypothesis of a group that is flagged with the given CV accession.
   *
   * <p>Every hypothesis of the group is examined; if several carry the accession, the last one in
   * list order is returned.
   *
   * @param pag the protein ambiguity group to search
   * @param cvAccForRep CV accession that marks a hypothesis as representative
   * @return the flagged hypothesis, or {@code null} when none carries the accession
   */
  private ProteinDetectionHypothesis getRepresentativePDH(
      ProteinAmbiguityGroup pag, String cvAccForRep) {

    ProteinDetectionHypothesis representative = null;

    candidates:
    for (ProteinDetectionHypothesis candidate : pag.getProteinDetectionHypothesis()) {
      for (CvParam param : candidate.getCvParam()) {
        if (!param.getAccession().equals(cvAccForRep)) {
          continue;
        }
        // First hit on this hypothesis is enough; move on to the next hypothesis.
        representative = candidate;
        continue candidates;
      }
    }

    return representative;
  }
Пример #4
0
  /**
   * Renders the spectrum-level columns for a spectrum identification result: spectra data
   * location, quoted spectrum ID, quoted spectrum title and retention time in seconds, joined by
   * {@code sep}.
   *
   * <p>Retention time is read from CV param {@code MS:1001114} (older files) or
   * {@code MS:1000016}, for example:
   *
   * <pre>
   * &lt;cvParam accession="MS:1001114" name="retention time(s)" cvRef="PSI-MS" value="3488.676"
   *     unitAccession="UO:0000010" unitName="second" unitCvRef="UO" /&gt;
   * </pre>
   *
   * The spectrum title is read from {@code MS:1000796}. When no retention time is found (or its
   * unit is not recognised) {@code -1.0} is written; when no title is found the title column is
   * an empty quoted string.
   *
   * @param sir the spectrum identification result to render
   * @return the spectrum columns, without a trailing separator
   */
  private String sirToString(SpectrumIdentificationResult sir) {
    SpectraData spectraData = spectraDataIdHashMap.get(sir.getSpectraDataRef());

    double rtInSeconds = -1.0;
    String spectrumTitle = "";

    for (CvParam cvParam : sir.getCvParam()) {
      // Constant-first comparisons keep a CV param without an accession or unit from throwing.
      String accession = cvParam.getAccession();

      // Updated by FG: checking for old CV param 1114 or newer correct CV term 16.
      if ("MS:1001114".equals(accession) || "MS:1000016".equals(accession)) {
        String unitAccession = cvParam.getUnitAccession();
        if ("UO:0000010".equals(unitAccession)) { // seconds
          rtInSeconds = Double.parseDouble(cvParam.getValue());
        } else if ("UO:0000031".equals(unitAccession)) { // minutes
          // Minutes to seconds is a multiplication; the previous code divided by 60.
          rtInSeconds = Double.parseDouble(cvParam.getValue()) * 60;
        } else {
          System.out.println("Error parsing RT - unit not recognised");
        }
      } else if ("MS:1000796".equals(accession)) {
        spectrumTitle = cvParam.getValue();
      }
    }

    return spectraData.getLocation()
        + sep
        + "\""
        + sir.getSpectrumID()
        + "\""
        + sep
        + "\""
        + spectrumTitle
        + "\""
        + sep
        + rtInSeconds;
  }
Пример #5
0
  // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator
  /**
   * Builds the ProteoAnnotator-specific columns for a protein ambiguity group.
   *
   * <p>The columns, joined by {@code sep}, are in order: the value of CV param
   * {@code MS:1002475}, the value of CV param {@code MS:1002474}, the value of the user param
   * named {@code nonAPeptide}, the {@code ;}-terminated DB sequence references that start with
   * {@code dbseq_generic|A_}, and the value of CV param {@code MS:1002373}. Columns whose source
   * is absent stay empty.
   *
   * @param pag the protein ambiguity group to render
   * @return the five columns, without a trailing separator
   */
  private String proteoAnnotatorLineToString(ProteinAmbiguityGroup pag) {

    String peptideNonA = "";
    for (UserParam param : pag.getUserParam()) {
      if (param.getName().equals("nonAPeptide")) {
        peptideNonA = param.getValue();
      }
    }

    String nonACount = "";
    String nonAScore = "";
    String groupQValue = "";
    for (CvParam param : pag.getCvParam()) {
      String accession = param.getAccession();
      if (accession.equals("MS:1002474")) {
        nonAScore = param.getValue();
      } else if (accession.equals("MS:1002475")) {
        nonACount = param.getValue();
      } else if (accession.equals("MS:1002373")) {
        groupQValue = param.getValue();
      }
    }

    StringBuilder genesA = new StringBuilder();
    for (ProteinDetectionHypothesis hypothesis : pag.getProteinDetectionHypothesis()) {
      String sequenceRef = hypothesis.getDBSequenceRef();
      if (sequenceRef.startsWith("dbseq_generic|A_")) {
        genesA.append(sequenceRef).append(";");
      }
    }

    return nonACount + sep + nonAScore + sep + peptideNonA + sep + genesA + sep + groupQValue;
  }
Пример #6
0
  /**
   * Renders the protein-level columns for a protein detection hypothesis.
   *
   * <p>The columns, each terminated by {@code sep}, are: the quoted accession of the referenced
   * DB sequence, the pass-threshold flag, the quoted protein description (CV param
   * {@code MS:1001088} of the DB sequence), the quoted group-membership term (CV params
   * {@code MS:1001591} to {@code MS:1001599}, with {@code :value} appended when a value is
   * present), and then one column per entry of {@code columnToProtScoreMap}, filled from the
   * hypothesis' remaining CV params and its user params by name.
   *
   * <p>If the DB sequence reference cannot be resolved the accession column is an empty quoted
   * string and the description column is empty.
   *
   * @param pdh the protein detection hypothesis to render
   * @return the protein columns, separator-terminated
   */
  private String pdhToString(ProteinDetectionHypothesis pdh) {
    // Resolve the DB sequence once and null-check it before any use; the accession used to be
    // read from the lookup result before the null check further down.
    DBSequence dbSeq = dbSequenceIdHashMap.get(pdh.getDBSequenceRef());

    String accession = "";
    String protDesc = "";
    if (dbSeq != null) {
      accession = dbSeq.getAccession();
      for (CvParam cvParam : dbSeq.getCvParam()) {
        if ("MS:1001088".equals(cvParam.getAccession())
            && cvParam.getValue() != null) { // Protein description
          String description = cvParam.getValue().replace("\"", ""); // remove internal "
          protDesc = "\"" + description + "\"";
        }
      }
    }

    String protGroupMembership = "";
    Map<String, String> mapNameToValue = new HashMap<>();

    for (CvParam cvParam : pdh.getCvParam()) {
      String cvAccession = cvParam.getAccession();
      if ("MS:1001591".equals(cvAccession)
          || "MS:1001592".equals(cvAccession)
          || "MS:1001593".equals(cvAccession)
          || "MS:1001594".equals(cvAccession)
          || "MS:1001595".equals(cvAccession)
          || "MS:1001596".equals(cvAccession)
          || "MS:1001597".equals(cvAccession)
          || "MS:1001598".equals(cvAccession)
          || "MS:1001599".equals(cvAccession)) { // Protein group membership terms
        protGroupMembership = "\"" + cvParam.getName();
        if (cvParam.getValue() != null) {
          protGroupMembership += ":" + cvParam.getValue();
        }
        protGroupMembership += "\"";
      } else {
        mapNameToValue.put(cvParam.getName(), cvParam.getValue());
      }
    }

    // User params share the score lookup and override a CV param of the same name.
    for (UserParam userParam : pdh.getUserParam()) {
      mapNameToValue.put(userParam.getName(), userParam.getValue());
    }

    StringBuilder pdhString = new StringBuilder();
    pdhString
        .append("\"")
        .append(accession)
        .append("\"")
        .append(sep)
        .append(pdh.isPassThreshold())
        .append(sep)
        .append(protDesc)
        .append(sep)
        .append(protGroupMembership)
        .append(sep);

    // Handle scores: one column per known score name, empty when this hypothesis lacks it.
    for (int i = 0; i < columnToProtScoreMap.size(); i++) {
      String score = columnToProtScoreMap.get(i);
      if (mapNameToValue.containsKey(score)) {
        pdhString.append(mapNameToValue.get(score));
      }
      pdhString.append(sep);
    }

    return pdhString.toString();
  }
Пример #7
0
  /**
   * Reads the mzIdentML document through {@code unmarshaller}, fills the id lookup maps and the
   * score-column maps, and writes the export selected by {@code exportOption} to
   * {@code outputFile}.
   *
   * <p>Recognised options are {@code exportPSMs}, {@code exportProteinGroups},
   * {@code exportRepProteinPerPAGOnly}, {@code exportProteoAnnotator} and
   * {@code exportProteinsOnly}; any other value prints a usage message. {@link IOException}s are
   * reported on standard output rather than propagated.
   *
   * @param outputFile path of the file to write
   * @param exportOption name of the export to produce
   */
  private void init(String outputFile, String exportOption) {
    Writer out = null;
    try {
      out = new BufferedWriter(new FileWriter(outputFile));
      // Read all the objects we will need into hashes that are not automatically resolved by object
      // reference
      if (isVerbose) {
        System.out.print("About to iterate over PepEvid...");
      }
      Iterator<PeptideEvidence> iterPeptideEvidence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.PeptideEvidence);
      while (iterPeptideEvidence.hasNext()) {
        PeptideEvidence peptideEvidence = iterPeptideEvidence.next();
        peptideEvidenceIdHashMap.put(peptideEvidence.getId(), peptideEvidence);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Peptide");
      }
      Iterator<Peptide> iterPeptide =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.Peptide);
      while (iterPeptide.hasNext()) {
        Peptide peptide = iterPeptide.next();
        peptideIdHashMap.put(peptide.getId(), peptide);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Spectra Data");
      }
      Iterator<SpectraData> iterSpectraData =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectraData);
      while (iterSpectraData.hasNext()) {
        SpectraData spectraData = iterSpectraData.next();
        spectraDataIdHashMap.put(spectraData.getId(), spectraData);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over DBsequence");
      }
      Iterator<DBSequence> iterDBSequence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.DBSequence);
      while (iterDBSequence.hasNext()) {
        DBSequence dbSequence = iterDBSequence.next();
        dbSequenceIdHashMap.put(dbSequence.getId(), dbSequence);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over PDH");
      }

      // Collect the protein-level score columns: every distinct CV/user param name seen on any
      // PDH gets the next free column index.
      Iterator<ProteinDetectionHypothesis> iterPDH =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinDetectionHypothesis);
      int pCounter = 0;
      while (iterPDH.hasNext()) {
        ProteinDetectionHypothesis pdh = iterPDH.next();
        pdhIdHashMap.put(pdh.getId(), pdh);

        for (CvParam cvParam : pdh.getCvParam()) {
          String accession = cvParam.getAccession();
          // These accessions are specifically handled in pdhToString and get no score column.
          // TODO this check could be improved using an array of values...
          boolean handledSeparately =
              "MS:1001591".equals(accession)
                  || "MS:1001592".equals(accession)
                  || "MS:1001593".equals(accession)
                  || "MS:1001594".equals(accession)
                  || "MS:1001595".equals(accession)
                  || "MS:1001596".equals(accession)
                  || "MS:1001597".equals(accession)
                  || "MS:1001598".equals(accession)
                  || "MS:1001599".equals(accession);
          if (!handledSeparately
              && cvParam.getValue() != null
              && !columnToProtScoreMap.containsValue(cvParam.getName())) {
            columnToProtScoreMap.put(pCounter, cvParam.getName());
            pCounter++;
          }
        }

        for (UserParam userParam : pdh.getUserParam()) {
          if (!columnToProtScoreMap.containsValue(userParam.getName())) {
            columnToProtScoreMap.put(pCounter, userParam.getName());
            pCounter++;
          }
        }
      }
      for (int i = 0; i < pCounter; i++) {
        pScoreHeader += columnToProtScoreMap.get(i) + sep;
      }

      // Now let's see what scores we have in the file.
      // The SIRs are kept in a list so the PSM export below does not unmarshal them again.
      int counter = 0;
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over SIR");
      }
      Iterator<SpectrumIdentificationResult> iterSIR =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult);
      List<SpectrumIdentificationResult> sirList = new ArrayList<>();
      while (iterSIR.hasNext()) {
        SpectrumIdentificationResult sir = iterSIR.next();
        sirList.add(sir);

        List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem();

        for (SpectrumIdentificationItem sii : listSII) {
          siiIdHashMap.put(sii.getId(), sii);
          siiIdToSirHashMap.put(sii.getId(), sir);
          for (CvParam cvParam : sii.getCvParam()) {
            if (cvParam.getValue() != null
                && !columnToScoreMap.containsValue(cvParam.getName())) {
              columnToScoreMap.put(counter, cvParam.getName());
              counter++;
            }
          }
        }
      }
      for (int i = 0; i < counter; i++) {
        scoreHeader += sep + columnToScoreMap.get(i);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to create output");
      }
      if (exportOption.equals("exportPSMs")) {

        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        for (SpectrumIdentificationResult sir : sirList) {

          String sirLine = sirToString(sir);

          List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem();

          for (SpectrumIdentificationItem sii : listSII) {
            out.write(sirLine + sep + siiToString(sii) + "\n");
          }
        }

      } else if (exportOption.equals("exportProteinGroups")) {

        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();

          String pagLine = pagToString(pag);

          // handle PDHs
          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine;
            pdhLine += pdhToString(pdh);

            for (PeptideHypothesis pepH : pdh.getPeptideHypothesis()) {

              List<SpectrumIdentificationItemRef> siiRefList =
                  pepH.getSpectrumIdentificationItemRef();
              for (SpectrumIdentificationItemRef siiRef : siiRefList) {
                SpectrumIdentificationResult sir =
                    siiIdToSirHashMap.get(siiRef.getSpectrumIdentificationItemRef());

                SpectrumIdentificationItem sii =
                    siiIdHashMap.get(siiRef.getSpectrumIdentificationItemRef());
                out.write(pdhLine + sirToString(sir) + sep + siiToString(sii) + "\n");
              }
            }
          }
        }

      } else if (exportOption.equals("exportRepProteinPerPAGOnly")) {
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);

          String pdhLine = pagLine;

          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          out.write(pdhLine + "\n");
        }

      } else if (exportOption.equals(
          "exportProteoAnnotator")) { // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator

        out.write(pagHeader);
        out.write(pScoreHeader);
        // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator

        out.write(exportProteoAnnotatorHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);

          String pdhLine = pagLine;

          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator
          String proteoAnnotatorLine = pdhLine;
          proteoAnnotatorLine = proteoAnnotatorLine + proteoAnnotatorLineToString(pag);
          out.write(proteoAnnotatorLine + "\n");
        }
      } else if (exportOption.equals("exportProteinsOnly")) {
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          // handle PDHs
          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine;
            pdhLine += pdhToString(pdh);
            out.write(pdhLine + "\n");
          }
        }
      } else {
        System.out.println(
            "Error - correct usage MzIdentMLToCSV inputFile outputFile -exportType [exportProteinGroups|exportPSMs|exportProteinsOnly]");
      }
      // Close here so that the success message is only printed after the data has been flushed;
      // the close in the finally block is then a no-op.
      out.close();
      System.out.println("Output written to " + outputFile);
    } catch (IOException ex) {
      String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
      String className = this.getClass().getName();
      String message =
          "The task \""
              + methodName
              + "\" in the class \""
              + className
              + "\" was not completed because of "
              + ex.getMessage()
              + "."
              + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
      System.out.println(message);
    } finally {
      // out is still null when the output file could not be opened; closing it unconditionally
      // would replace the reported I/O error with a NullPointerException.
      if (out != null) {
        try {
          out.close();
        } catch (IOException ex) {
          String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
          String className = this.getClass().getName();
          String message =
              "The task \""
                  + methodName
                  + "\" in the class \""
                  + className
                  + "\" was not completed because of "
                  + ex.getMessage()
                  + "."
                  + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
          System.out.println(message);
        }
      }
    }
  }
Пример #8
0
  /**
   * Builds a {@code SearchModification} from a Mascot modification.
   *
   * <p>The modification is marked fixed unless it is a {@code VariableModification}. If its
   * location text contains {@code term} or {@code Term}, a terminus specificity rule is added
   * (protein N/C-term or peptide N/C-term, chosen from the start of the location text) and the
   * residues are the non-blank characters following {@code erm}, or {@code "."} when there are
   * none. Otherwise every character of the location becomes a residue. The mass delta is set
   * from the modification mass, and a UNIMOD CV param is attached when the parser finds a match
   * by name, mass and residues.
   *
   * @param mod the Mascot modification to convert
   * @param uniModParser parser used to look up the matching UNIMOD entry
   * @return the populated search modification
   */
  private static SearchModification createPSIModification(
      com.compomics.mascotdatfile.util.interfaces.Modification mod, UnimodParser uniModParser) {
    SearchModification searchMod = new SearchModification();
    searchMod.setFixedMod(!(mod instanceof VariableModification));

    String location = mod.getLocation();
    boolean terminal = location.contains("term") || location.contains("Term");

    if (!terminal) {
      // Plain residue list: every character is taken as a residue.
      for (char aminoAcid : location.toCharArray()) {
        searchMod.getResidues().add(String.valueOf(aminoAcid));
      }
    } else {
      OntologyConstants terminusTerm = null;
      if (location.startsWith("Protein N")) {
        terminusTerm = OntologyConstants.MODIFICATION_SPECIFICITY_PROTEIN_N_TERM;
      } else if (location.startsWith("Protein C")) {
        terminusTerm = OntologyConstants.MODIFICATION_SPECIFICITY_PROTEIN_C_TERM;
      } else if (location.startsWith("N")) {
        terminusTerm = OntologyConstants.MODIFICATION_SPECIFICITY_PEP_N_TERM;
      } else if (location.startsWith("C")) {
        terminusTerm = OntologyConstants.MODIFICATION_SPECIFICITY_PEP_C_TERM;
      }

      if (terminusTerm != null) {
        SpecificityRules rules = new SpecificityRules();
        rules.getCvParam().add(MzIdentMLTools.createPSICvParam(terminusTerm, null));
        searchMod.getSpecificityRules().add(rules);

        // Whatever follows "erm" lists the residues the terminal modification applies to.
        String[] parts = location.split("erm");
        if (parts.length <= 1) {
          searchMod.getResidues().add(".");
        } else {
          for (char aminoAcid : parts[1].trim().toCharArray()) {
            if (aminoAcid == ' ') {
              continue;
            }
            searchMod.getResidues().add(String.valueOf(aminoAcid));
          }
        }
      }
    }

    searchMod.setMassDelta((float) mod.getMass());

    ModT unimodEntry =
        uniModParser.getModificationByNameAndMass(
            mod.getType(), mod.getMass(), searchMod.getResidues());
    if (unimodEntry == null) {
      return searchMod;
    }

    CvParam unimodParam = new CvParam();
    unimodParam.setAccession("UNIMOD:" + unimodEntry.getRecordId());
    unimodParam.setCv(UnimodParser.getCv());
    unimodParam.setName(unimodEntry.getTitle());
    searchMod.getCvParam().add(unimodParam);

    return searchMod;
  }