/**
 * Converts an mzIdentML {@code Modification} element into a string of the form
 * {@code ModName:location}.
 *
 * <p>The name is taken from the modification's cvParams; when several are present the name of the
 * last one is used. When the modification carries no cvParam at all, the monoisotopic mass delta
 * is used in place of the name, or failing that the average mass delta. The {@code :location}
 * suffix is only appended when a location is set.
 *
 * @param mod the modification to render
 * @return the textual form of the modification; empty when neither a name, a mass delta nor a
 *     location is available
 */
private String modToString(Modification mod) {
  String modString = "";

  String modName = "";
  boolean hasCvParam = false;
  if (mod.getCvParam() != null) {
    for (CvParam cvParam : mod.getCvParam()) {
      modName = cvParam.getName();
      hasCvParam = true;
    }
  }

  if (hasCvParam) {
    modString += modName;
  } else if (mod.getMonoisotopicMassDelta() != null) {
    // An empty cvParam list must also reach this fallback, not only a null one; otherwise an
    // unnamed modification would be rendered with no identifying information at all.
    modString += mod.getMonoisotopicMassDelta();
  } else if (mod.getAvgMassDelta() != null) {
    modString += mod.getAvgMassDelta();
  }

  if (mod.getLocation() != null) {
    modString += ":" + mod.getLocation();
  }
  return modString;
}
/**
 * Builds the leading cells of an output row for a protein ambiguity group: the group id followed
 * by the value of the group's first cvParam (treated as the group score).
 *
 * <p>Both cells are always terminated by the separator, so the score cell is simply left empty
 * when the group has no cvParam.
 *
 * @param pag the protein ambiguity group to render
 * @return {@code id + sep + score + sep}, with an empty score when no cvParam is present
 */
private String pagToString(ProteinAmbiguityGroup pag) {
  String pagString = pag.getId() + sep;
  // Only read element 0 when the list actually has one; the previous inverted check called
  // get(0) on an empty list and skipped the score whenever one was available.
  if (!pag.getCvParam().isEmpty()) {
    CvParam scoreParam = pag.getCvParam().get(0);
    pagString += scoreParam.getValue();
  }
  return pagString + sep;
}
/**
 * Looks up the protein detection hypothesis of a group that carries a given cvParam accession.
 *
 * <p>Every hypothesis of the group is inspected. If more than one carries the accession, the one
 * that appears last in the group is returned.
 *
 * @param pag the protein ambiguity group whose hypotheses are searched
 * @param cvAccForRep the cvParam accession that marks a hypothesis as the representative
 * @return the matching hypothesis, or {@code null} when no hypothesis carries the accession
 */
private ProteinDetectionHypothesis getRepresentativePDH(
    ProteinAmbiguityGroup pag, String cvAccForRep) {
  ProteinDetectionHypothesis representative = null;
  for (ProteinDetectionHypothesis candidate : pag.getProteinDetectionHypothesis()) {
    boolean flagged = false;
    Iterator<CvParam> params = candidate.getCvParam().iterator();
    // Stop scanning this hypothesis as soon as one of its cvParams matches.
    while (!flagged && params.hasNext()) {
      flagged = params.next().getAccession().equals(cvAccForRep);
    }
    if (flagged) {
      // The scan over the group continues, so a later match replaces an earlier one.
      representative = candidate;
    }
  }
  return representative;
}
private String sirToString(SpectrumIdentificationResult sir) { String sirString = ""; SpectraData spectraData = spectraDataIdHashMap.get(sir.getSpectraDataRef()); sirString += spectraData.getLocation() + sep + "\"" + sir.getSpectrumID() + "\""; Double rtInSeconds = -1.0; String spectrumTitle = ""; // <cvParam accession="MS:1001114" name="retention time(s)" cvRef="PSI-MS" value="3488.676" // unitAccession="UO:0000010" unitName="second" unitCvRef="UO" /> // <cvParam accession="MS:1000796" name="spectrum title" cvRef="PSI-MS" // value="mam_050108o_CPTAC_study6_6E004.6805.6805.1" /> // for (CvParam cvParam : sir.getCvParam()) { // Updated by FG: checking for old CV param 1114 or newer correct CV term 16. if (cvParam.getAccession().equals("MS:1001114") || cvParam.getAccession().equals("MS:1000016")) { if (cvParam.getUnitAccession().equals("UO:0000010")) { rtInSeconds = Double.parseDouble(cvParam.getValue()); } else if (cvParam.getUnitAccession().equals("UO:0000031")) { rtInSeconds = Double.parseDouble(cvParam.getValue()) / 60; // Convert minutes to seconds } else { System.out.println("Error parsing RT - unit not recognised"); } } if (cvParam.getAccession().equals("MS:1000796")) { spectrumTitle = cvParam.getValue(); } } sirString += sep + "\"" + spectrumTitle + "\"" + sep + rtInSeconds; return sirString; }
// Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator private String proteoAnnotatorLineToString(ProteinAmbiguityGroup pag) { String line = ""; List<UserParam> userParams = pag.getUserParam(); String countNonA = ""; String scoreNonA = ""; String nonAPeptide = ""; String aGenes = ""; String qValue = ""; for (int i = 0; i < userParams.size(); i++) { UserParam userParam = userParams.get(i); if (userParam.getName().equals("nonAPeptide")) { nonAPeptide = userParam.getValue(); } } List<CvParam> cvParamList = pag.getCvParam(); for (int i = 0; i < cvParamList.size(); i++) { CvParam cvParam = cvParamList.get(i); if (cvParam.getAccession().equals("MS:1002474")) { scoreNonA = cvParam.getValue(); } if (cvParam.getAccession().equals("MS:1002475")) { countNonA = cvParam.getValue(); } if (cvParam.getAccession().equals("MS:1002373")) { qValue = cvParam.getValue(); } } List<ProteinDetectionHypothesis> proteinDetectionHypothesisList = pag.getProteinDetectionHypothesis(); for (int i = 0; i < proteinDetectionHypothesisList.size(); i++) { ProteinDetectionHypothesis proteinDetectionHypothesis = proteinDetectionHypothesisList.get(i); if (proteinDetectionHypothesis.getDBSequenceRef().startsWith("dbseq_generic|A_")) aGenes = aGenes + proteinDetectionHypothesis.getDBSequenceRef() + ";"; } line = countNonA + sep + scoreNonA + sep + nonAPeptide + sep + aGenes + sep + qValue; return line; }
private String pdhToString(ProteinDetectionHypothesis pdh) { String pdhString = "\"" + dbSequenceIdHashMap.get(pdh.getDBSequenceRef()).getAccession() + "\"" + sep + pdh.isPassThreshold() + sep; DBSequence dbSeq = dbSequenceIdHashMap.get(pdh.getDBSequenceRef()); String protDesc = ""; String protGroupMembership = ""; if (dbSeq != null) { for (CvParam cvParam : dbSeq.getCvParam()) { if (cvParam.getAccession().equals("MS:1001088")) { // Protein description String description = cvParam.getValue().replaceAll("\"", ""); // remove internal " protDesc = "\"" + description + "\""; } } } Map<String, String> mapNameToValue = new HashMap<>(); for (CvParam cvParam : pdh.getCvParam()) { if (cvParam.getAccession().equals("MS:1001591") || cvParam.getAccession().equals("MS:1001592") || cvParam.getAccession().equals("MS:1001593") || cvParam.getAccession().equals("MS:1001594") || cvParam.getAccession().equals("MS:1001595") || cvParam.getAccession().equals("MS:1001596") || cvParam.getAccession().equals("MS:1001597") || cvParam.getAccession().equals("MS:1001598") || cvParam.getAccession().equals("MS:1001599")) { // Protein description protGroupMembership = "\"" + cvParam.getName(); if (cvParam.getValue() != null) { protGroupMembership += ":" + cvParam.getValue(); } protGroupMembership += "\""; } else { mapNameToValue.put(cvParam.getName(), cvParam.getValue()); } } for (UserParam userParam : pdh.getUserParam()) { mapNameToValue.put(userParam.getName(), userParam.getValue()); } pdhString += protDesc + sep + protGroupMembership + sep; // Handle scores for (int i = 0; i < columnToProtScoreMap.size(); i++) { String score = columnToProtScoreMap.get(i); // System.out.println("test2" + score); if (mapNameToValue.containsKey(score)) { String scoreValue = mapNameToValue.get(score); // System.out.println("test3" + scoreValue); pdhString += scoreValue + sep; } else { pdhString += sep; } } return pdhString; }
private void init(String outputFile, String exportOption) { Writer out = null; try { out = new BufferedWriter(new FileWriter(outputFile)); // Read all the objects we will need into hashes that are not automatically resolved by object // reference if (isVerbose) { System.out.print("About to iterate over PepEvid..."); } Iterator<PeptideEvidence> iterPeptideEvidence = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.PeptideEvidence); while (iterPeptideEvidence.hasNext()) { PeptideEvidence peptideEvidence = iterPeptideEvidence.next(); peptideEvidenceIdHashMap.put(peptideEvidence.getId(), peptideEvidence); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over Peptide"); } Iterator<Peptide> iterPeptide = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.Peptide); while (iterPeptide.hasNext()) { Peptide peptide = iterPeptide.next(); peptideIdHashMap.put(peptide.getId(), peptide); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over Spectra Data"); } Iterator<SpectraData> iterSpectraData = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectraData); while (iterSpectraData.hasNext()) { SpectraData spectraData = iterSpectraData.next(); spectraDataIdHashMap.put(spectraData.getId(), spectraData); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over DBsequence"); } Iterator<DBSequence> iterDBSequence = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.DBSequence); while (iterDBSequence.hasNext()) { DBSequence dbSequence = iterDBSequence.next(); dbSequenceIdHashMap.put(dbSequence.getId(), dbSequence); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over PDH"); } Iterator<ProteinDetectionHypothesis> iterPDH = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinDetectionHypothesis); Integer pCounter = 0; while (iterPDH.hasNext()) { ProteinDetectionHypothesis pdh = iterPDH.next(); 
pdhIdHashMap.put(pdh.getId(), pdh); for (CvParam cvParam : pdh.getCvParam()) { if (cvParam.getAccession().equals("MS:1001591") || cvParam.getAccession().equals("MS:1001592") || cvParam.getAccession().equals("MS:1001593") || cvParam.getAccession().equals("MS:1001594") || cvParam.getAccession().equals("MS:1001595") || cvParam.getAccession().equals("MS:1001596") || cvParam.getAccession().equals("MS:1001597") || cvParam.getAccession().equals("MS:1001598") || cvParam .getAccession() .equals("MS:1001599")) { // do nothing - these are specifically handled // ToDO this code could be improved using an array of values... } else if (cvParam.getValue() != null) { if (!columnToProtScoreMap.containsValue(cvParam.getName())) { columnToProtScoreMap.put(pCounter, cvParam.getName()); pCounter++; } } } for (UserParam userParam : pdh.getUserParam()) { if (!columnToProtScoreMap.containsValue(userParam.getName())) { columnToProtScoreMap.put(pCounter, userParam.getName()); pCounter++; } } } for (int i = 0; i < pCounter; i++) { pScoreHeader += columnToProtScoreMap.get(i) + sep; } // Now let's see what scores we have in the file // TODO - I'm not sure this is the fastest way to parse the files; these are unmarshalled // again below - inefficient? 
// Iterator<SpectrumIdentificationItem> iterSII = // unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationItem); Integer counter = 0; if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over SIR"); } Iterator<SpectrumIdentificationResult> iterSIR = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult); List<SpectrumIdentificationResult> sirList = new ArrayList<>(); while (iterSIR.hasNext()) { SpectrumIdentificationResult sir = iterSIR.next(); sirList.add(sir); List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem(); for (SpectrumIdentificationItem sii : listSII) { siiIdHashMap.put(sii.getId(), sii); siiIdToSirHashMap.put(sii.getId(), sir); for (CvParam cvParam : sii.getCvParam()) { if (cvParam.getValue() != null) { if (!columnToScoreMap.containsValue(cvParam.getName())) { columnToScoreMap.put(counter, cvParam.getName()); counter++; } } } } } for (int i = 0; i < counter; i++) { scoreHeader += sep + columnToScoreMap.get(i); } if (isVerbose) { System.out.println("...done"); System.out.print("About to create output"); } if (exportOption.equals("exportPSMs")) { out.write(spectrumHeader + psmHeader + scoreHeader); out.write(endPsmHeader + "\n"); // Iterator<SpectrumIdentificationResult> iterSIR = // unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult); for (SpectrumIdentificationResult sir : sirList) { String sirLine = sirToString(sir); List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem(); for (SpectrumIdentificationItem sii : listSII) { out.write(sirLine + sep + siiToString(sii) + "\n"); } } } else if (exportOption.equals("exportProteinGroups")) { out.write(pagHeader); out.write(pScoreHeader); out.write(spectrumHeader + psmHeader + scoreHeader); out.write(endPsmHeader + "\n"); Iterator<ProteinAmbiguityGroup> iterPAG = 
unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); // handle PDHs for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) { String pdhLine = pagLine; pdhLine += pdhToString(pdh); for (PeptideHypothesis pepH : pdh.getPeptideHypothesis()) { List<SpectrumIdentificationItemRef> siiRefList = pepH.getSpectrumIdentificationItemRef(); for (SpectrumIdentificationItemRef siiRef : siiRefList) { SpectrumIdentificationResult sir = siiIdToSirHashMap.get(siiRef.getSpectrumIdentificationItemRef()); SpectrumIdentificationItem sii = siiIdHashMap.get(siiRef.getSpectrumIdentificationItemRef()); out.write(pdhLine + sirToString(sir) + sep + siiToString(sii) + "\n"); } } } } } else if (exportOption.equals("exportRepProteinPerPAGOnly")) { out.write(pagHeader); out.write(pScoreHeader); out.write("\n"); Iterator<ProteinAmbiguityGroup> iterPAG = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc); String pdhLine = pagLine; if (repPdh != null) { pdhLine += pdhToString(repPdh); } out.write(pdhLine + "\n"); } } else if (exportOption.equals( "exportProteoAnnotator")) { // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator out.write(pagHeader); out.write(pScoreHeader); // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator out.write(exportProteoAnnotatorHeader); out.write("\n"); Iterator<ProteinAmbiguityGroup> iterPAG = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc); String pdhLine = 
pagLine; if (repPdh != null) { pdhLine += pdhToString(repPdh); } // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator String proteoAnnotatorLine = pdhLine; proteoAnnotatorLine = proteoAnnotatorLine + proteoAnnotatorLineToString(pag); out.write(proteoAnnotatorLine + "\n"); } } else if (exportOption.equals("exportProteinsOnly")) { out.write(pagHeader); out.write(pScoreHeader); out.write("\n"); Iterator<ProteinAmbiguityGroup> iterPAG = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); // handle PDHs for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) { String pdhLine = pagLine; pdhLine += pdhToString(pdh); out.write(pdhLine + "\n"); } } } else { System.out.println( "Error - correct usage MzIdentMLToCSV inputFile outputFile -exportType [exportProteinGroups|exportPSMs|exportProteinsOnly]"); } out.close(); System.out.println("Output written to " + outputFile); } catch (IOException ex) { String methodName = Thread.currentThread().getStackTrace()[1].getMethodName(); String className = this.getClass().getName(); String message = "The task \"" + methodName + "\" in the class \"" + className + "\" was not completed because of " + ex.getMessage() + "." + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors "; System.out.println(message); } finally { try { out.close(); } catch (IOException ex) { String methodName = Thread.currentThread().getStackTrace()[1].getMethodName(); String className = this.getClass().getName(); String message = "The task \"" + methodName + "\" in the class \"" + className + "\" was not completed because of " + ex.getMessage() + "." + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors "; System.out.println(message); } } }
/**
 * Builds an mzIdentML {@code SearchModification} from a Mascot modification definition.
 *
 * <p>The modification is marked as fixed unless it is a {@code VariableModification}. Its
 * location string is interpreted in one of two ways:
 *
 * <ul>
 *   <li>If it contains {@code term} or {@code Term}, a specificity rule is chosen from its prefix
 *       ({@code Protein N}, {@code Protein C}, {@code N} or {@code C}). The non-space characters
 *       following {@code erm} become the residues, or {@code "."} when nothing follows. If no
 *       prefix matches, neither a rule nor any residue is added.
 *   <li>Otherwise every character of the location becomes a residue.
 * </ul>
 *
 * <p>The mass delta is copied from the Mascot modification, and a UNIMOD cvParam is attached
 * when the parser finds an entry for the modification's type, mass and residues.
 *
 * @param mod the Mascot modification to convert
 * @param uniModParser parser used to look up the matching Unimod entry
 * @return the populated search modification
 */
private static SearchModification createPSIModification(
    com.compomics.mascotdatfile.util.interfaces.Modification mod, UnimodParser uniModParser) {
  SearchModification searchMod = new SearchModification();
  searchMod.setFixedMod(!(mod instanceof VariableModification));

  String location = mod.getLocation();
  boolean terminal = location.contains("term") || location.contains("Term");

  if (!terminal) {
    for (char aminoAcid : location.toCharArray()) {
      searchMod.getResidues().add(String.valueOf(aminoAcid));
    }
  } else {
    // The "Protein ..." prefixes are tested first because the plain "N"/"C" tests would
    // otherwise never be reached for them in the right order.
    OntologyConstants specificityTerm =
        location.startsWith("Protein N")
            ? OntologyConstants.MODIFICATION_SPECIFICITY_PROTEIN_N_TERM
            : location.startsWith("Protein C")
                ? OntologyConstants.MODIFICATION_SPECIFICITY_PROTEIN_C_TERM
                : location.startsWith("N")
                    ? OntologyConstants.MODIFICATION_SPECIFICITY_PEP_N_TERM
                    : location.startsWith("C")
                        ? OntologyConstants.MODIFICATION_SPECIFICITY_PEP_C_TERM
                        : null;

    if (specificityTerm != null) {
      SpecificityRules rules = new SpecificityRules();
      rules.getCvParam().add(MzIdentMLTools.createPSICvParam(specificityTerm, null));
      searchMod.getSpecificityRules().add(rules);

      // Whatever follows "erm" lists the residues the terminal modification is restricted to.
      String[] parts = location.split("erm");
      if (parts.length > 1) {
        for (char aminoAcid : parts[1].trim().toCharArray()) {
          if (aminoAcid != ' ') {
            searchMod.getResidues().add(String.valueOf(aminoAcid));
          }
        }
      } else {
        searchMod.getResidues().add(".");
      }
    }
  }

  searchMod.setMassDelta((float) mod.getMass());

  ModT unimodEntry =
      uniModParser.getModificationByNameAndMass(
          mod.getType(), mod.getMass(), searchMod.getResidues());
  if (unimodEntry != null) {
    CvParam unimodParam = new CvParam();
    unimodParam.setAccession("UNIMOD:" + unimodEntry.getRecordId());
    unimodParam.setCv(UnimodParser.getCv());
    unimodParam.setName(unimodEntry.getTitle());
    searchMod.getCvParam().add(unimodParam);
  }
  return searchMod;
}