/**
 * Rebuilds {@code minProteins} from the protein names referenced by the peptides in
 * {@code minPeptides}, then hands one {@code ProteinInfo} per protein to
 * {@code MassSieveFrame.addProtein}.
 */
public void createProteinList() {
    minProteins = new HashMap<String, Protein>();

    // Pass 1: attach every peptide sequence to each protein it names, creating proteins on demand.
    for (Peptide peptide : minPeptides.values()) {
        String sequence = peptide.getSequence();
        for (String proteinName : peptide.getProteins()) {
            Protein known = minProteins.get(proteinName);
            if (known != null) {
                known.addPeptide(sequence);
                continue;
            }
            Protein created = new Protein();
            created.setName(proteinName);
            created.addPeptide(sequence);
            created.setCluster(cluster_num);
            minProteins.put(proteinName, created);
        }
    }

    // Pass 2: publish name, description and length of every collected protein.
    for (Protein protein : minProteins.values()) {
        ProteinInfo info = new ProteinInfo();
        info.setName(protein.getName());
        info.setDescription(protein.getDescription());
        info.setLength(protein.getLength());
        MassSieveFrame.addProtein(info);
    }
}
/**
 * Replaces {@code peptideHits} with a fresh list holding the hits of every peptide in
 * {@code minPeptides}, and adds each peptide's experiments to {@code experimentSet}.
 * Note that {@code experimentSet} is only added to here; it is not cleared first.
 */
public void updatePeptideHits() {
    peptideHits = new ArrayList<PeptideHit>();
    for (Peptide peptide : minPeptides.values()) {
        peptideHits.addAll(peptide.getPeptideHits());
        experimentSet.addAll(peptide.getExperimentSet());
    }
}
/**
 * Builds a new collection containing only the peptides for which
 * {@code containsPepXML()} is true. The peptide objects are shared, not copied.
 *
 * @return the filtered collection, keyed by peptide sequence
 */
public PeptideCollection getPepXML() {
    PeptideCollection filtered = new PeptideCollection();
    for (Peptide peptide : minPeptides.values()) {
        if (!peptide.containsPepXML()) {
            continue;
        }
        filtered.minPeptides.put(peptide.getSequence(), peptide);
    }
    return filtered;
}
/**
 * Builds a new collection containing only the peptides with at least {@code numHits}
 * peptide hits, then refreshes the new collection's hit list.
 *
 * @param numHits minimum number of peptide hits (inclusive) a peptide needs to be kept
 * @return the filtered collection; peptide objects are shared with this collection
 */
public PeptideCollection getPeptidesByHits(int numHits) {
    PeptideCollection filtered = new PeptideCollection();
    for (Peptide peptide : minPeptides.values()) {
        if (peptide.getNumPeptideHits() < numHits) {
            continue;
        }
        filtered.minPeptides.put(peptide.getSequence(), peptide);
    }
    filtered.updatePeptideHits();
    return filtered;
}
/**
 * Recomputes parsimony information for every protein in {@code minProteins} in three
 * passes that must run in this order: collect associated proteins, update parsimony
 * against the full protein map, then compute each protein's parsimony type.
 */
public void updateParsimony() {
    // Pass 1: each protein learns about every protein that shares one of its peptides.
    for (Protein protein : minProteins.values()) {
        for (String sequence : protein.getPeptides()) {
            protein.addAssociatedProteins(minPeptides.get(sequence).getProteins());
        }
    }

    // Pass 2: needs the associations of all proteins, hence a separate loop.
    for (Protein protein : minProteins.values()) {
        protein.updateParsimony(minProteins);
    }

    // Pass 3: classification runs only after every protein has been updated.
    for (Protein protein : minProteins.values()) {
        protein.computeParsimonyType();
    }
}
/**
 * Recursively pulls into {@code cluster} every peptide reachable from {@code pg} through
 * shared proteins. Each protein visited is stamped with {@code clustNum}; a peptide is
 * followed only while its cluster is still {@code -1} (unassigned).
 *
 * @param cluster  collection receiving the newly reached peptides
 * @param clustNum cluster number assigned to reached proteins and peptides
 * @param pg       peptide whose proteins are expanded
 */
private void addLinkedMembers(PeptideCollection cluster, Integer clustNum, Peptide pg) {
    for (String proteinName : pg.getProteins()) {
        Protein protein = minProteins.get(proteinName);
        protein.setCluster(clustNum);
        for (String sequence : protein.getPeptides()) {
            Peptide neighbour = minPeptides.get(sequence);
            if (neighbour.getCluster() != -1) {
                // Already claimed by a cluster; do not revisit.
                continue;
            }
            neighbour.setCluster(clustNum);
            cluster.addPeptideGroup(neighbour);
            addLinkedMembers(cluster, clustNum, neighbour);
        }
    }
}
/**
 * Records a peptide hit: appends it to {@code peptideHits}, adds its experiment to
 * {@code experimentSet}, and files it under its sequence in {@code minPeptides},
 * creating the {@code Peptide} entry when the sequence is new.
 *
 * @param p the hit to add; {@code null} is ignored
 */
public void addPeptideHit(PeptideHit p) {
    if (p == null) {
        return;
    }
    peptideHits.add(p);
    experimentSet.add(p.getExperiment());

    String sequence = p.getSequence();
    if (!minPeptides.containsKey(sequence)) {
        minPeptides.put(sequence, new Peptide(p));
        return;
    }
    minPeptides.get(sequence).addPeptideHit(p);
}
public Graph toGraph() { Table edgeTable = new Table(); Table nodeTable = new Table(); HashMap<String, Integer> unique = new HashMap<String, Integer>(); edgeTable.addColumn("Node1", int.class); edgeTable.addColumn("Node2", int.class); nodeTable.addColumn("key", int.class); nodeTable.addColumn("name", String.class); nodeTable.addColumn("type", String.class); nodeTable.addColumn("indeterminate", int.class); int idx = 0; for (Protein prot : minProteins.values()) { int row = nodeTable.addRow(); unique.put(prot.getName(), idx); nodeTable.setInt(row, "key", idx++); nodeTable.setString(row, "name", prot.getName()); nodeTable.setString(row, "type", "protein"); nodeTable.setInt(row, "indeterminate", 0); } for (Peptide pep : minPeptides.values()) { int row = nodeTable.addRow(); unique.put(pep.getSequence(), idx); nodeTable.setInt(row, "key", idx++); nodeTable.setString(row, "name", pep.getSequence()); nodeTable.setString(row, "type", "peptide"); if (pep.getIndeterminateType() == PeptideIndeterminacyType.NONE) { nodeTable.setInt(row, "indeterminate", 0); } else { nodeTable.setInt(row, "indeterminate", 1); } } for (Protein prot : minProteins.values()) { int id1 = unique.get(prot.getName()); for (String pep : prot.getPeptides()) { int id2 = unique.get(pep); int row = edgeTable.addRow(); edgeTable.setInt(row, "Node1", id1); edgeTable.setInt(row, "Node2", id2); } } Graph g = new Graph(nodeTable, edgeTable, false, "key", "Node1", "Node2"); // System.err.println(g.getEdgeCount()); return g; }
/**
 * Adds {@code pg} to {@code minPeptides} under its sequence. If the sequence is already
 * present the existing entry is kept and a warning is printed to standard output.
 *
 * @param pg the peptide to add
 */
public void addPeptideGroup(Peptide pg) {
    String sequence = pg.getSequence();
    if (!minPeptides.containsKey(sequence)) {
        minPeptides.put(sequence, pg);
        return;
    }
    System.out.print("This PeptideCollection already contains " + sequence);
    System.out.println(" are you sure this is what you want?");
}
/**
 * Rebuilds {@code minProteins} so that it holds, for every protein name referenced by a
 * peptide in {@code minPeptides}, the corresponding object from {@code mainProteins}.
 * Afterwards every peptide and every protein is asked to refresh its cross-references
 * against the rebuilt maps.
 *
 * <p>The first time a protein name is seen its object is stored as-is; only later
 * peptides naming the same protein are added through {@code addPeptide}.
 *
 * @param mainProteins source map from which protein objects are looked up by name
 */
public void updateProteins(HashMap<String, Protein> mainProteins) {
    minProteins = new HashMap<String, Protein>();

    for (Peptide peptide : minPeptides.values()) {
        String sequence = peptide.getSequence();
        for (String proteinName : peptide.getProteins()) {
            if (!minProteins.containsKey(proteinName)) {
                minProteins.put(proteinName, mainProteins.get(proteinName));
                continue;
            }
            minProteins.get(proteinName).addPeptide(sequence);
        }
    }

    for (Peptide peptide : minPeptides.values()) {
        peptide.updateProteins(minProteins);
    }
    for (Protein protein : minProteins.values()) {
        protein.updatePeptides(minPeptides);
    }
}
/**
 * Partitions the peptides of {@code minPeptides} into clusters and then numbers the
 * equivalence groups of the proteins.
 *
 * <p>Clustering: every peptide is reset to cluster {@code -1}; each peptide still
 * unassigned seeds a new {@code PeptideCollection} (numbered from 1) that is grown with
 * {@code addLinkedMembers}, after which the cluster's proteins and parsimony are updated
 * and the cluster is stored in {@code clusters}.
 *
 * <p>Equivalence groups: proteins are visited in the order countables, subsets,
 * subsumables; each protein not yet assigned opens a new group (numbered from 0) that
 * also receives all proteins returned by its {@code getEquivalent()}.
 */
public void updateClusters() {
    clusters = new HashMap<Integer, PeptideCollection>();

    for (Peptide peptide : minPeptides.values()) {
        peptide.setCluster(-1);
    }

    int nextClusterId = 1;
    for (Peptide seed : minPeptides.values()) {
        if (seed.getCluster() != -1) {
            continue;
        }
        PeptideCollection grown = new PeptideCollection();
        grown.setClusterNum(nextClusterId);
        seed.setCluster(nextClusterId);
        grown.addPeptideGroup(seed);
        addLinkedMembers(grown, nextClusterId, seed);
        grown.updateProteins(minProteins);
        grown.updateParsimony();
        clusters.put(nextClusterId, grown);
        nextClusterId++;
    }

    int nextGroupId = 0;
    HashSet<String> assignedNames = new HashSet<String>();
    ArrayList<Protein> visitOrder = new ArrayList<Protein>();
    visitOrder.addAll(getCountables());
    visitOrder.addAll(getSubsets());
    visitOrder.addAll(getSubsumables());

    for (Protein leader : visitOrder) {
        if (assignedNames.contains(leader.getName())) {
            continue;
        }
        leader.setEquivalentGroup(nextGroupId);
        assignedNames.add(leader.getName());
        for (Protein member : leader.getEquivalent()) {
            member.setEquivalentGroup(nextGroupId);
            assignedNames.add(member.getName());
        }
        nextGroupId++;
    }
}
private String siiToString(SpectrumIdentificationItem sii) { String siiString = ""; siiString = "\"" + sii.getId() + "\"" + sep + sii.getRank() + sep + sii.isPassThreshold() + sep + sii.getCalculatedMassToCharge() + sep + sii.getExperimentalMassToCharge() + sep + sii.getChargeState(); Peptide pep = peptideIdHashMap.get(sii.getPeptideRef()); // get Peptide via the hash for this object siiString += sep + "\"" + pep.getPeptideSequence() + "\""; // Handle Mods siiString += sep; String modString = ""; if (pep.getModification() != null) { int i = 0; for (Modification mod : pep.getModification()) { if (i > 0) { modString += ";"; // Add an extra separator between mods } modString += modToString(mod); i++; } } if (pep.getSubstitutionModification() != null) { int i = 0; for (SubstitutionModification subMod : pep.getSubstitutionModification()) { if (i > 0 || !modString.equals("")) { modString += ";"; // Add an extra separator between mods } modString += subModToString(subMod); i++; } } siiString += modString; Map<String, String> mapNameToValue = new HashMap<>(); for (AbstractParam param : sii.getParamGroup()) { mapNameToValue.put(param.getName(), param.getValue()); // System.out.println("test1" + param.getName() + "-> " + param.getValue()); } // Handle scores for (int i = 0; i < columnToScoreMap.size(); i++) { String score = columnToScoreMap.get(i); // System.out.println("test2" + score); if (mapNameToValue.containsKey(score)) { String scoreValue = mapNameToValue.get(score); // System.out.println("test3" + scoreValue); siiString += sep + scoreValue; } else { siiString += sep; } } // Handle all protein maps siiString += sep + "\""; List<PeptideEvidenceRef> peptideEvidenceRefList = sii.getPeptideEvidenceRef(); Boolean isDecoy = false; for (int i = 0; i < peptideEvidenceRefList.size(); i++) { PeptideEvidenceRef peptideEvidenceRef = peptideEvidenceRefList.get(i); PeptideEvidence peptideEvidence = peptideEvidenceIdHashMap.get(peptideEvidenceRef.getPeptideEvidenceRef()); DBSequence 
dbSeq = dbSequenceIdHashMap.get(peptideEvidence.getDBSequenceRef()); if (i > 0) { siiString += ";"; // Add an extra separator between mods } siiString += dbSeq.getAccession() + "_" + peptideEvidence.getStart() + "_" + peptideEvidence.getEnd() + "_" + peptideEvidence.getPre() + "_" + peptideEvidence.getPost(); if (peptideEvidence.isIsDecoy()) { isDecoy = true; } } siiString += "\""; siiString += sep + isDecoy; return siiString; }
private void init(String outputFile, String exportOption) { Writer out = null; try { out = new BufferedWriter(new FileWriter(outputFile)); // Read all the objects we will need into hashes that are not automatically resolved by object // reference if (isVerbose) { System.out.print("About to iterate over PepEvid..."); } Iterator<PeptideEvidence> iterPeptideEvidence = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.PeptideEvidence); while (iterPeptideEvidence.hasNext()) { PeptideEvidence peptideEvidence = iterPeptideEvidence.next(); peptideEvidenceIdHashMap.put(peptideEvidence.getId(), peptideEvidence); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over Peptide"); } Iterator<Peptide> iterPeptide = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.Peptide); while (iterPeptide.hasNext()) { Peptide peptide = iterPeptide.next(); peptideIdHashMap.put(peptide.getId(), peptide); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over Spectra Data"); } Iterator<SpectraData> iterSpectraData = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectraData); while (iterSpectraData.hasNext()) { SpectraData spectraData = iterSpectraData.next(); spectraDataIdHashMap.put(spectraData.getId(), spectraData); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over DBsequence"); } Iterator<DBSequence> iterDBSequence = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.DBSequence); while (iterDBSequence.hasNext()) { DBSequence dbSequence = iterDBSequence.next(); dbSequenceIdHashMap.put(dbSequence.getId(), dbSequence); } if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over PDH"); } Iterator<ProteinDetectionHypothesis> iterPDH = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinDetectionHypothesis); Integer pCounter = 0; while (iterPDH.hasNext()) { ProteinDetectionHypothesis pdh = iterPDH.next(); 
pdhIdHashMap.put(pdh.getId(), pdh); for (CvParam cvParam : pdh.getCvParam()) { if (cvParam.getAccession().equals("MS:1001591") || cvParam.getAccession().equals("MS:1001592") || cvParam.getAccession().equals("MS:1001593") || cvParam.getAccession().equals("MS:1001594") || cvParam.getAccession().equals("MS:1001595") || cvParam.getAccession().equals("MS:1001596") || cvParam.getAccession().equals("MS:1001597") || cvParam.getAccession().equals("MS:1001598") || cvParam .getAccession() .equals("MS:1001599")) { // do nothing - these are specifically handled // ToDO this code could be improved using an array of values... } else if (cvParam.getValue() != null) { if (!columnToProtScoreMap.containsValue(cvParam.getName())) { columnToProtScoreMap.put(pCounter, cvParam.getName()); pCounter++; } } } for (UserParam userParam : pdh.getUserParam()) { if (!columnToProtScoreMap.containsValue(userParam.getName())) { columnToProtScoreMap.put(pCounter, userParam.getName()); pCounter++; } } } for (int i = 0; i < pCounter; i++) { pScoreHeader += columnToProtScoreMap.get(i) + sep; } // Now let's see what scores we have in the file // TODO - I'm not sure this is the fastest way to parse the files; these are unmarshalled // again below - inefficient? 
// Iterator<SpectrumIdentificationItem> iterSII = // unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationItem); Integer counter = 0; if (isVerbose) { System.out.println("...done"); System.out.print("About to iterate over SIR"); } Iterator<SpectrumIdentificationResult> iterSIR = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult); List<SpectrumIdentificationResult> sirList = new ArrayList<>(); while (iterSIR.hasNext()) { SpectrumIdentificationResult sir = iterSIR.next(); sirList.add(sir); List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem(); for (SpectrumIdentificationItem sii : listSII) { siiIdHashMap.put(sii.getId(), sii); siiIdToSirHashMap.put(sii.getId(), sir); for (CvParam cvParam : sii.getCvParam()) { if (cvParam.getValue() != null) { if (!columnToScoreMap.containsValue(cvParam.getName())) { columnToScoreMap.put(counter, cvParam.getName()); counter++; } } } } } for (int i = 0; i < counter; i++) { scoreHeader += sep + columnToScoreMap.get(i); } if (isVerbose) { System.out.println("...done"); System.out.print("About to create output"); } if (exportOption.equals("exportPSMs")) { out.write(spectrumHeader + psmHeader + scoreHeader); out.write(endPsmHeader + "\n"); // Iterator<SpectrumIdentificationResult> iterSIR = // unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult); for (SpectrumIdentificationResult sir : sirList) { String sirLine = sirToString(sir); List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem(); for (SpectrumIdentificationItem sii : listSII) { out.write(sirLine + sep + siiToString(sii) + "\n"); } } } else if (exportOption.equals("exportProteinGroups")) { out.write(pagHeader); out.write(pScoreHeader); out.write(spectrumHeader + psmHeader + scoreHeader); out.write(endPsmHeader + "\n"); Iterator<ProteinAmbiguityGroup> iterPAG = 
unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); // handle PDHs for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) { String pdhLine = pagLine; pdhLine += pdhToString(pdh); for (PeptideHypothesis pepH : pdh.getPeptideHypothesis()) { List<SpectrumIdentificationItemRef> siiRefList = pepH.getSpectrumIdentificationItemRef(); for (SpectrumIdentificationItemRef siiRef : siiRefList) { SpectrumIdentificationResult sir = siiIdToSirHashMap.get(siiRef.getSpectrumIdentificationItemRef()); SpectrumIdentificationItem sii = siiIdHashMap.get(siiRef.getSpectrumIdentificationItemRef()); out.write(pdhLine + sirToString(sir) + sep + siiToString(sii) + "\n"); } } } } } else if (exportOption.equals("exportRepProteinPerPAGOnly")) { out.write(pagHeader); out.write(pScoreHeader); out.write("\n"); Iterator<ProteinAmbiguityGroup> iterPAG = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc); String pdhLine = pagLine; if (repPdh != null) { pdhLine += pdhToString(repPdh); } out.write(pdhLine + "\n"); } } else if (exportOption.equals( "exportProteoAnnotator")) { // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator out.write(pagHeader); out.write(pScoreHeader); // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator out.write(exportProteoAnnotatorHeader); out.write("\n"); Iterator<ProteinAmbiguityGroup> iterPAG = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc); String pdhLine = 
pagLine; if (repPdh != null) { pdhLine += pdhToString(repPdh); } // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator String proteoAnnotatorLine = pdhLine; proteoAnnotatorLine = proteoAnnotatorLine + proteoAnnotatorLineToString(pag); out.write(proteoAnnotatorLine + "\n"); } } else if (exportOption.equals("exportProteinsOnly")) { out.write(pagHeader); out.write(pScoreHeader); out.write("\n"); Iterator<ProteinAmbiguityGroup> iterPAG = unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup); while (iterPAG.hasNext()) { ProteinAmbiguityGroup pag = iterPAG.next(); String pagLine = pagToString(pag); // handle PDHs for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) { String pdhLine = pagLine; pdhLine += pdhToString(pdh); out.write(pdhLine + "\n"); } } } else { System.out.println( "Error - correct usage MzIdentMLToCSV inputFile outputFile -exportType [exportProteinGroups|exportPSMs|exportProteinsOnly]"); } out.close(); System.out.println("Output written to " + outputFile); } catch (IOException ex) { String methodName = Thread.currentThread().getStackTrace()[1].getMethodName(); String className = this.getClass().getName(); String message = "The task \"" + methodName + "\" in the class \"" + className + "\" was not completed because of " + ex.getMessage() + "." + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors "; System.out.println(message); } finally { try { out.close(); } catch (IOException ex) { String methodName = Thread.currentThread().getStackTrace()[1].getMethodName(); String className = this.getClass().getName(); String message = "The task \"" + methodName + "\" in the class \"" + className + "\" was not completed because of " + ex.getMessage() + "." + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors "; System.out.println(message); } } }