Example #1
0
 /**
  * Rebuilds {@code minProteins} from the peptides held in {@code minPeptides}.
  *
  * <p>Every protein name referenced by a peptide gets exactly one {@code Protein} entry; the
  * entry collects the sequences of all peptides that reference it. Newly created proteins are
  * tagged with the current {@code cluster_num}. Afterwards one {@code ProteinInfo} (name,
  * description, length) per protein is handed to {@code MassSieveFrame.addProtein}.
  */
 public void createProteinList() {
   minProteins = new HashMap<String, Protein>();
   for (Peptide peptide : minPeptides.values()) {
     String sequence = peptide.getSequence();
     for (String proteinName : peptide.getProteins()) {
       Protein known = minProteins.get(proteinName);
       if (known != null) {
         // Protein already registered: just record one more supporting peptide.
         known.addPeptide(sequence);
         continue;
       }
       Protein created = new Protein();
       created.setName(proteinName);
       created.addPeptide(sequence);
       created.setCluster(cluster_num);
       minProteins.put(proteinName, created);
     }
   }

   // Publish a summary record for every protein that was collected above.
   for (Protein protein : minProteins.values()) {
     ProteinInfo summary = new ProteinInfo();
     summary.setName(protein.getName());
     summary.setDescription(protein.getDescription());
     summary.setLength(protein.getLength());
     MassSieveFrame.addProtein(summary);
   }
 }
Example #2
0
 /**
  * Replaces {@code peptideHits} with a fresh list containing the hits of every peptide in
  * {@code minPeptides}, and adds each peptide's experiments to {@code experimentSet}.
  *
  * <p>Note that {@code experimentSet} is only added to here; it is not cleared first.
  */
 public void updatePeptideHits() {
   peptideHits = new ArrayList<PeptideHit>();
   for (Peptide peptide : minPeptides.values()) {
     // Hits first, then experiments, for each peptide in turn.
     peptideHits.addAll(peptide.getPeptideHits());
     experimentSet.addAll(peptide.getExperimentSet());
   }
 }
Example #3
0
 /**
  * Returns a new collection containing only the peptides for which
  * {@code Peptide.containsPepXML()} is true, keyed by peptide sequence.
  *
  * <p>The {@code Peptide} objects are shared with this collection, not copied.
  *
  * @return a new {@code PeptideCollection} with the matching peptides (possibly empty)
  */
 public PeptideCollection getPepXML() {
   PeptideCollection filtered = new PeptideCollection();
   for (Peptide candidate : minPeptides.values()) {
     if (!candidate.containsPepXML()) {
       continue;
     }
     filtered.minPeptides.put(candidate.getSequence(), candidate);
   }
   return filtered;
 }
Example #4
0
 /**
  * Returns a new collection containing the peptides that have at least {@code numHits}
  * peptide hits, keyed by peptide sequence.
  *
  * <p>The {@code Peptide} objects are shared with this collection, not copied. The result's hit
  * list is refreshed via {@code updatePeptideHits()} before it is returned.
  *
  * @param numHits minimum number of peptide hits (inclusive) a peptide needs to be kept
  * @return a new {@code PeptideCollection} with the qualifying peptides (possibly empty)
  */
 public PeptideCollection getPeptidesByHits(int numHits) {
   PeptideCollection filtered = new PeptideCollection();
   for (Peptide candidate : minPeptides.values()) {
     if (candidate.getNumPeptideHits() < numHits) {
       continue; // too few hits, leave it out
     }
     filtered.minPeptides.put(candidate.getSequence(), candidate);
   }
   filtered.updatePeptideHits();
   return filtered;
 }
Example #5
0
 /**
  * Recomputes the parsimony information of every protein in {@code minProteins}.
  *
  * <p>Runs three full passes over the proteins, in this order:
  *
  * <ol>
  *   <li>each protein is given, as associated proteins, the protein names of all of its peptides;
  *   <li>{@code Protein.updateParsimony(minProteins)} is invoked on each protein;
  *   <li>{@code Protein.computeParsimonyType()} is invoked on each protein.
  * </ol>
  *
  * <p>Each pass finishes for all proteins before the next one starts. Every peptide name listed
  * by a protein must be present in {@code minPeptides}; a missing one causes a
  * {@code NullPointerException}.
  */
 public void updateParsimony() {
   // Pass 1: link proteins that share at least one peptide.
   for (Protein protein : minProteins.values()) {
     for (String sequence : protein.getPeptides()) {
       Peptide peptide = minPeptides.get(sequence);
       protein.addAssociatedProteins(peptide.getProteins());
     }
   }

   // Pass 2: let each protein refresh its parsimony data against the full protein map.
   for (Protein protein : minProteins.values()) {
     protein.updateParsimony(minProteins);
   }

   // Pass 3: derive the final parsimony type once pass 2 is complete for all proteins.
   for (Protein protein : minProteins.values()) {
     protein.computeParsimonyType();
   }
 }
Example #6
0
 /**
  * Recursively pulls into {@code cluster} every peptide reachable from {@code pg} through
  * shared proteins.
  *
  * <p>Each protein of {@code pg} is assigned {@code clustNum}. Each of that protein's peptides
  * whose cluster is still {@code -1} is assigned {@code clustNum}, added to {@code cluster}, and
  * expanded in turn. Peptides with any other cluster value are treated as already visited, which
  * is what ends the recursion.
  *
  * @param cluster collection that receives the newly reached peptides
  * @param clustNum cluster number assigned to every protein and peptide reached
  * @param pg peptide whose proteins are expanded
  */
 private void addLinkedMembers(PeptideCollection cluster, Integer clustNum, Peptide pg) {
   for (String proteinName : pg.getProteins()) {
     Protein protein = minProteins.get(proteinName);
     protein.setCluster(clustNum);
     for (String sequence : protein.getPeptides()) {
       Peptide neighbour = minPeptides.get(sequence);
       if (neighbour.getCluster() != -1) {
         continue; // already assigned to a cluster
       }
       neighbour.setCluster(clustNum);
       cluster.addPeptideGroup(neighbour);
       addLinkedMembers(cluster, clustNum, neighbour);
     }
   }
 }
Example #7
0
 /**
  * Records a peptide hit in this collection.
  *
  * <p>The hit is appended to {@code peptideHits}, its experiment is added to
  * {@code experimentSet}, and it is attached to the {@code Peptide} keyed by its sequence. If no
  * such peptide exists yet, one is created from the hit.
  *
  * @param p the hit to add; {@code null} is ignored
  */
 public void addPeptideHit(PeptideHit p) {
   if (p == null) {
     return;
   }
   peptideHits.add(p);
   experimentSet.add(p.getExperiment());

   String sequence = p.getSequence();
   if (!minPeptides.containsKey(sequence)) {
     // First hit for this sequence: the new peptide is built from the hit itself.
     minPeptides.put(sequence, new Peptide(p));
   } else {
     minPeptides.get(sequence).addPeptideHit(p);
   }
 }
Example #8
0
  /**
   * Converts this collection into a {@code Graph} of protein and peptide nodes.
   *
   * <p>Node table columns: {@code key} (sequential int id), {@code name}, {@code type}
   * ({@code "protein"} or {@code "peptide"}) and {@code indeterminate} (always 0 for proteins;
   * for peptides 0 when the indeterminacy type is {@code NONE}, otherwise 1). Proteins are
   * numbered first, peptides afterwards. The edge table has one row per protein/peptide pair,
   * with the protein id in {@code Node1} and the peptide id in {@code Node2}.
   *
   * <p>Ids are looked up by name in one shared map, so a peptide sequence equal to a protein
   * name would overwrite that protein's id. The {@code false} passed to the {@code Graph}
   * constructor is presumably its "directed" flag; confirm against the Graph API.
   *
   * @return the graph built from the node and edge tables
   */
  public Graph toGraph() {
    Table edges = new Table();
    Table nodes = new Table();
    HashMap<String, Integer> idByName = new HashMap<String, Integer>();

    edges.addColumn("Node1", int.class);
    edges.addColumn("Node2", int.class);
    nodes.addColumn("key", int.class);
    nodes.addColumn("name", String.class);
    nodes.addColumn("type", String.class);
    nodes.addColumn("indeterminate", int.class);

    int nextId = 0;

    // Protein nodes come first.
    for (Protein protein : minProteins.values()) {
      int r = nodes.addRow();
      idByName.put(protein.getName(), nextId);
      nodes.setInt(r, "key", nextId);
      nextId++;
      nodes.setString(r, "name", protein.getName());
      nodes.setString(r, "type", "protein");
      nodes.setInt(r, "indeterminate", 0);
    }

    // Peptide nodes continue the same id sequence.
    for (Peptide peptide : minPeptides.values()) {
      int r = nodes.addRow();
      idByName.put(peptide.getSequence(), nextId);
      nodes.setInt(r, "key", nextId);
      nextId++;
      nodes.setString(r, "name", peptide.getSequence());
      nodes.setString(r, "type", "peptide");
      int flag = (peptide.getIndeterminateType() == PeptideIndeterminacyType.NONE) ? 0 : 1;
      nodes.setInt(r, "indeterminate", flag);
    }

    // One edge per (protein, peptide) membership.
    for (Protein protein : minProteins.values()) {
      int proteinId = idByName.get(protein.getName());
      for (String sequence : protein.getPeptides()) {
        int peptideId = idByName.get(sequence);
        int r = edges.addRow();
        edges.setInt(r, "Node1", proteinId);
        edges.setInt(r, "Node2", peptideId);
      }
    }

    return new Graph(nodes, edges, false, "key", "Node1", "Node2");
  }
Example #9
0
 /**
  * Adds a peptide to this collection, keyed by its sequence.
  *
  * <p>If the sequence is already present, the existing entry is kept, the new peptide is
  * discarded, and a warning is printed to standard output.
  *
  * @param pg the peptide to add; must not be {@code null}
  */
 public void addPeptideGroup(Peptide pg) {
   String sequence = pg.getSequence();
   if (!minPeptides.containsKey(sequence)) {
     minPeptides.put(sequence, pg);
     return;
   }
   // Duplicate sequence: warn and leave the existing entry in place.
   System.out.print("This PeptideCollection already contains " + sequence);
   System.out.println(" are you sure this is what you want?");
 }
Example #10
0
 /**
  * Rebuilds {@code minProteins} so that it holds the proteins referenced by the peptides in
  * {@code minPeptides}, taking the {@code Protein} objects from {@code mainProteins}.
  *
  * <p>The first time a protein name is seen, the object from {@code mainProteins} is stored
  * as-is. Each later sighting of that name adds the current peptide's sequence to the stored
  * protein. Afterwards every peptide is refreshed with {@code Peptide.updateProteins} and every
  * protein with {@code Protein.updatePeptides}.
  *
  * <p>The {@code Protein} objects are shared with {@code mainProteins}, not copied. A protein
  * name missing from {@code mainProteins} is stored as a {@code null} value.
  *
  * @param mainProteins lookup from protein name to the {@code Protein} object to reuse
  */
 public void updateProteins(HashMap<String, Protein> mainProteins) {
   minProteins = new HashMap<String, Protein>();
   for (Peptide peptide : minPeptides.values()) {
     String sequence = peptide.getSequence();
     for (String proteinName : peptide.getProteins()) {
       if (!minProteins.containsKey(proteinName)) {
         // First sighting: reuse the caller's protein object unchanged.
         minProteins.put(proteinName, mainProteins.get(proteinName));
       } else {
         minProteins.get(proteinName).addPeptide(sequence);
       }
     }
   }

   // Re-synchronise both directions of the peptide/protein relation.
   for (Peptide peptide : minPeptides.values()) {
     peptide.updateProteins(minProteins);
   }
   for (Protein protein : minProteins.values()) {
     protein.updatePeptides(minPeptides);
   }
 }
Example #11
0
 /**
  * Re-partitions the peptides into clusters and renumbers the protein equivalence groups.
  *
  * <p>Clustering: every peptide's cluster is reset to {@code -1}. Each peptide still at
  * {@code -1} then seeds a new cluster, numbered from 1, which is filled through
  * {@code addLinkedMembers}. Each new cluster has its proteins and parsimony updated before it
  * is stored in {@code clusters}.
  *
  * <p>Equivalence groups: the proteins from {@code getCountables()}, {@code getSubsets()} and
  * {@code getSubsumables()} are visited in that order. Each protein not yet numbered gets the
  * next group number, starting at 0, and shares it with everything returned by its
  * {@code getEquivalent()}.
  */
 public void updateClusters() {
   clusters = new HashMap<Integer, PeptideCollection>();

   // Mark every peptide as unassigned.
   for (Peptide peptide : minPeptides.values()) {
     peptide.setCluster(-1);
   }

   int nextCluster = 1;
   for (Peptide seed : minPeptides.values()) {
     if (seed.getCluster() != -1) {
       continue; // already absorbed into an earlier cluster
     }
     PeptideCollection grown = new PeptideCollection();
     grown.setClusterNum(nextCluster);
     seed.setCluster(nextCluster);
     grown.addPeptideGroup(seed);
     addLinkedMembers(grown, nextCluster, seed);
     grown.updateProteins(minProteins);
     grown.updateParsimony();
     clusters.put(nextCluster, grown);
     nextCluster++;
   }

   int nextGroup = 0;
   HashSet<String> numbered = new HashSet<String>();
   ArrayList<Protein> ordered = new ArrayList<Protein>();
   ordered.addAll(getCountables());
   ordered.addAll(getSubsets());
   ordered.addAll(getSubsumables());
   for (Protein protein : ordered) {
     if (numbered.contains(protein.getName())) {
       continue; // received its group via an earlier equivalent protein
     }
     protein.setEquivalentGroup(nextGroup);
     numbered.add(protein.getName());
     for (Protein twin : protein.getEquivalent()) {
       twin.setEquivalentGroup(nextGroup);
       numbered.add(twin.getName());
     }
     nextGroup++;
   }
 }
Example #12
0
  /**
   * Renders one {@code SpectrumIdentificationItem} as a delimited row fragment.
   *
   * <p>Fields, in order and separated by {@code sep}: quoted id, rank, pass-threshold flag,
   * calculated m/z, experimental m/z, charge state, quoted peptide sequence, the modifications
   * and substitution modifications (joined with {@code ;}), one value per score column in
   * {@code columnToScoreMap} (empty when the item has no parameter of that name), a quoted
   * {@code ;}-joined list of protein mappings in the form
   * {@code accession_start_end_pre_post}, and finally whether any referenced peptide evidence is
   * flagged as decoy.
   *
   * <p>The text is assembled in {@code StringBuilder}s rather than by repeated string
   * concatenation inside the loops; the produced text is the same.
   *
   * @param sii the item to render; its peptide, peptide evidence and DB sequence references must
   *     resolve in the corresponding id maps, otherwise a {@code NullPointerException} is thrown
   * @return the row fragment, without a trailing separator or line break
   */
  private String siiToString(SpectrumIdentificationItem sii) {
    StringBuilder line = new StringBuilder();

    line.append("\"")
        .append(sii.getId())
        .append("\"")
        .append(sep)
        .append(sii.getRank())
        .append(sep)
        .append(sii.isPassThreshold())
        .append(sep)
        .append(sii.getCalculatedMassToCharge())
        .append(sep)
        .append(sii.getExperimentalMassToCharge())
        .append(sep)
        .append(sii.getChargeState());

    // Peptides are not resolved automatically by reference; look them up by id.
    Peptide pep = peptideIdHashMap.get(sii.getPeptideRef());
    line.append(sep).append("\"").append(pep.getPeptideSequence()).append("\"");

    // Handle mods: a single column holding every modification, ';'-separated.
    line.append(sep);

    StringBuilder mods = new StringBuilder();

    if (pep.getModification() != null) {
      int i = 0;
      for (Modification mod : pep.getModification()) {
        if (i > 0) {
          mods.append(";"); // separator between mods
        }
        mods.append(modToString(mod));
        i++;
      }
    }

    if (pep.getSubstitutionModification() != null) {
      int i = 0;
      for (SubstitutionModification subMod : pep.getSubstitutionModification()) {
        // Also separate the first substitution from any preceding regular modifications.
        if (i > 0 || mods.length() > 0) {
          mods.append(";");
        }
        mods.append(subModToString(subMod));
        i++;
      }
    }

    line.append(mods);

    // Index this item's parameters by name so each score column is a direct lookup.
    Map<String, String> valueByParamName = new HashMap<>();
    for (AbstractParam param : sii.getParamGroup()) {
      valueByParamName.put(param.getName(), param.getValue());
    }

    // Handle scores: one column per known score name, left empty when this item lacks it.
    for (int i = 0; i < columnToScoreMap.size(); i++) {
      String score = columnToScoreMap.get(i);
      line.append(sep);
      if (valueByParamName.containsKey(score)) {
        line.append(valueByParamName.get(score));
      }
    }

    // Handle all protein maps: one quoted column, entries ';'-separated.
    line.append(sep).append("\"");
    List<PeptideEvidenceRef> peptideEvidenceRefList = sii.getPeptideEvidenceRef();
    boolean isDecoy = false;
    for (int i = 0; i < peptideEvidenceRefList.size(); i++) {
      PeptideEvidenceRef peptideEvidenceRef = peptideEvidenceRefList.get(i);
      PeptideEvidence peptideEvidence =
          peptideEvidenceIdHashMap.get(peptideEvidenceRef.getPeptideEvidenceRef());

      DBSequence dbSeq = dbSequenceIdHashMap.get(peptideEvidence.getDBSequenceRef());
      if (i > 0) {
        line.append(";"); // separator between protein mappings
      }
      line.append(dbSeq.getAccession())
          .append("_")
          .append(peptideEvidence.getStart())
          .append("_")
          .append(peptideEvidence.getEnd())
          .append("_")
          .append(peptideEvidence.getPre())
          .append("_")
          .append(peptideEvidence.getPost());
      // A single decoy mapping is enough to flag the whole item as decoy.
      if (peptideEvidence.isIsDecoy()) {
        isDecoy = true;
      }
    }
    line.append("\"");

    line.append(sep).append(isDecoy);

    return line.toString();
  }
Example #13
0
  /**
   * Reads the mzIdentML document through {@code unmarshaller} and writes the requested export to
   * {@code outputFile}.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Index PeptideEvidence, Peptide, SpectraData, DBSequence and ProteinDetectionHypothesis
   *       elements by id in the corresponding id maps.
   *   <li>Collect the protein-level score column names (CV params with a value, excluding the
   *       accessions MS:1001591 to MS:1001599, plus all user params) and append them to
   *       {@code pScoreHeader}.
   *   <li>Collect the PSM-level score column names (CV params with a value on each
   *       SpectrumIdentificationItem) and append them to {@code scoreHeader}.
   *   <li>Write the header and rows for the selected {@code exportOption}.
   * </ol>
   *
   * <p>An unrecognised {@code exportOption} prints a usage message and writes nothing to the
   * file, but the file is still created and the "Output written to" message is still printed.
   * An {@code IOException} is reported on standard output and not rethrown.
   *
   * @param outputFile path of the file to create or overwrite
   * @param exportOption one of {@code exportPSMs}, {@code exportProteinGroups},
   *     {@code exportRepProteinPerPAGOnly}, {@code exportProteoAnnotator} or
   *     {@code exportProteinsOnly}
   */
  private void init(String outputFile, String exportOption) {
    Writer out = null;
    try {
      out = new BufferedWriter(new FileWriter(outputFile));
      // Read all the objects we will need into hashes that are not automatically resolved by object
      // reference
      if (isVerbose) {
        System.out.print("About to iterate over PepEvid...");
      }
      Iterator<PeptideEvidence> iterPeptideEvidence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.PeptideEvidence);
      while (iterPeptideEvidence.hasNext()) {
        PeptideEvidence peptideEvidence = iterPeptideEvidence.next();
        peptideEvidenceIdHashMap.put(peptideEvidence.getId(), peptideEvidence);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Peptide");
      }
      Iterator<Peptide> iterPeptide =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.Peptide);
      while (iterPeptide.hasNext()) {
        Peptide peptide = iterPeptide.next();
        peptideIdHashMap.put(peptide.getId(), peptide);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over Spectra Data");
      }
      Iterator<SpectraData> iterSpectraData =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectraData);
      while (iterSpectraData.hasNext()) {
        SpectraData spectraData = iterSpectraData.next();
        spectraDataIdHashMap.put(spectraData.getId(), spectraData);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over DBsequence");
      }
      Iterator<DBSequence> iterDBSequence =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.DBSequence);
      while (iterDBSequence.hasNext()) {
        DBSequence dbSequence = iterDBSequence.next();
        dbSequenceIdHashMap.put(dbSequence.getId(), dbSequence);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over PDH");
      }
      Iterator<ProteinDetectionHypothesis> iterPDH =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinDetectionHypothesis);
      // Next free protein-score column index; columns are numbered in order of first appearance.
      int pCounter = 0;
      while (iterPDH.hasNext()) {
        ProteinDetectionHypothesis pdh = iterPDH.next();
        pdhIdHashMap.put(pdh.getId(), pdh);

        for (CvParam cvParam : pdh.getCvParam()) {
          String accession = cvParam.getAccession();
          // These accessions are specifically handled elsewhere and never become score columns.
          boolean handledElsewhere =
              accession.equals("MS:1001591")
                  || accession.equals("MS:1001592")
                  || accession.equals("MS:1001593")
                  || accession.equals("MS:1001594")
                  || accession.equals("MS:1001595")
                  || accession.equals("MS:1001596")
                  || accession.equals("MS:1001597")
                  || accession.equals("MS:1001598")
                  || accession.equals("MS:1001599");
          if (!handledElsewhere
              && cvParam.getValue() != null
              && !columnToProtScoreMap.containsValue(cvParam.getName())) {
            columnToProtScoreMap.put(pCounter, cvParam.getName());
            pCounter++;
          }
        }

        for (UserParam userParam : pdh.getUserParam()) {
          if (!columnToProtScoreMap.containsValue(userParam.getName())) {
            columnToProtScoreMap.put(pCounter, userParam.getName());
            pCounter++;
          }
        }
      }
      for (int i = 0; i < pCounter; i++) {
        pScoreHeader += columnToProtScoreMap.get(i) + sep;
      }
      // Now let's see what scores we have in the file.
      // The results are kept in sirList so the PSM export below does not unmarshal them again.
      int counter = 0;
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to iterate over SIR");
      }
      Iterator<SpectrumIdentificationResult> iterSIR =
          unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.SpectrumIdentificationResult);
      List<SpectrumIdentificationResult> sirList = new ArrayList<>();
      while (iterSIR.hasNext()) {
        SpectrumIdentificationResult sir = iterSIR.next();
        sirList.add(sir);

        List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem();

        for (SpectrumIdentificationItem sii : listSII) {
          siiIdHashMap.put(sii.getId(), sii);
          siiIdToSirHashMap.put(sii.getId(), sir);
          for (CvParam cvParam : sii.getCvParam()) {
            if (cvParam.getValue() != null
                && !columnToScoreMap.containsValue(cvParam.getName())) {
              columnToScoreMap.put(counter, cvParam.getName());
              counter++;
            }
          }
        }
      }
      for (int i = 0; i < counter; i++) {
        scoreHeader += sep + columnToScoreMap.get(i);
      }
      if (isVerbose) {
        System.out.println("...done");
        System.out.print("About to create output");
      }
      if (exportOption.equals("exportPSMs")) {
        // One row per SpectrumIdentificationItem, prefixed by its parent result's columns.
        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        for (SpectrumIdentificationResult sir : sirList) {

          String sirLine = sirToString(sir);

          List<SpectrumIdentificationItem> listSII = sir.getSpectrumIdentificationItem();

          for (SpectrumIdentificationItem sii : listSII) {
            out.write(sirLine + sep + siiToString(sii) + "\n");
          }
        }

      } else if (exportOption.equals("exportProteinGroups")) {
        // One row per (group, hypothesis, PSM) combination.
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write(spectrumHeader + psmHeader + scoreHeader);
        out.write(endPsmHeader + "\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();

          String pagLine = pagToString(pag);

          // handle PDHs
          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine;
            pdhLine += pdhToString(pdh);

            for (PeptideHypothesis pepH : pdh.getPeptideHypothesis()) {

              List<SpectrumIdentificationItemRef> siiRefList =
                  pepH.getSpectrumIdentificationItemRef();
              for (SpectrumIdentificationItemRef siiRef : siiRefList) {
                // Resolve the reference through the maps filled while scanning the results.
                SpectrumIdentificationResult sir =
                    siiIdToSirHashMap.get(siiRef.getSpectrumIdentificationItemRef());

                SpectrumIdentificationItem sii =
                    siiIdHashMap.get(siiRef.getSpectrumIdentificationItemRef());
                out.write(pdhLine + sirToString(sir) + sep + siiToString(sii) + "\n");
              }
            }
          }
        }

      } else if (exportOption.equals("exportRepProteinPerPAGOnly")) {
        // One row per group, showing only its representative hypothesis (if any).
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);

          String pdhLine = pagLine;

          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          out.write(pdhLine + "\n");
        }

      } else if (exportOption.equals(
          "exportProteoAnnotator")) { // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator
        // Same as the representative-protein export, plus the ProteoAnnotator columns.
        out.write(pagHeader);
        out.write(pScoreHeader);
        // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator

        out.write(exportProteoAnnotatorHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          ProteinDetectionHypothesis repPdh = getRepresentativePDH(pag, representativeProteinAcc);

          String pdhLine = pagLine;

          if (repPdh != null) {
            pdhLine += pdhToString(repPdh);
          }
          // Added by Fawaz Ghali 13/05/2014 exportProteoAnnotator
          String proteoAnnotatorLine = pdhLine;
          proteoAnnotatorLine = proteoAnnotatorLine + proteoAnnotatorLineToString(pag);
          out.write(proteoAnnotatorLine + "\n");
        }
      } else if (exportOption.equals("exportProteinsOnly")) {
        // One row per hypothesis of every group, without PSM columns.
        out.write(pagHeader);
        out.write(pScoreHeader);
        out.write("\n");

        Iterator<ProteinAmbiguityGroup> iterPAG =
            unmarshaller.unmarshalCollectionFromXpath(MzIdentMLElement.ProteinAmbiguityGroup);
        while (iterPAG.hasNext()) {
          ProteinAmbiguityGroup pag = iterPAG.next();
          String pagLine = pagToString(pag);

          // handle PDHs
          for (ProteinDetectionHypothesis pdh : pag.getProteinDetectionHypothesis()) {
            String pdhLine = pagLine;
            pdhLine += pdhToString(pdh);
            out.write(pdhLine + "\n");
          }
        }
      } else {
        System.out.println(
            "Error - correct usage MzIdentMLToCSV inputFile outputFile -exportType [exportProteinGroups|exportPSMs|exportProteinsOnly]");
      }
      // Close (and thereby flush) before reporting success, so a failed flush lands in the
      // catch block instead of after the success message. The second close in the finally
      // block is then a harmless no-op.
      out.close();
      System.out.println("Output written to " + outputFile);
    } catch (IOException ex) {
      String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
      String className = this.getClass().getName();
      String message =
          "The task \""
              + methodName
              + "\" in the class \""
              + className
              + "\" was not completed because of "
              + ex.getMessage()
              + "."
              + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
      System.out.println(message);
    } finally {
      // out is still null when the FileWriter could not be opened; closing it unguarded would
      // throw a NullPointerException that hides the original I/O error.
      if (out != null) {
        try {
          out.close();
        } catch (IOException ex) {
          String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
          String className = this.getClass().getName();
          String message =
              "The task \""
                  + methodName
                  + "\" in the class \""
                  + className
                  + "\" was not completed because of "
                  + ex.getMessage()
                  + "."
                  + "\nPlease see the reference guide at 02 for more information on this error. https://code.google.com/p/mzidentml-lib/wiki/CommonErrors ";
          System.out.println(message);
        }
      }
    }
  }