/**
 * Indicates whether the peptide match designated by the match key passes the filter item of the
 * given name, using the given comparator and threshold value.
 *
 * @param itemName the name of the peptide filter item to evaluate
 * @param filterItemComparator the comparator used to compare the threshold with the match value
 * @param value the threshold; its {@code toString()} representation is handed to the comparator
 * @param matchKey the key of the peptide match of interest
 * @param identification the identification object providing the peptide matches and their
 *     parameters
 * @param identificationFeaturesGenerator the generator providing the validated and confident
 *     spectrum counts
 * @param shotgunProtocol not read by this implementation
 * @param identificationParameters the identification parameters; only the sequence matching
 *     preferences are read here
 * @param peptideSpectrumAnnotator not read by this implementation
 * @return the result of {@code filterItemComparator.passes} for the selected item
 * @throws IllegalArgumentException if the item name is not a peptide filter item or if the item
 *     has no implementation in this method
 * @throws IOException propagated from the identification, sequence factory or feature generator
 *     calls
 * @throws InterruptedException propagated from the same calls
 * @throws ClassNotFoundException propagated from the same calls
 * @throws SQLException propagated from the same calls
 * @throws MzMLUnmarshallerException propagated from the same calls
 * @throws MathException propagated from the same calls
 */
@Override
public boolean isValidated(
        String itemName,
        FilterItemComparator filterItemComparator,
        Object value,
        String matchKey,
        Identification identification,
        IdentificationFeaturesGenerator identificationFeaturesGenerator,
        ShotgunProtocol shotgunProtocol,
        IdentificationParameters identificationParameters,
        PeptideSpectrumAnnotator peptideSpectrumAnnotator)
        throws IOException, InterruptedException, ClassNotFoundException, SQLException,
                MzMLUnmarshallerException, MathException {

    PeptideFilterItem filterItem = PeptideFilterItem.getItem(itemName);
    if (filterItem == null) {
        throw new IllegalArgumentException(
                "Filter item " + itemName + " not recognized as peptide filter item.");
    }

    String input = value.toString();

    // Each case lives in its own braces so that its locals do not leak into the next case.
    switch (filterItem) {
        case proteinAccession: {
            PeptideMatch peptideMatch = identification.getPeptideMatch(matchKey);
            return filterItemComparator.passes(
                    input,
                    peptideMatch
                            .getTheoreticPeptide()
                            .getParentProteins(
                                    identificationParameters.getSequenceMatchingPreferences()));
        }
        case proteinDescription: {
            PeptideMatch peptideMatch = identification.getPeptideMatch(matchKey);
            ArrayList<String> accessions =
                    peptideMatch
                            .getTheoreticPeptide()
                            .getParentProteins(
                                    identificationParameters.getSequenceMatchingPreferences());
            ArrayList<String> descriptions = new ArrayList<String>();
            for (String accession : accessions) {
                Header proteinHeader = SequenceFactory.getInstance().getHeader(accession);
                descriptions.add(proteinHeader.getDescription());
            }
            return filterItemComparator.passes(input, descriptions);
        }
        case sequence:
            // The sequence is derived from the key itself, no match needs to be loaded.
            return filterItemComparator.passes(input, Peptide.getSequence(matchKey));
        case ptm: {
            PeptideMatch peptideMatch = identification.getPeptideMatch(matchKey);
            PSPtmScores psPtmScores = (PSPtmScores) peptideMatch.getUrParam(new PSPtmScores());
            ArrayList<String> ptms;
            if (psPtmScores != null) {
                ptms = psPtmScores.getScoredPTMs();
            } else {
                // No PTM scores attached to the match: compare against an empty list.
                ptms = new ArrayList<String>(0);
            }
            return filterItemComparator.passes(input, ptms);
        }
        case nPSMs: {
            PeptideMatch peptideMatch = identification.getPeptideMatch(matchKey);
            Integer nPsms = peptideMatch.getSpectrumCount();
            return filterItemComparator.passes(input, nPsms.toString());
        }
        case nValidatedPSMs: {
            Integer nPsms = identificationFeaturesGenerator.getNValidatedSpectraForPeptide(matchKey);
            return filterItemComparator.passes(input, nPsms.toString());
        }
        case nConfidentPSMs: {
            Integer nPsms = identificationFeaturesGenerator.getNConfidentSpectraForPeptide(matchKey);
            return filterItemComparator.passes(input, nPsms.toString());
        }
        case confidence: {
            PSParameter psParameter =
                    (PSParameter) identification.getPeptideMatchParameter(matchKey, new PSParameter());
            // The parameter belongs to a peptide match, so the peptide-level confidence is the
            // value to filter on (the previous code read the protein-level confidence).
            Double confidence = psParameter.getPeptideConfidence();
            return filterItemComparator.passes(input, confidence.toString());
        }
        case proteinInference: {
            PSParameter psParameter =
                    (PSParameter) identification.getPeptideMatchParameter(matchKey, new PSParameter());
            Integer pi = psParameter.getProteinInferenceClass();
            return filterItemComparator.passes(input, pi.toString());
        }
        case validationStatus: {
            PSParameter psParameter =
                    (PSParameter) identification.getPeptideMatchParameter(matchKey, new PSParameter());
            Integer validation = psParameter.getMatchValidationLevel().getIndex();
            return filterItemComparator.passes(input, validation.toString());
        }
        case stared: {
            PSParameter psParameter =
                    (PSParameter) identification.getPeptideMatchParameter(matchKey, new PSParameter());
            // trueFalse[0] is used for starred matches, trueFalse[1] for the others.
            String starred =
                    psParameter.isStarred()
                            ? FilterItemComparator.trueFalse[0]
                            : FilterItemComparator.trueFalse[1];
            return filterItemComparator.passes(input, starred);
        }
        default:
            throw new IllegalArgumentException(
                    "Peptide filter not implemented for item " + filterItem.name + ".");
    }
}
/**
 * Returns the component of the section corresponding to the given feature.
 *
 * <p>Multi-valued features are rendered as text: values belonging to different proteins are
 * separated by {@code "; "} and values within one protein by {@code ", "}. Boolean features are
 * rendered as {@code "1"} or {@code "0"}.
 *
 * @param identification the identification of the project
 * @param identificationFeaturesGenerator the identification features generator of the project
 * @param identificationParameters the identification parameters
 * @param keys the keys of the protein matches to output (not read by this method)
 * @param nSurroundingAA the number of surrounding amino acids to export
 * @param linePrefix the line prefix to use (not read by this method)
 * @param peptideMatch the peptide match
 * @param psParameter the PeptideShaker parameters of the match
 * @param peptideFeature the peptide feature to export
 * @param validatedOnly whether only validated matches should be exported (not read by this
 *     method)
 * @param decoys whether decoy matches should be exported as well (not read by this method)
 * @param waitingHandler the waiting handler
 * @return the component of the section corresponding to the given feature, or
 *     {@code "Not implemented"} when the feature is not handled here
 * @throws IOException exception thrown whenever an error occurred while interacting with a file
 *     while mapping potential modification sites
 * @throws IllegalArgumentException declared by the signature; presumably raised by the
 *     underlying sequence factory calls (to be confirmed against that API)
 * @throws InterruptedException exception thrown whenever a threading issue occurred while
 *     mapping potential modification sites
 * @throws ClassNotFoundException exception thrown whenever an error occurred while
 *     deserializing an object from the ProteinTree
 * @throws SQLException exception thrown whenever an error occurred while interacting with the
 *     ProteinTree
 * @throws uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException exception thrown whenever an error
 *     occurred while reading an mzML file
 */
public static String getfeature(
        Identification identification,
        IdentificationFeaturesGenerator identificationFeaturesGenerator,
        IdentificationParameters identificationParameters,
        ArrayList<String> keys,
        int nSurroundingAA,
        String linePrefix,
        PeptideMatch peptideMatch,
        PSParameter psParameter,
        PsPeptideFeature peptideFeature,
        boolean validatedOnly,
        boolean decoys,
        WaitingHandler waitingHandler)
        throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException,
                InterruptedException, MzMLUnmarshallerException {

    switch (peptideFeature) {
        case accessions: {
            ArrayList<String> accessions =
                    listSortedParentProteins(peptideMatch, identificationParameters);
            StringBuilder proteins = new StringBuilder();
            for (String accession : accessions) {
                if (proteins.length() > 0) {
                    proteins.append("; ");
                }
                proteins.append(accession);
            }
            return proteins.toString();
        }
        case protein_description: {
            SequenceFactory sequenceFactory = SequenceFactory.getInstance();
            ArrayList<String> accessions =
                    listSortedParentProteins(peptideMatch, identificationParameters);
            StringBuilder descriptions = new StringBuilder();
            for (String accession : accessions) {
                if (descriptions.length() > 0) {
                    descriptions.append("; ");
                }
                descriptions.append(sequenceFactory.getHeader(accession).getDescription());
            }
            return descriptions.toString();
        }
        case protein_groups: {
            ArrayList<String> proteinGroupsList =
                    listSortedProteinGroups(identification, peptideMatch, psParameter, waitingHandler);
            // A separate parameter object is used for the protein level look-ups so that the
            // peptide level parameter handed in by the caller is left alone.
            PSParameter proteinParameter = new PSParameter();
            StringBuilder proteins = new StringBuilder();
            for (String proteinGroup : proteinGroupsList) {
                if (identification.getProteinIdentification().contains(proteinGroup)) {
                    proteinParameter =
                            (PSParameter)
                                    identification.getProteinMatchParameter(
                                            proteinGroup, proteinParameter);
                    if (proteins.length() > 0) {
                        proteins.append("; ");
                    }
                    List<String> groupAccessions =
                            Arrays.asList(ProteinMatch.getAccessions(proteinGroup));
                    Collections.sort(groupAccessions);
                    boolean first = true;
                    for (String accession : groupAccessions) {
                        if (first) {
                            first = false;
                        } else {
                            proteins.append(", ");
                        }
                        proteins.append(accession);
                    }
                    proteins.append(" (");
                    proteins.append(proteinParameter.getMatchValidationLevel().getName());
                    proteins.append(")");
                }
            }
            return proteins.toString();
        }
        case best_protein_group_validation: {
            MatchValidationLevel bestProteinValidationLevel = MatchValidationLevel.none;
            ArrayList<String> proteinGroupsList =
                    listSortedProteinGroups(identification, peptideMatch, psParameter, waitingHandler);
            PSParameter proteinParameter = new PSParameter();
            for (String proteinGroup : proteinGroupsList) {
                if (identification.getProteinIdentification().contains(proteinGroup)) {
                    proteinParameter =
                            (PSParameter)
                                    identification.getProteinMatchParameter(
                                            proteinGroup, proteinParameter);
                    // Keep the level with the highest index seen so far.
                    if (proteinParameter.getMatchValidationLevel().getIndex()
                            > bestProteinValidationLevel.getIndex()) {
                        bestProteinValidationLevel = proteinParameter.getMatchValidationLevel();
                    }
                }
            }
            return bestProteinValidationLevel.getName();
        }
        case confidence:
            return psParameter.getPeptideConfidence() + "";
        case decoy:
            return peptideMatch
                            .getTheoreticPeptide()
                            .isDecoy(identificationParameters.getSequenceMatchingPreferences())
                    ? "1"
                    : "0";
        case hidden:
            return psParameter.isHidden() ? "1" : "0";
        case localization_confidence:
            return getPeptideModificationLocations(
                            peptideMatch,
                            identificationParameters.getSearchParameters().getPtmSettings())
                    + "";
        case pi:
            return psParameter.getProteinInferenceClassAsString();
        case position: {
            ArrayList<String> accessions =
                    listSortedParentProteins(peptideMatch, identificationParameters);
            Peptide peptide = peptideMatch.getTheoreticPeptide();
            StringBuilder start = new StringBuilder();
            for (String proteinAccession : accessions) {
                if (start.length() > 0) {
                    start.append("; ");
                }
                Protein protein = SequenceFactory.getInstance().getProtein(proteinAccession);
                ArrayList<Integer> starts =
                        protein.getPeptideStart(
                                peptide.getSequence(),
                                identificationParameters.getSequenceMatchingPreferences());
                Collections.sort(starts);
                boolean first = true;
                for (int startAa : starts) {
                    if (first) {
                        first = false;
                    } else {
                        start.append(", ");
                    }
                    start.append(startAa);
                }
            }
            return start.toString();
        }
        case psms:
            return peptideMatch.getSpectrumCount() + "";
        case variable_ptms:
            return Peptide.getPeptideModificationsAsString(peptideMatch.getTheoreticPeptide(), true);
        case fixed_ptms:
            return Peptide.getPeptideModificationsAsString(peptideMatch.getTheoreticPeptide(), false);
        case score:
            return psParameter.getPeptideScore() + "";
        case raw_score:
            return psParameter.getPeptideProbabilityScore() + "";
        case sequence:
            return peptideMatch.getTheoreticPeptide().getSequence();
        case missed_cleavages: {
            Peptide peptide = peptideMatch.getTheoreticPeptide();
            Integer nMissedCleavages =
                    peptide.getNMissedCleavages(
                            identificationParameters.getSearchParameters().getDigestionPreferences());
            // A missing value is exported as zero missed cleavages.
            if (nMissedCleavages == null) {
                nMissedCleavages = 0;
            }
            return nMissedCleavages + "";
        }
        case modified_sequence:
            return peptideMatch
                    .getTheoreticPeptide()
                    .getTaggedModifiedSequence(
                            identificationParameters.getSearchParameters().getPtmSettings(),
                            false,
                            false,
                            true);
        case starred:
            return psParameter.isStarred() ? "1" : "0";
        case aaBefore:
            // Index 0 of the surrounding amino acids array is exported for "before".
            return joinSurroundingAminoAcids(
                    peptideMatch, identificationParameters, nSurroundingAA, 0);
        case aaAfter:
            // Index 1 of the surrounding amino acids array is exported for "after".
            return joinSurroundingAminoAcids(
                    peptideMatch, identificationParameters, nSurroundingAA, 1);
        case nValidatedProteinGroups: {
            Peptide peptide = peptideMatch.getTheoreticPeptide();
            return identificationFeaturesGenerator.getNValidatedProteinGroups(peptide, waitingHandler)
                    + "";
        }
        case unique_database: {
            Peptide peptide = peptideMatch.getTheoreticPeptide();
            return identification.isUniqueInDatabase(peptide) ? "1" : "0";
        }
        case validated:
            return psParameter.getMatchValidationLevel().toString();
        case validated_psms:
            return identificationFeaturesGenerator.getNValidatedSpectraForPeptide(
                            peptideMatch.getKey())
                    + "";
        case probabilistic_score:
            return formatPtmSiteScores(peptideMatch, true);
        case d_score:
            return formatPtmSiteScores(peptideMatch, false);
        case confident_modification_sites: {
            String sequence = peptideMatch.getTheoreticPeptide().getSequence();
            return identificationFeaturesGenerator.getConfidentPtmSites(peptideMatch, sequence);
        }
        case confident_modification_sites_number:
            return identificationFeaturesGenerator.getConfidentPtmSitesNumber(peptideMatch);
        case ambiguous_modification_sites: {
            String sequence = peptideMatch.getTheoreticPeptide().getSequence();
            return identificationFeaturesGenerator.getAmbiguousPtmSites(peptideMatch, sequence);
        }
        case ambiguous_modification_sites_number:
            return identificationFeaturesGenerator.getAmbiguousPtmSiteNumber(peptideMatch);
        case confident_phosphosites: {
            ArrayList<String> modifications = collectPhosphoModifications(identificationParameters);
            return identificationFeaturesGenerator.getConfidentPtmSites(
                    peptideMatch, peptideMatch.getTheoreticPeptide().getSequence(), modifications);
        }
        case confident_phosphosites_number: {
            ArrayList<String> modifications = collectPhosphoModifications(identificationParameters);
            return identificationFeaturesGenerator.getConfidentPtmSitesNumber(
                    peptideMatch, modifications);
        }
        case ambiguous_phosphosites: {
            ArrayList<String> modifications = collectPhosphoModifications(identificationParameters);
            return identificationFeaturesGenerator.getAmbiguousPtmSites(
                    peptideMatch, peptideMatch.getTheoreticPeptide().getSequence(), modifications);
        }
        case ambiguous_phosphosites_number: {
            ArrayList<String> modifications = collectPhosphoModifications(identificationParameters);
            return identificationFeaturesGenerator.getAmbiguousPtmSiteNumber(
                    peptideMatch, modifications);
        }
        default:
            return "Not implemented";
    }
}

/**
 * Returns the parent proteins of the theoretic peptide of the given match, sorted in natural
 * order. The list returned by the peptide is sorted in place.
 */
private static ArrayList<String> listSortedParentProteins(
        PeptideMatch peptideMatch, IdentificationParameters identificationParameters)
        throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException,
                InterruptedException, MzMLUnmarshallerException {
    ArrayList<String> accessions =
            peptideMatch
                    .getTheoreticPeptide()
                    .getParentProteins(identificationParameters.getSequenceMatchingPreferences());
    Collections.sort(accessions);
    return accessions;
}

/**
 * Returns the sorted keys of the protein groups of the theoretic peptide of the given match and,
 * when there is more than one group, asks the identification to load their parameters.
 */
private static ArrayList<String> listSortedProteinGroups(
        Identification identification,
        PeptideMatch peptideMatch,
        PSParameter psParameter,
        WaitingHandler waitingHandler)
        throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException,
                InterruptedException, MzMLUnmarshallerException {
    HashSet<String> proteinGroups =
            identification.getProteinMatches(peptideMatch.getTheoreticPeptide());
    ArrayList<String> proteinGroupsList = new ArrayList<String>(proteinGroups);
    Collections.sort(proteinGroupsList);
    if (proteinGroupsList.size() > 1) {
        identification.loadProteinMatchParameters(
                proteinGroupsList, psParameter, waitingHandler, false);
    }
    return proteinGroupsList;
}

/**
 * Joins, for every parent protein of the peptide, the element at {@code sideIndex} of the
 * surrounding amino acids found at each peptide start, ordered by protein accession and start.
 */
private static String joinSurroundingAminoAcids(
        PeptideMatch peptideMatch,
        IdentificationParameters identificationParameters,
        int nSurroundingAA,
        int sideIndex)
        throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException,
                InterruptedException, MzMLUnmarshallerException {
    Peptide peptide = peptideMatch.getTheoreticPeptide();
    ArrayList<String> accessions =
            peptide.getParentProteins(identificationParameters.getSequenceMatchingPreferences());
    Collections.sort(accessions);
    StringBuilder subSequence = new StringBuilder();
    for (String proteinAccession : accessions) {
        if (subSequence.length() > 0) {
            subSequence.append("; ");
        }
        HashMap<Integer, String[]> surroundingAAs =
                SequenceFactory.getInstance()
                        .getProtein(proteinAccession)
                        .getSurroundingAA(
                                peptide.getSequence(),
                                nSurroundingAA,
                                identificationParameters.getSequenceMatchingPreferences());
        ArrayList<Integer> starts = new ArrayList<Integer>(surroundingAAs.keySet());
        Collections.sort(starts);
        boolean first = true;
        for (int startAa : starts) {
            if (first) {
                first = false;
            } else {
                subSequence.append(", ");
            }
            subSequence.append(surroundingAAs.get(startAa)[sideIndex]);
        }
    }
    return subSequence.toString();
}

/**
 * Formats the PTM site scores attached to the given match as
 * {@code "mod (site: score, site: score), mod (...)"}; returns an empty string when the match
 * carries no PTM scores. Probabilistic sites and scores are used when {@code probabilistic} is
 * true, D-score sites and delta scores otherwise.
 */
private static String formatPtmSiteScores(PeptideMatch peptideMatch, boolean probabilistic) {
    PSPtmScores ptmScores = (PSPtmScores) peptideMatch.getUrParam(new PSPtmScores());
    if (ptmScores == null) {
        return "";
    }
    StringBuilder result = new StringBuilder();
    ArrayList<String> modList = new ArrayList<String>(ptmScores.getScoredPTMs());
    Collections.sort(modList);
    for (String mod : modList) {
        PtmScoring ptmScoring = ptmScores.getPtmScoring(mod);
        ArrayList<Integer> sites;
        if (probabilistic) {
            sites = new ArrayList<Integer>(ptmScoring.getProbabilisticSites());
        } else {
            sites = new ArrayList<Integer>(ptmScoring.getDSites());
        }
        // Modifications without any scored site are left out of the output.
        if (!sites.isEmpty()) {
            Collections.sort(sites);
            if (result.length() > 0) {
                result.append(", ");
            }
            result.append(mod).append(" (");
            boolean firstSite = true;
            for (int site : sites) {
                if (firstSite) {
                    firstSite = false;
                } else {
                    result.append(", ");
                }
                result.append(site).append(": ");
                if (probabilistic) {
                    result.append(ptmScoring.getProbabilisticScore(site));
                } else {
                    result.append(ptmScoring.getDeltaScore(site));
                }
            }
            result.append(")");
        }
    }
    return result.toString();
}

/**
 * Returns the non-fixed modifications of the search parameters whose name contains
 * {@code "Phospho"}.
 */
private static ArrayList<String> collectPhosphoModifications(
        IdentificationParameters identificationParameters) {
    ArrayList<String> modifications = new ArrayList<String>();
    for (String ptm :
            identificationParameters
                    .getSearchParameters()
                    .getPtmSettings()
                    .getAllNotFixedModifications()) {
        if (ptm.contains("Phospho")) {
            modifications.add(ptm);
        }
    }
    return modifications;
}
/**
 * Indicates whether the peptide assumption made for the spectrum designated by the spectrum key
 * validates the given item using the given comparator and value threshold.
 *
 * @param itemName the name of the item to filter on
 * @param filterItemComparator the comparator to use
 * @param value the value to use as a threshold; its {@code toString()} representation is handed
 *     to the comparator
 * @param spectrumKey the key of the match of interest
 * @param peptideAssumption the assumption to validate
 * @param identification the identification objects where to get identification matches from
 * @param identificationFeaturesGenerator the identification feature generator where to get
 *     identification features
 * @param identificationParameters the identification parameters used
 * @param peptideSpectrumAnnotator the annotator to use to annotate spectra when filtering on PSM
 *     or assumptions
 * @return a boolean indicating whether the assumption validates the given item using the given
 *     comparator and value threshold.
 * @throws IllegalArgumentException if the item name is not an assumption filter item or if the
 *     item has no implementation in this method
 * @throws java.io.IOException exception thrown whenever an exception occurred while reading or
 *     writing a file
 * @throws java.lang.InterruptedException exception thrown whenever a threading issue occurred
 *     while validating that the match passes the filter
 * @throws java.lang.ClassNotFoundException exception thrown whenever an error occurred while
 *     deserializing a match
 * @throws java.sql.SQLException exception thrown whenever an error occurred while interacting
 *     with a database
 * @throws uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException exception thrown whenever an error
 *     occurred while reading an mzML file
 * @throws org.apache.commons.math.MathException exception thrown whenever an error occurred while
 *     doing statistics on a distribution
 */
public boolean isValidated(
        String itemName,
        FilterItemComparator filterItemComparator,
        Object value,
        String spectrumKey,
        PeptideAssumption peptideAssumption,
        Identification identification,
        IdentificationFeaturesGenerator identificationFeaturesGenerator,
        IdentificationParameters identificationParameters,
        PeptideSpectrumAnnotator peptideSpectrumAnnotator)
        throws IOException, InterruptedException, ClassNotFoundException, SQLException,
                MzMLUnmarshallerException, MathException {

    AssumptionFilterItem filterItem = AssumptionFilterItem.getItem(itemName);
    if (filterItem == null) {
        throw new IllegalArgumentException(
                "Filter item " + itemName + " not recognized as spectrum assumption filter item.");
    }

    String input = value.toString();

    // Each case lives in its own braces so that its locals do not leak into the next case.
    switch (filterItem) {
        case precrusorMz: {
            Precursor precursor = SpectrumFactory.getInstance().getPrecursor(spectrumKey);
            Double mz = precursor.getMz();
            return filterItemComparator.passes(input, mz.toString());
        }
        case precrusorRT: {
            Precursor precursor = SpectrumFactory.getInstance().getPrecursor(spectrumKey);
            Double rt = precursor.getRt();
            return filterItemComparator.passes(input, rt.toString());
        }
        case precrusorCharge: {
            Integer charge = peptideAssumption.getIdentificationCharge().value;
            return filterItemComparator.passes(input, charge.toString());
        }
        case precrusorMzErrorDa: {
            Precursor precursor = SpectrumFactory.getInstance().getPrecursor(spectrumKey);
            SearchParameters searchParameters = identificationParameters.getSearchParameters();
            // Absolute error; the 'false' flag selects the non-ppm branch of getDeltaMass.
            Double mzError =
                    Math.abs(
                            peptideAssumption.getDeltaMass(
                                    precursor.getMz(),
                                    false,
                                    searchParameters.getMinIsotopicCorrection(),
                                    searchParameters.getMaxIsotopicCorrection()));
            return filterItemComparator.passes(input, mzError.toString());
        }
        case precrusorMzErrorPpm: {
            SearchParameters searchParameters = identificationParameters.getSearchParameters();
            Precursor precursor = SpectrumFactory.getInstance().getPrecursor(spectrumKey);
            // Absolute error; the 'true' flag selects the ppm branch of getDeltaMass.
            Double mzError =
                    Math.abs(
                            peptideAssumption.getDeltaMass(
                                    precursor.getMz(),
                                    true,
                                    searchParameters.getMinIsotopicCorrection(),
                                    searchParameters.getMaxIsotopicCorrection()));
            return filterItemComparator.passes(input, mzError.toString());
        }
        case precrusorMzErrorStat: {
            SearchParameters searchParameters = identificationParameters.getSearchParameters();
            Precursor precursor = SpectrumFactory.getInstance().getPrecursor(spectrumKey);
            // Signed error, in the unit configured for the precursor accuracy.
            Double mzError =
                    peptideAssumption.getDeltaMass(
                            precursor.getMz(),
                            searchParameters.isPrecursorAccuracyTypePpm(),
                            searchParameters.getMinIsotopicCorrection(),
                            searchParameters.getMaxIsotopicCorrection());
            NonSymmetricalNormalDistribution precDeviationDistribution =
                    identificationFeaturesGenerator.getMassErrorDistribution(
                            Spectrum.getSpectrumFile(spectrumKey));
            // Use the descending cumulative probability above the mean and the ascending one
            // at or below it.
            Double p;
            if (mzError > precDeviationDistribution.getMean()) {
                p = precDeviationDistribution.getDescendingCumulativeProbabilityAt(mzError);
            } else {
                p = precDeviationDistribution.getCumulativeProbabilityAt(mzError);
            }
            return filterItemComparator.passes(input, p.toString());
        }
        case sequenceCoverage: {
            SpectrumFactory spectrumFactory = SpectrumFactory.getInstance();
            MSnSpectrum spectrum = (MSnSpectrum) spectrumFactory.getSpectrum(spectrumKey);
            Peptide peptide = peptideAssumption.getPeptide();
            AnnotationSettings annotationPreferences =
                    identificationParameters.getAnnotationPreferences();
            SpecificAnnotationSettings specificAnnotationPreferences =
                    annotationPreferences.getSpecificAnnotationPreferences(
                            spectrum.getSpectrumKey(),
                            peptideAssumption,
                            identificationParameters.getSequenceMatchingPreferences(),
                            identificationParameters
                                    .getPtmScoringPreferences()
                                    .getSequenceMatchingPreferences());
            HashMap<Integer, ArrayList<IonMatch>> matches =
                    peptideSpectrumAnnotator.getCoveredAminoAcids(
                            annotationPreferences, specificAnnotationPreferences, spectrum, peptide);
            double nCovered = 0;
            int nAA = peptide.getSequence().length();
            // NOTE(review): indexes 0 through nAA inclusive are inspected, i.e. nAA + 1 keys;
            // confirm against the indexing used by getCoveredAminoAcids.
            for (int i = 0; i <= nAA; i++) {
                ArrayList<IonMatch> matchesAtAa = matches.get(i);
                if (matchesAtAa != null && !matchesAtAa.isEmpty()) {
                    nCovered++;
                }
            }
            // Coverage in percent of the sequence length.
            Double coverage = 100.0 * nCovered / nAA;
            return filterItemComparator.passes(input, coverage.toString());
        }
        case algorithmScore: {
            // Fall back on the score when no raw score is available.
            Double score = peptideAssumption.getRawScore();
            if (score == null) {
                score = peptideAssumption.getScore();
            }
            return filterItemComparator.passes(input, score.toString());
        }
        case fileNames:
            return filterItemComparator.passes(input, Spectrum.getSpectrumFile(spectrumKey));
        case confidence: {
            // NOTE(review): a peptide match parameter is looked up with a spectrum key and its
            // protein level confidence is read; a spectrum or assumption level value may have
            // been intended. Left unchanged until confirmed against the Identification API.
            PSParameter psParameter =
                    (PSParameter)
                            identification.getPeptideMatchParameter(spectrumKey, new PSParameter());
            Double confidence = psParameter.getProteinConfidence();
            return filterItemComparator.passes(input, confidence.toString());
        }
        case validationStatus: {
            // NOTE(review): same peptide-parameter-by-spectrum-key look-up as for 'confidence'.
            PSParameter psParameter =
                    (PSParameter)
                            identification.getPeptideMatchParameter(spectrumKey, new PSParameter());
            Integer validation = psParameter.getMatchValidationLevel().getIndex();
            return filterItemComparator.passes(input, validation.toString());
        }
        case stared: {
            // NOTE(review): same peptide-parameter-by-spectrum-key look-up as for 'confidence'.
            PSParameter psParameter =
                    (PSParameter)
                            identification.getPeptideMatchParameter(spectrumKey, new PSParameter());
            // trueFalse[0] is used for starred matches, trueFalse[1] for the others.
            String starred =
                    psParameter.isStarred()
                            ? FilterItemComparator.trueFalse[0]
                            : FilterItemComparator.trueFalse[1];
            return filterItemComparator.passes(input, starred);
        }
        default:
            throw new IllegalArgumentException(
                    "Assumption filter not implemented for item " + filterItem.name + ".");
    }
}