/** * this is what a Mapper does * * @param keyin * @param valuein * @return iterator over mapped key values */ @Nonnull @Override public Iterable<KeyValueObject<String, String>> mapValues( @Nonnull final String annotation, @Nonnull final String sequence) { List<KeyValueObject<String, String>> holder = new ArrayList<KeyValueObject<String, String>>(); IPeptideDigester digester = getDigester(); IProtein prot = Protein.getProtein(annotation, annotation, sequence, null); // do a boolean for a peptide belonging to a decoy protein, but use the public isDecoy // boolean/method in Protein class boolean isDecoy = prot.isDecoy(); IPolypeptide[] pps = digester.digest(prot); PeptideModification[] modifications1 = getModifications(); for (int i = 0; i < pps.length; i++) { IPolypeptide pp = pps[i]; if (!pp.isValid()) continue; // hadoop write intermediate seq finder writePeptide(pp, holder); // if(isDecoy) // continue; // skip the rest of the loop // if it is decoy, don't add modifications to it if (!isDecoy || isGenerateDecoysForModifiedPeptides()) { // generate modified peptides and add to the output IModifiedPeptide[] modifications = ModifiedPolypeptide.buildModifications(pp, modifications1); for (int m = 0; m < modifications.length; m++) { IModifiedPeptide modification = modifications[m]; writePeptide(modification, holder); } } } boolean semiTryptic = digester.isSemiTryptic(); if (semiTryptic) { IPolypeptide[] semipps = digester.addSemiCleavages(prot); for (int j = 0; j < semipps.length; j++) { IPolypeptide semipp = semipps[j]; if (!semipp.isValid()) continue; writePeptide(semipp, holder); IModifiedPeptide[] modifications = ModifiedPolypeptide.buildModifications(semipp, modifications1); for (int k = 0; k < modifications.length; k++) { IModifiedPeptide modification = modifications[k]; writePeptide(modification, holder); } } } return holder; }
@SuppressWarnings({"UnusedParameters", "UnusedAssignment"}) protected boolean handleSearchHit( String[] lines, double retentionTime, boolean onlyUniquePeptides, ISpectrumDataFilter... filters) { //noinspection UnnecessaryLocalVariable,UnusedDeclaration,UnusedAssignment Double expectedValue = null; //noinspection UnnecessaryLocalVariable,UnusedDeclaration,UnusedAssignment Double hyperScoreValue = null; int index = 0; String line = lines[index++]; // handle first line while (!line.contains("<search_hit")) { line = lines[index++]; if (index >= lines.length) return false; } String id = scan_id; if ("".equals(id)) throw new UnsupportedOperationException("Fix This"); // ToDo boolean trueHit = !line.contains("protein=\"DECOY_"); boolean processSpectrum = parseHitValue(line) <= 2; //noinspection UnnecessaryLocalVariable,UnusedDeclaration,UnusedAssignment boolean isUnique = true; //noinspection UnnecessaryLocalVariable,UnusedDeclaration,UnusedAssignment boolean isModified = false; if (!line.contains("hit_rank=\"1\"")) return false; IdentifiedPSM peptide = processPeptide(line, retentionTime, id); IProtein protein = null; for (; index < lines.length; index++) { line = lines[index]; if (line.contains("</search_hit")) break; // we are done if (line.contains(" modified_peptide=")) peptide = processModifiedPeptide(line, retentionTime, id); if (line.contains("<alternative_protein")) { isUnique = false; if (onlyUniquePeptides) processSpectrum = false; // only process unique hits } if (line.contains("<search_score name=\"hyperscore\" value=\"")) { hyperScoreValue = parseValue(line); } if (line.contains("<search_score name=\"expect\" value=\"")) { expectedValue = parseValue(line); } if (line.contains("protein=\"DECOY_")) { // another protein if (trueHit) processSpectrum = false; // one decoy one not } if (line.contains("<alternative_protein")) { // another protein if (!trueHit && !line.contains("protein=\"DECOY_")) // we start as decoy and fit to a real processSpectrum = false; // one decoy one not } if (line.contains("protein_descr=\"")) { protein = processProtein(line); } } if (processSpectrum) { @SuppressWarnings("ConstantConditions") String idP = protein.getId(); if (id.contains("DECOY")) return false; Set<IdentifiedPSM> pps = proteinToHits.get(idP); if (pps == null) { pps = new HashSet<IdentifiedPSM>(); proteinToHits.put(idP, pps); } pps.add(peptide); return true; // processed } return false; // unprocessed }