Ejemplo n.º 1
0
  /**
   * Digests one protein and hands each resulting peptide to {@code writePeptide}.
   *
   * <p>The protein is built by {@code Protein.getProtein}, with {@code annotation} supplied for
   * both of its first two arguments. Every valid peptide returned by the digester is written,
   * followed by the modified forms built from {@code getModifications()}. For a decoy protein the
   * modified forms are written only when {@code isGenerateDecoysForModifiedPeptides()} is true.
   * When the digester is semi-tryptic, the peptides from {@code addSemiCleavages} and their
   * modified forms are written as well; that second pass applies no decoy check.
   *
   * @param annotation protein annotation, passed twice to {@code Protein.getProtein}
   * @param sequence protein sequence to digest
   * @return the list that was passed to every {@code writePeptide} call
   */
  @Nonnull
  @Override
  public Iterable<KeyValueObject<String, String>> mapValues(
      @Nonnull final String annotation, @Nonnull final String sequence) {

    final List<KeyValueObject<String, String>> output =
        new ArrayList<KeyValueObject<String, String>>();

    final IPeptideDigester digester = getDigester();
    final IProtein protein = Protein.getProtein(annotation, annotation, sequence, null);

    // Decoy status comes from the protein itself and is fixed for all of its peptides
    final boolean decoyProtein = protein.isDecoy();

    final IPolypeptide[] digested = digester.digest(protein);
    final PeptideModification[] knownModifications = getModifications();

    for (IPolypeptide peptide : digested) {
      if (peptide.isValid()) {
        // the unmodified peptide is always written
        writePeptide(peptide, output);

        // a decoy only gets modified forms when the generator is configured for it
        final boolean skipModified = decoyProtein && !isGenerateDecoysForModifiedPeptides();
        if (!skipModified) {
          final IModifiedPeptide[] modifiedForms =
              ModifiedPolypeptide.buildModifications(peptide, knownModifications);
          for (IModifiedPeptide modifiedForm : modifiedForms) {
            writePeptide(modifiedForm, output);
          }
        }
      }
    }

    if (digester.isSemiTryptic()) {
      final IPolypeptide[] semiPeptides = digester.addSemiCleavages(protein);
      for (IPolypeptide semiPeptide : semiPeptides) {
        if (semiPeptide.isValid()) {
          writePeptide(semiPeptide, output);
          final IModifiedPeptide[] modifiedForms =
              ModifiedPolypeptide.buildModifications(semiPeptide, knownModifications);
          for (IModifiedPeptide modifiedForm : modifiedForms) {
            writePeptide(modifiedForm, output);
          }
        }
      }
    }
    return output;
  }
  /**
   * Reads one {@code <search_hit>} element from {@code lines} and, when it qualifies, adds its
   * peptide to the set stored in {@code proteinToHits} under the protein's id.
   *
   * <p>A hit is left unprocessed (returns {@code false}) when any of the following holds:
   *
   * <ul>
   *   <li>no line containing {@code <search_hit} is found, or it is the final line;
   *   <li>the opening line does not contain {@code hit_rank="1"};
   *   <li>{@code parseHitValue} of the opening line is greater than 2;
   *   <li>{@code onlyUniquePeptides} is set and an {@code <alternative_protein} line is present;
   *   <li>the hit mixes {@code DECOY_} and non-decoy proteins;
   *   <li>no {@code protein_descr="} line was seen, so there is no protein to file the hit under;
   *   <li>the scan id contains {@code DECOY}.
   * </ul>
   *
   * @param lines lines to scan; reading stops at the first line containing {@code </search_hit}
   * @param retentionTime forwarded to {@code processPeptide} / {@code processModifiedPeptide}
   * @param onlyUniquePeptides when true, hits with an alternative protein are not processed
   * @param filters not read by this method
   * @return true if the peptide was added to {@code proteinToHits}, false otherwise
   * @throws UnsupportedOperationException if {@code scan_id} is the empty string
   */
  @SuppressWarnings({"UnusedParameters", "UnusedAssignment"})
  protected boolean handleSearchHit(
      String[] lines,
      double retentionTime,
      boolean onlyUniquePeptides,
      ISpectrumDataFilter... filters) {
    //noinspection UnnecessaryLocalVariable,UnusedDeclaration,UnusedAssignment
    Double expectedValue = null;
    //noinspection UnnecessaryLocalVariable,UnusedDeclaration,UnusedAssignment
    Double hyperScoreValue = null;

    // Find the opening line, checking the bound before every read so that an empty or
    // hit-less array yields false instead of an ArrayIndexOutOfBoundsException.
    int index = 0;
    while (index < lines.length && !lines[index].contains("<search_hit")) {
      index++;
    }
    // Either no hit was found, or it opens on the final line and has no body to read.
    if (index >= lines.length - 1) return false;
    String line = lines[index++]; // index now points at the first line after the opening tag

    String id = scan_id;
    if ("".equals(id)) throw new UnsupportedOperationException("Fix This"); // ToDo

    // trueHit: the opening line does not name a DECOY_ protein
    boolean trueHit = !line.contains("protein=\"DECOY_");
    boolean processSpectrum = parseHitValue(line) <= 2;
    if (!line.contains("hit_rank=\"1\"")) return false;
    IdentifiedPSM peptide = processPeptide(line, retentionTime, id);

    IProtein protein = null;

    for (; index < lines.length; index++) {
      line = lines[index];

      if (line.contains("</search_hit")) break; // we are done

      // a modified_peptide line replaces the peptide parsed from the opening line
      if (line.contains(" modified_peptide=")) {
        peptide = processModifiedPeptide(line, retentionTime, id);
      }

      if (line.contains("<alternative_protein")) {
        if (onlyUniquePeptides) processSpectrum = false; // only process unique hits
      }

      if (line.contains("<search_score name=\"hyperscore\" value=\"")) {
        hyperScoreValue = parseValue(line);
      }
      if (line.contains("<search_score name=\"expect\" value=\"")) {
        expectedValue = parseValue(line);
      }
      if (line.contains("protein=\"DECOY_")) { // another protein
        if (trueHit) processSpectrum = false; // one decoy one not
      }
      if (line.contains("<alternative_protein")) { // another protein
        // we start as decoy and fit to a real
        if (!trueHit && !line.contains("protein=\"DECOY_")) {
          processSpectrum = false; // one decoy one not
        }
      }

      if (line.contains("protein_descr=\"")) {
        protein = processProtein(line);
      }
    }

    if (!processSpectrum) return false; // unprocessed

    // No protein_descr line was seen: there is no protein id to record the hit under.
    if (protein == null) return false;

    String idP = protein.getId();
    // NOTE(review): this tests the scan id, not the protein id idP - confirm that is intended
    if (id.contains("DECOY")) return false;
    Set<IdentifiedPSM> pps = proteinToHits.get(idP);
    if (pps == null) {
      pps = new HashSet<IdentifiedPSM>();
      proteinToHits.put(idP, pps);
    }
    pps.add(peptide);
    return true; // processed
  }