/**
  * Takes a structure and sequence corresponding to an alignment between a structure or sequence
  * and itself (or even a structure with a sequence), where the result has a circular permutation
  * site {@code cpSite} residues to the right.
  *
  * <p>This overload parses {@code fastaFile} as protein FASTA (preserving letter case), takes the
  * first two sequences in file order, and delegates to the sequence-based {@code
  * cpFastaToAfpChain} overload. Any sequences after the second are ignored. The number of
  * sequences is not validated here, so a file with fewer than two sequences fails with an
  * unchecked exception.
  *
  * @param fastaFile A FASTA file containing exactly 2 sequences, the first unpermuted and the
  *     second permuted
  * @param structure The structure passed through unchanged to the sequence-based overload
  * @param cpSite The number of residues from the beginning of the sequence at which the circular
  *     permutation site occurs; can be positive or negative; values greater than the length of the
  *     sequence are acceptable
  * @return The alignment produced by the sequence-based overload
  * @throws IOException If the file cannot be opened, read, or closed
  * @throws StructureException If the sequence-based overload fails
  */
 public static AFPChain cpFastaToAfpChain(File fastaFile, Structure structure, int cpSite)
     throws IOException, StructureException {
   InputStream inStream = new FileInputStream(fastaFile);
   LinkedHashMap<String, ProteinSequence> sequences;
   // The stream must be released even when parsing fails, hence the try/finally.
   try {
     SequenceCreatorInterface<AminoAcidCompound> creator =
         new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
     SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser =
         new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
     FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
         new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
     sequences = fastaReader.process();
   } finally {
     inStream.close();
   }
   // The map keeps insertion order, so iteration follows the order of records in the file.
   Iterator<ProteinSequence> iter = sequences.values().iterator();
   ProteinSequence first = iter.next();
   ProteinSequence second = iter.next();
   return cpFastaToAfpChain(first, second, structure, cpSite);
 }
  // Example #2
  /**
   * Derives a sequential alignment from one that may be non-sequential. The aligned residues of
   * each structure are sorted independently, and the two sorted lists are then paired position by
   * position.
   *
   * <p>The resulting map behaves like what one would expect of an identity function, which makes
   * it suitable for use with {@link #getSymmetryOrder(Map, Map identity, int, float)}.
   *
   * <ul>
   *   <li>A perfect self-alignment has identical pre-image and image, so every residue maps to
   *       itself (X->X).
   *   <li>Gaps and alignment errors introduce errors into the resulting map, but only locally;
   *       they do not propagate through the whole alignment.
   * </ul>
   *
   * <h4>Example:</h4>
   *
   * The non-sequential alignment shown schematically as
   *
   * <pre>
   * 12456789
   * 78912345</pre>
   *
   * produces the map
   *
   * <pre>
   * 12456789
   * 12345789</pre>
   *
   * @param alignment The non-sequential input alignment
   * @param inverseAlignment If false, the result maps structure1 to structure2. If true, the
   *     inverse of that map is generated.
   * @return A mapping from sequential residues of one protein to those of the other
   * @throws IllegalArgumentException if the input alignment is not one-to-one.
   */
  public static Map<Integer, Integer> guessSequentialAlignment(
      Map<Integer, Integer> alignment, boolean inverseAlignment) {
    SortedSet<Integer> residues1 = new TreeSet<Integer>();
    SortedSet<Integer> residues2 = new TreeSet<Integer>();

    // Collect both sides in sorted order; a repeated value means two residues of the first
    // structure were aligned to the same residue of the second.
    for (Entry<Integer, Integer> pair : alignment.entrySet()) {
      residues1.add(pair.getKey());
      boolean firstOccurrence = residues2.add(pair.getValue());
      if (!firstOccurrence) {
        throw new IllegalArgumentException(
            "Alignment is not one-to-one for residue "
                + pair.getValue()
                + " of the second structure.");
      }
    }

    // Choose which side supplies keys and which supplies values, then pair them up in order.
    Iterator<Integer> keys = inverseAlignment ? residues2.iterator() : residues1.iterator();
    Iterator<Integer> values = inverseAlignment ? residues1.iterator() : residues2.iterator();

    Map<Integer, Integer> sequential = new HashMap<Integer, Integer>();
    while (keys.hasNext()) {
      Integer key = keys.next();
      Integer value = values.next();
      sequential.put(key, value);
    }
    return sequential;
  }
  /**
   * Merges linked modified compounds in place.
   *
   * <p>Every compound after the first is considered if the registry category of its modification
   * is {@link ModificationCategory#UNDEFINED}. For such a compound, the first earlier compound
   * that has not itself been merged away and that shares at least one group with it is looked up.
   * If that earlier compound carries the same modification id, the current compound's atom
   * linkages are added to it and the current compound is removed from the list.
   *
   * @param modComps the list of identified modified compounds; modified by this method
   */
  private void mergeModComps(List<ModifiedCompound> modComps) {
    TreeSet<Integer> mergedAway = new TreeSet<Integer>();
    int total = modComps.size();

    for (int current = 1; current < total; current++) {
      ModifiedCompound candidate = modComps.get(current);
      String candidateId = candidate.getModification().getId();

      // Only modifications whose category is undefined are eligible for merging.
      boolean undefinedCategory =
          ProteinModificationRegistry.getById(candidateId).getCategory()
              == ModificationCategory.UNDEFINED;
      if (!undefinedCategory) {
        continue;
      }

      // Look for the first surviving earlier compound that shares a group with the candidate.
      ModifiedCompound linked = null;
      for (int earlier = 0; earlier < current && linked == null; earlier++) {
        if (mergedAway.contains(earlier)) {
          continue;
        }
        ModifiedCompound previous = modComps.get(earlier);
        boolean sharesGroup =
            !Collections.disjoint(previous.getGroups(false), candidate.getGroups(false));
        if (sharesGroup) {
          linked = previous;
        }
      }

      // Fold the candidate into the earlier compound only when both are the same modification.
      if (linked != null && linked.getModification().getId().equals(candidateId)) {
        linked.addAtomLinkages(candidate.getAtomLinkages());
        mergedAway.add(current);
      }
    }

    // Remove from the highest index down so the remaining indices stay valid.
    for (Integer index : mergedAway.descendingSet()) {
      modComps.remove(index.intValue());
    }
  }