Beispiel #1
1
  /**
   * Prints an alignment map in a concise representation. Edges are given as two nodes separated by
   * '>'. They are chained together where possible, or separated by spaces where disjoint or
   * branched.
   *
   * <p>Note that more concise representations may be possible. Examples:
   *
   * <ul>
   *   <li>{@code 1>2>3>1}
   *   <li>{@code 1>2>3>2 4>3}
   * </ul>
   *
   * <p>The input maps are not modified; the method works on a {@link HashMap} copy of {@code
   * alignment}. Chains are started from whichever key that copy iterates first, so the order of
   * the chains in the output follows the copy's iteration order.
   *
   * @param alignment The input function, as a map (see {@link
   *     AlignmentTools#alignmentAsMap(AFPChain)})
   * @param identity An identity-like function providing the isomorphism between the codomain of
   *     alignment (of type {@code T}) and the domain (type {@code S}).
   * @return The concise string form of the alignment; the empty string if {@code alignment} has no
   *     entries
   */
  public static <S, T> String toConciseAlignmentString(Map<S, T> alignment, Map<T, S> identity) {
    // Clone input to prevent changes
    Map<S, T> alig = new HashMap<S, T>(alignment);

    // Generate inverse alignment: target node -> every source node with an edge into it
    Map<S, List<S>> inverse = new HashMap<S, List<S>>();
    for (Entry<S, T> e : alig.entrySet()) {
      S val = identity.get(e.getValue());
      List<S> sources = inverse.get(val);
      if (sources == null) {
        sources = new ArrayList<S>();
        inverse.put(val, sources);
      }
      sources.add(e.getKey());
    }

    StringBuilder str = new StringBuilder();

    while (!alig.isEmpty()) {
      // Pick an edge and work upstream to a root or cycle
      S seedNode = alig.keySet().iterator().next();
      S node = seedNode;
      if (inverse.containsKey(seedNode)) {
        node = inverse.get(seedNode).iterator().next();
        // Reference comparison is sufficient here: seedNode and every element of the inverse
        // lists are key objects taken from the same map (alig), so equal keys are the same object.
        while (node != seedNode && inverse.containsKey(node)) {
          node = inverse.get(node).iterator().next();
        }
      }

      // Now work downstream, deleting edges as we go
      str.append(node);

      while (alig.containsKey(node)) {
        S lastNode = node;
        node = identity.get(alig.get(lastNode));

        // Output
        str.append('>');
        str.append(node);

        // Remove the edge lastNode -> node from both the forward and the inverse map.
        // The inverse list of `node` holds its sources, so it is lastNode that must be dropped.
        alig.remove(lastNode);
        List<S> inv = inverse.get(node);
        if (inv.size() > 1) {
          inv.remove(lastNode);
        } else {
          inverse.remove(node);
        }
      }
      // Separate disjoint or branched chains with a single space
      if (!alig.isEmpty()) {
        str.append(' ');
      }
    }

    return str.toString();
  }
Beispiel #2
0
  /**
   * Derives a sequential alignment from a possibly non-sequential one. The aligned residues of
   * each structure are sorted independently, and the two sorted lists are then paired up position
   * by position.
   *
   * <p>The resulting map behaves much like an identity function, which makes it suitable as the
   * identity argument of {@link #getSymmetryOrder(Map, Map identity, int, float)}.
   *
   * <ul>
   *   <li>A perfect self-alignment has identical pre-image and image, so every residue maps X->X.
   *   <li>Gaps and alignment errors only disturb the map locally; they do not propagate through
   *       the whole alignment.
   * </ul>
   *
   * <h4>Example:</h4>
   *
   * The non-sequential alignment shown schematically as
   *
   * <pre>
   * 12456789
   * 78912345</pre>
   *
   * yields the map
   *
   * <pre>
   * 12456789
   * 12345789</pre>
   *
   * @param alignment The non-sequential input alignment
   * @param inverseAlignment If false, map from structure1 to structure2. If true, generate the
   *     inverse of that map.
   * @return A mapping from sequential residues of one protein to those of the other
   * @throws IllegalArgumentException if the input alignment is not one-to-one.
   */
  public static Map<Integer, Integer> guessSequentialAlignment(
      Map<Integer, Integer> alignment, boolean inverseAlignment) {
    // Collect the aligned residues of each structure in ascending order
    SortedSet<Integer> residues1 = new TreeSet<Integer>();
    SortedSet<Integer> residues2 = new TreeSet<Integer>();
    for (Entry<Integer, Integer> edge : alignment.entrySet()) {
      residues1.add(edge.getKey());
      boolean firstOccurrence = residues2.add(edge.getValue());
      if (!firstOccurrence) {
        // Two keys share this value, so the alignment cannot be inverted
        throw new IllegalArgumentException(
            "Alignment is not one-to-one for residue "
                + edge.getValue()
                + " of the second structure.");
      }
    }

    // Decide the direction once: 1->2 normally, 2->1 for the inverse
    SortedSet<Integer> sources = inverseAlignment ? residues2 : residues1;
    SortedSet<Integer> targets = inverseAlignment ? residues1 : residues2;

    // Both sets have the same size here, so the iterators advance in lockstep
    Map<Integer, Integer> sequential = new HashMap<Integer, Integer>();
    Iterator<Integer> targetIt = targets.iterator();
    for (Integer source : sources) {
      sequential.put(source, targetIt.next());
    }
    return sequential;
  }
  /**
   * Uses two sequences each with a corresponding structure to create an AFPChain corresponding to
   * the alignment. Provided only for convenience since FastaReaders return such maps.
   *
   * <p>The first sequence in the map's iteration order is paired with {@code structure1} and the
   * second with {@code structure2}. NOTE(review): this presumably expects a map with a stable
   * iteration order (e.g. insertion-ordered) — confirm against callers.
   *
   * @param sequences A Map containing exactly two entries from sequence names as Strings to gapped
   *     ProteinSequences; the name is ignored
   * @param structure1 The structure paired with the first sequence; must not be null
   * @param structure2 The structure paired with the second sequence; must not be null
   * @return The result of {@link #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure,
   *     Structure)} for the two sequences and structures
   * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
   * @throws IllegalArgumentException if {@code sequences} does not contain exactly two entries,
   *     or if either structure is null
   * @throws StructureException
   */
  public static AFPChain fastaToAfpChain(
      Map<String, ProteinSequence> sequences, Structure structure1, Structure structure2)
      throws StructureException {

    if (sequences.size() != 2) {
      throw new IllegalArgumentException(
          "There must be exactly 2 sequences, but there were " + sequences.size());
    }

    if (structure1 == null || structure2 == null) {
      throw new IllegalArgumentException("A structure is null");
    }

    // Only the sequences matter; the names (map keys) are ignored
    List<ProteinSequence> seqs = new ArrayList<ProteinSequence>(sequences.values());

    return fastaToAfpChain(seqs.get(0), seqs.get(1), structure1, structure2);
  }