/** * Print an alignment map in a concise representation. Edges are given as two numbers separated by * '>'. They are chained together where possible, or separated by spaces where disjoint or * branched. * * <p>Note that more concise representations may be possible. Examples: * <li>1>2>3>1 * <li>1>2>3>2 4>3 * * @param alignment The input function, as a map (see {@link * AlignmentTools#alignmentAsMap(AFPChain)}) * @param identity An identity-like function providing the isomorphism between the codomain of * alignment (of type <T>) and the domain (type <S>). * @return */ public static <S, T> String toConciseAlignmentString(Map<S, T> alignment, Map<T, S> identity) { // Clone input to prevent changes Map<S, T> alig = new HashMap<S, T>(alignment); // Generate inverse alignment Map<S, List<S>> inverse = new HashMap<S, List<S>>(); for (Entry<S, T> e : alig.entrySet()) { S val = identity.get(e.getValue()); if (inverse.containsKey(val)) { List<S> l = inverse.get(val); l.add(e.getKey()); } else { List<S> l = new ArrayList<S>(); l.add(e.getKey()); inverse.put(val, l); } } StringBuilder str = new StringBuilder(); while (!alig.isEmpty()) { // Pick an edge and work upstream to a root or cycle S seedNode = alig.keySet().iterator().next(); S node = seedNode; if (inverse.containsKey(seedNode)) { node = inverse.get(seedNode).iterator().next(); while (node != seedNode && inverse.containsKey(node)) { node = inverse.get(node).iterator().next(); } } // Now work downstream, deleting edges as we go seedNode = node; str.append(node); while (alig.containsKey(node)) { S lastNode = node; node = identity.get(alig.get(lastNode)); // Output str.append('>'); str.append(node); // Remove edge alig.remove(lastNode); List<S> inv = inverse.get(node); if (inv.size() > 1) { inv.remove(node); } else { inverse.remove(node); } } if (!alig.isEmpty()) { str.append(' '); } } return str.toString(); }
/** * Takes a potentially non-sequential alignment and guesses a sequential version of it. Residues * from each structure are sorted sequentially and then compared directly. * * <p>The results of this method are consistent with what one might expect from an identity * function, and are therefore useful with {@link #getSymmetryOrder(Map, Map identity, int, * float)}. * * <ul> * <li>Perfect self-alignments will have the same pre-image and image, so will map X->X * <li>Gaps and alignment errors will cause errors in the resulting map, but only locally. * Errors do not propagate through the whole alignment. * </ul> * * <h4>Example:</h4> * * A non sequential alignment, represented schematically as * * <pre> * 12456789 * 78912345</pre> * * would result in a map * * <pre> * 12456789 * 12345789</pre> * * @param alignment The non-sequential input alignment * @param inverseAlignment If false, map from structure1 to structure2. If true, generate the * inverse of that map. * @return A mapping from sequential residues of one protein to those of the other * @throws IllegalArgumentException if the input alignment is not one-to-one. */ public static Map<Integer, Integer> guessSequentialAlignment( Map<Integer, Integer> alignment, boolean inverseAlignment) { Map<Integer, Integer> identity = new HashMap<Integer, Integer>(); SortedSet<Integer> aligned1 = new TreeSet<Integer>(); SortedSet<Integer> aligned2 = new TreeSet<Integer>(); for (Entry<Integer, Integer> pair : alignment.entrySet()) { aligned1.add(pair.getKey()); if (!aligned2.add(pair.getValue())) throw new IllegalArgumentException( "Alignment is not one-to-one for residue " + pair.getValue() + " of the second structure."); } Iterator<Integer> it1 = aligned1.iterator(); Iterator<Integer> it2 = aligned2.iterator(); while (it1.hasNext()) { if (inverseAlignment) { // 2->1 identity.put(it2.next(), it1.next()); } else { // 1->2 identity.put(it1.next(), it2.next()); } } return identity; }
/**
 * Uses two sequences each with a corresponding structure to create an AFPChain corresponding to
 * the alignment. Provided only for convenience since FastaReaders return such maps.
 *
 * <p>The sequences are taken in the iteration order of {@code sequences}: the first one is
 * passed alongside {@code structure1} and the second alongside {@code structure2}. Callers
 * should therefore supply a map with a predictable iteration order (for example a {@link
 * java.util.LinkedHashMap}).
 *
 * @param sequences A Map containing exactly two entries from sequence names as Strings to gapped
 *     ProteinSequences; the name is ignored
 * @param structure1 The structure passed together with the first sequence; must not be null
 * @param structure2 The structure passed together with the second sequence; must not be null
 * @return The AFPChain produced by {@link #fastaToAfpChain(ProteinSequence, ProteinSequence,
 *     Structure, Structure)}
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws IllegalArgumentException if {@code sequences} does not contain exactly two entries, or
 *     if either structure is null
 * @throws StructureException propagated from the four-argument overload
 */
public static AFPChain fastaToAfpChain(
    Map<String, ProteinSequence> sequences, Structure structure1, Structure structure2)
    throws StructureException {

  if (sequences.size() != 2) {
    throw new IllegalArgumentException(
        "There must be exactly 2 sequences, but there were " + sequences.size());
  }

  if (structure1 == null || structure2 == null) {
    throw new IllegalArgumentException("A structure is null");
  }

  // Only the sequences are needed; the names (map keys) are ignored
  List<ProteinSequence> seqs = new ArrayList<ProteinSequence>(sequences.values());

  return fastaToAfpChain(seqs.get(0), seqs.get(1), structure1, structure2);
}