/** * Takes a potentially non-sequential alignment and guesses a sequential version of it. Residues * from each structure are sorted sequentially and then compared directly. * * <p>The results of this method are consistent with what one might expect from an identity * function, and are therefore useful with {@link #getSymmetryOrder(Map, Map identity, int, * float)}. * * <ul> * <li>Perfect self-alignments will have the same pre-image and image, so will map X->X * <li>Gaps and alignment errors will cause errors in the resulting map, but only locally. * Errors do not propagate through the whole alignment. * </ul> * * <h4>Example:</h4> * * A non sequential alignment, represented schematically as * * <pre> * 12456789 * 78912345</pre> * * would result in a map * * <pre> * 12456789 * 12345789</pre> * * @param alignment The non-sequential input alignment * @param inverseAlignment If false, map from structure1 to structure2. If true, generate the * inverse of that map. * @return A mapping from sequential residues of one protein to those of the other * @throws IllegalArgumentException if the input alignment is not one-to-one. */ public static Map<Integer, Integer> guessSequentialAlignment( Map<Integer, Integer> alignment, boolean inverseAlignment) { Map<Integer, Integer> identity = new HashMap<Integer, Integer>(); SortedSet<Integer> aligned1 = new TreeSet<Integer>(); SortedSet<Integer> aligned2 = new TreeSet<Integer>(); for (Entry<Integer, Integer> pair : alignment.entrySet()) { aligned1.add(pair.getKey()); if (!aligned2.add(pair.getValue())) throw new IllegalArgumentException( "Alignment is not one-to-one for residue " + pair.getValue() + " of the second structure."); } Iterator<Integer> it1 = aligned1.iterator(); Iterator<Integer> it2 = aligned2.iterator(); while (it1.hasNext()) { if (inverseAlignment) { // 2->1 identity.put(it2.next(), it1.next()); } else { // 1->2 identity.put(it1.next(), it2.next()); } } return identity; }
/**
 * Parses a concise alignment string into a map.
 *
 * <p>Every occurrence of {@code from>to} (two runs of decimal digits separated by {@code >})
 * contributes one entry. Chains are supported: the right-hand number of one pair may be the
 * left-hand number of the next, so {@code "1>2>3"} yields {@code 1->2} and {@code 2->3}. Any
 * other text between pairs is ignored. If the same {@code from} value appears more than once,
 * the last occurrence wins.
 *
 * @param string the concise alignment string to parse; must not be null
 * @return a new map holding one entry per {@code from>to} pair found, empty if there is none
 * @throws NumberFormatException if a number does not fit in an {@code int}
 * @see #toConciseAlignmentString(Map, Map)
 */
public static Map<Integer, Integer> fromConciseAlignmentString(String string) {
  Map<Integer, Integer> map = new HashMap<Integer, Integer>();

  // Compile once and walk a single matcher over the input, instead of recompiling the pattern
  // and copying the remaining string on every iteration.
  Matcher matcher = Pattern.compile("(\\d+)>(\\d+)").matcher(string);
  int searchFrom = 0;
  while (matcher.find(searchFrom)) {
    Integer from = Integer.parseInt(matcher.group(1));
    Integer to = Integer.parseInt(matcher.group(2));
    map.put(from, to);
    // Resume at the start of the "to" number so it can serve as "from" of a chained pair.
    searchFrom = matcher.start(2);
  }
  return map;
}
/** * Applies an alignment k times. Eg if alignmentMap defines function f(x), this returns a function * f^k(x)=f(f(...f(x)...)). * * <p>To allow for functions with different domains and codomains, the identity function allows * converting back in a reasonable way. For instance, if alignmentMap represented an alignment * between two proteins with different numbering schemes, the identity function could calculate * the offset between residue numbers, eg I(x) = x-offset. * * <p>When an identity function is provided, the returned function calculates f^k(x) = f(I( f(I( * ... f(x) ... )) )). * * @param <S> * @param <T> * @param alignmentMap The input function, as a map (see {@link * AlignmentTools#alignmentAsMap(AFPChain)}) * @param identity An identity-like function providing the isomorphism between the codomain of * alignmentMap (of type <T>) and the domain (type <S>). * @param k The number of times to apply the alignment * @return A new alignment. If the input function is not automorphic (one-to-one), then some * inputs may map to null, indicating that the function is undefined for that input. */ public static <S, T> Map<S, T> applyAlignment(Map<S, T> alignmentMap, Map<T, S> identity, int k) { // This implementation simply applies the map k times. // If k were large, it would be more efficient to do this recursively, // (eg f^4 = (f^2)^2) but k will usually be small. if (k < 0) throw new IllegalArgumentException("k must be positive"); if (k == 1) { return new HashMap<S, T>(alignmentMap); } // Convert to lists to establish a fixed order List<S> preimage = new ArrayList<S>(alignmentMap.keySet()); // currently unmodified List<S> image = new ArrayList<S>(preimage); for (int n = 1; n < k; n++) { // apply alignment for (int i = 0; i < image.size(); i++) { S pre = image.get(i); T intermediate = (pre == null ? null : alignmentMap.get(pre)); S post = (intermediate == null ? 
null : identity.get(intermediate)); image.set(i, post); } } Map<S, T> imageMap = new HashMap<S, T>(alignmentMap.size()); // TODO handle nulls consistently. // assure that all the residues in the domain are valid keys /* for(int i=0;i<preimage.size();i++) { S pre = preimage.get(i); T intermediate = (pre==null?null: alignmentMap.get(pre)); S post = (intermediate==null?null: identity.get(intermediate)); imageMap.put(post, null); } */ // now populate with actual values for (int i = 0; i < preimage.size(); i++) { S pre = preimage.get(i); // image is currently f^k-1(x), so take the final step S preK1 = image.get(i); T postK = (preK1 == null ? null : alignmentMap.get(preK1)); imageMap.put(pre, postK); } return imageMap; }
/**
 * Converts an alignment into a map from residue indices of protein 1 to the residue indices of
 * protein 2 they are aligned with.
 *
 * <p>For example,
 *
 * <pre>
 * 1234
 * 5678</pre>
 *
 * becomes
 *
 * <pre>
 * 1->5
 * 2->6
 * 3->7
 * 4->8</pre>
 *
 * <p>All blocks of the optimal alignment are merged into a single map. An alignment whose length
 * is less than 1 produces an empty map.
 *
 * @param afpChain An alignment
 * @return A mapping from aligned residues of protein 1 to their partners in protein 2.
 * @throws StructureException If a residue of protein 1 occurs more than once in the alignment
 */
public static Map<Integer, Integer> alignmentAsMap(AFPChain afpChain) throws StructureException {
  Map<Integer, Integer> residueMap = new HashMap<Integer, Integer>();

  if (afpChain.getAlnLength() >= 1) {
    int[][][] optAln = afpChain.getOptAln();
    int[] optLen = afpChain.getOptLen();

    for (int block = 0; block < afpChain.getBlockNum(); block++) {
      int blockLength = optLen[block];
      for (int pos = 0; pos < blockLength; pos++) {
        int res1 = optAln[block][0][pos];
        int res2 = optAln[block][1][pos];

        // Values are never null, so a non-null previous value means res1 was already mapped.
        Integer previousPartner = residueMap.put(res1, res2);
        if (previousPartner != null) {
          throw new StructureException(
              String.format(
                  "Residue %d aligned to both %d and %d.", res1, previousPartner, res2));
        }
      }
    }
  }
  return residueMap;
}
/** * Identify a set of modifications in a a list of chains. * * @param chains query {@link Chain}s. * @param potentialModifications query {@link ProteinModification}s. */ public void identify( final List<Chain> chains, final Set<ProteinModification> potentialModifications) { if (chains == null) { throw new IllegalArgumentException("Null structure."); } if (potentialModifications == null) { throw new IllegalArgumentException("Null potentialModifications."); } reset(); if (potentialModifications.isEmpty()) { return; } Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size()); residues = new ArrayList<Group>(); List<Group> ligands = new ArrayList<Group>(); Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>(); for (Chain chain : chains) { mapChainIdChain.put(chain.getChainID(), chain); List<Group> ress = StructureUtil.getAminoAcids(chain); // List<Group> ligs = chain.getAtomLigands(); List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); residues.addAll(ress); residues.removeAll(ligs); ligands.addAll(ligs); addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); } if (residues.isEmpty()) { String pdbId = "?"; if (chains.size() > 0) { Structure struc = chains.get(0).getParent(); if (struc != null) pdbId = struc.getPDBCode(); } logger.warn( "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); } List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>(); for (ProteinModification mod : potentialModifications) { ModificationCondition condition = mod.getCondition(); List<Component> components = condition.getComponents(); if (!mapCompGroups.keySet().containsAll(components)) { // not all components exist for this mod. 
continue; } int sizeComps = components.size(); if (sizeComps == 1) { processCrosslink1(mapCompGroups, modComps, mod, components); } else { processMultiCrosslink(mapCompGroups, modComps, mod, condition); } } if (recordAdditionalAttachments) { // identify additional groups that are not directly attached to amino acids. for (ModifiedCompound mc : modComps) { identifyAdditionalAttachments(mc, ligands, mapChainIdChain); } } mergeModComps(modComps); identifiedModifiedCompounds.addAll(modComps); // record unidentifiable linkage if (recordUnidentifiableModifiedCompounds) { recordUnidentifiableAtomLinkages(modComps, ligands); recordUnidentifiableModifiedResidues(modComps); } }