/** * Print an alignment map in a concise representation. Edges are given as two numbers separated by * '>'. They are chained together where possible, or separated by spaces where disjoint or * branched. * * <p>Note that more concise representations may be possible. Examples: * <li>1>2>3>1 * <li>1>2>3>2 4>3 * * @param alignment The input function, as a map (see {@link * AlignmentTools#alignmentAsMap(AFPChain)}) * @param identity An identity-like function providing the isomorphism between the codomain of * alignment (of type <T>) and the domain (type <S>). * @return */ public static <S, T> String toConciseAlignmentString(Map<S, T> alignment, Map<T, S> identity) { // Clone input to prevent changes Map<S, T> alig = new HashMap<S, T>(alignment); // Generate inverse alignment Map<S, List<S>> inverse = new HashMap<S, List<S>>(); for (Entry<S, T> e : alig.entrySet()) { S val = identity.get(e.getValue()); if (inverse.containsKey(val)) { List<S> l = inverse.get(val); l.add(e.getKey()); } else { List<S> l = new ArrayList<S>(); l.add(e.getKey()); inverse.put(val, l); } } StringBuilder str = new StringBuilder(); while (!alig.isEmpty()) { // Pick an edge and work upstream to a root or cycle S seedNode = alig.keySet().iterator().next(); S node = seedNode; if (inverse.containsKey(seedNode)) { node = inverse.get(seedNode).iterator().next(); while (node != seedNode && inverse.containsKey(node)) { node = inverse.get(node).iterator().next(); } } // Now work downstream, deleting edges as we go seedNode = node; str.append(node); while (alig.containsKey(node)) { S lastNode = node; node = identity.get(alig.get(lastNode)); // Output str.append('>'); str.append(node); // Remove edge alig.remove(lastNode); List<S> inv = inverse.get(node); if (inv.size() > 1) { inv.remove(node); } else { inverse.remove(node); } } if (!alig.isEmpty()) { str.append(' '); } } return str.toString(); }
/**
 * Records, for each letter of a protein sequence string, whether it is uppercase (aligned) or
 * lowercase (unaligned). The returned list has the form expected for a {@link
 * ProteinSequence#getUserCollection() user collection}.
 *
 * @param sequence Make sure <em>not</em> to use {@link ProteinSequence#getSequenceAsString()} for
 *     this, as it won't preserve upper- and lower-case
 * @return a list with one Boolean per character of {@code sequence}, in order; {@code true} where
 *     {@link Character#isUpperCase(char)} holds for that character
 */
public static List<Object> getAlignedUserCollection(String sequence) {
  final int length = sequence.length();
  List<Object> upperCaseFlags = new ArrayList<Object>(length);
  for (int i = 0; i < length; i++) {
    upperCaseFlags.add(Character.isUpperCase(sequence.charAt(i)));
  }
  return upperCaseFlags;
}
/** * identify additional groups that are not directly attached to amino acids. * * @param mc {@link ModifiedCompound}. * @param chain a {@link Chain}. * @return a list of added groups. */ private void identifyAdditionalAttachments( ModifiedCompound mc, List<Group> ligands, Map<String, Chain> mapChainIdChain) { if (ligands.isEmpty()) { return; } // TODO: should the additional groups only be allowed to the identified // ligands or both amino acids and ligands? Currently only on ligands // ligands to amino acid bonds for same modification of unknown category // will be combined in mergeModComps() // TODO: how about chain-chain links? List<Group> identifiedGroups = new ArrayList<Group>(); for (StructureGroup num : mc.getGroups(false)) { Group group; try { // String numIns = "" + num.getResidueNumber(); // if (num.getInsCode() != null) { // numIns += num.getInsCode(); // } ResidueNumber resNum = new ResidueNumber(); resNum.setChainId(num.getChainId()); resNum.setSeqNum(num.getResidueNumber()); resNum.setInsCode(num.getInsCode()); // group = chain.getGroupByPDB(numIns); group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum); } catch (StructureException e) { logger.error("Exception: ", e); // should not happen continue; } identifiedGroups.add(group); } int start = 0; int n = identifiedGroups.size(); while (n > start) { for (Group group1 : ligands) { for (int i = start; i < n; i++) { Group group2 = identifiedGroups.get(i); if (!identifiedGroups.contains(group1)) { List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance); if (!linkedAtoms.isEmpty()) { for (Atom[] atoms : linkedAtoms) { mc.addAtomLinkage( StructureUtil.getStructureAtomLinkage(atoms[0], false, atoms[1], false)); } identifiedGroups.add(group1); break; } } } } start = n; n = identifiedGroups.size(); } }
/** * Takes an AFPChain and replaces the optimal alignment based on an alignment map * * <p>Parameters are filled with defaults (often null) or sometimes calculated. * * <p>For a way to create a new AFPChain, see {@link AlignmentTools#createAFPChain(Atom[], Atom[], * ResidueNumber[], ResidueNumber[])} * * @param afpChain The alignment to be modified * @param alignment The new alignment, as a Map * @throws StructureException if an error occurred during superposition * @see AlignmentTools#createAFPChain(Atom[], Atom[], ResidueNumber[], ResidueNumber[]) */ public static AFPChain replaceOptAln( AFPChain afpChain, Atom[] ca1, Atom[] ca2, Map<Integer, Integer> alignment) throws StructureException { // Determine block lengths // Sort ca1 indices, then start a new block whenever ca2 indices aren't // increasing monotonically. Integer[] res1 = alignment.keySet().toArray(new Integer[0]); Arrays.sort(res1); List<Integer> blockLens = new ArrayList<Integer>(2); int optLength = 0; Integer lastRes = alignment.get(res1[0]); int blkLen = lastRes == null ? 0 : 1; for (int i = 1; i < res1.length; i++) { Integer currRes = alignment.get(res1[i]); // res2 index assert (currRes != null); // could be converted to if statement if assertion doesn't hold; just modify // below as well. if (lastRes < currRes) { blkLen++; } else { // CP! 
blockLens.add(blkLen); optLength += blkLen; blkLen = 1; } lastRes = currRes; } blockLens.add(blkLen); optLength += blkLen; // Create array structure for alignment int[][][] optAln = new int[blockLens.size()][][]; int pos1 = 0; // index into res1 for (int blk = 0; blk < blockLens.size(); blk++) { optAln[blk] = new int[2][]; blkLen = blockLens.get(blk); optAln[blk][0] = new int[blkLen]; optAln[blk][1] = new int[blkLen]; int pos = 0; // index into optAln while (pos < blkLen) { optAln[blk][0][pos] = res1[pos1]; Integer currRes = alignment.get(res1[pos1]); optAln[blk][1][pos] = currRes; pos++; pos1++; } } assert (pos1 == optLength); // Create length array int[] optLens = new int[blockLens.size()]; for (int i = 0; i < blockLens.size(); i++) { optLens[i] = blockLens.get(i); } return replaceOptAln(afpChain, ca1, ca2, blockLens.size(), optLens, optAln); }
/** * Builds an {@link AFPChain} from already-matched arrays of atoms and residues. * * @param ca1 An array of atoms in the first structure * @param ca2 An array of atoms in the second structure * @param residues1 An array of {@link ResidueNumber ResidueNumbers} in the first structure that * are aligned. Only null ResidueNumbers are considered to be unaligned * @param residues2 An array of {@link ResidueNumber ResidueNumbers} in the second structure that * are aligned. Only null ResidueNumbers are considered to be unaligned * @throws StructureException */ private static AFPChain buildAlignment( Atom[] ca1, Atom[] ca2, ResidueNumber[] residues1, ResidueNumber[] residues2) throws StructureException { // remove any gap // this includes the ones introduced by the nullifying above List<ResidueNumber> alignedResiduesList1 = new ArrayList<ResidueNumber>(); List<ResidueNumber> alignedResiduesList2 = new ArrayList<ResidueNumber>(); for (int i = 0; i < residues1.length; i++) { if (residues1[i] != null && residues2[i] != null) { alignedResiduesList1.add(residues1[i]); alignedResiduesList2.add(residues2[i]); } } ResidueNumber[] alignedResidues1 = alignedResiduesList1.toArray(new ResidueNumber[alignedResiduesList1.size()]); ResidueNumber[] alignedResidues2 = alignedResiduesList2.toArray(new ResidueNumber[alignedResiduesList2.size()]); AFPChain afpChain = AlignmentTools.createAFPChain(ca1, ca2, alignedResidues1, alignedResidues2); afpChain.setAlgorithmName("unknown"); AlignmentTools.updateSuperposition(afpChain, ca1, ca2); afpChain.setBlockSize(new int[] {afpChain.getNrEQR()}); afpChain.setBlockRmsd(new double[] {afpChain.getTotalRmsdOpt()}); afpChain.setBlockGap(new int[] {afpChain.getGapLen()}); return afpChain; }
private void processCrosslink1( Map<Component, Set<Group>> mapCompGroups, List<ModifiedCompound> modComps, ProteinModification mod, List<Component> components) { // modified residue // TODO: is this the correct logic for CROSS_LINK_1? Set<Group> modifiedResidues = mapCompGroups.get(components.get(0)); if (modifiedResidues != null) { for (Group residue : modifiedResidues) { StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true); ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup); modComps.add(modRes); } } }
/** * Assembly the matched linkages. * * @param matchedAtomsOfLinkages * @param mod * @param condition * @param ret ModifiedCompound will be stored here. */ private void assembleLinkages( List<List<Atom[]>> matchedAtomsOfLinkages, ProteinModification mod, List<ModifiedCompound> ret) { ModificationCondition condition = mod.getCondition(); List<ModificationLinkage> modLinks = condition.getLinkages(); int nLink = matchedAtomsOfLinkages.size(); int[] indices = new int[nLink]; Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>(); while (indices[0] < matchedAtomsOfLinkages.get(0).size()) { List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]); atomLinkages.add(atoms); } if (matchLinkages(modLinks, atomLinkages)) { // matched int n = atomLinkages.size(); List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n); for (int i = 0; i < n; i++) { Atom[] linkage = atomLinkages.get(i); StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage( linkage[0], residues.contains(linkage[0].getGroup()), linkage[1], residues.contains(linkage[1].getGroup())); linkages.add(link); } ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages); if (!identifiedCompounds.contains(mc)) { ret.add(mc); identifiedCompounds.add(mc); } } // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0]) int i = nLink - 1; while (i >= 0) { if (i == 0 || indices[i] < matchedAtomsOfLinkages.get(i).size() - 1) { indices[i]++; break; } else { indices[i] = 0; i--; } } } }
/** * Retrieves the optimum alignment from an AFPChain and returns it as a java collection. The * result is indexed in the same way as {@link AFPChain#getOptAln()}, but has the correct size(). * * <pre> * List<List<List<Integer>>> aln = getOptAlnAsList(AFPChain afpChain); * aln.get(blockNum).get(structureNum={0,1}).get(pos)</pre> * * @param afpChain * @return */ public static List<List<List<Integer>>> getOptAlnAsList(AFPChain afpChain) { int[][][] optAln = afpChain.getOptAln(); int[] optLen = afpChain.getOptLen(); List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum()); for (int blockNum = 0; blockNum < afpChain.getBlockNum(); blockNum++) { // TODO could improve speed an memory by wrapping the arrays with // an unmodifiable list, similar to Arrays.asList(...) but with the // correct size parameter. List<Integer> align1 = new ArrayList<Integer>(optLen[blockNum]); List<Integer> align2 = new ArrayList<Integer>(optLen[blockNum]); for (int pos = 0; pos < optLen[blockNum]; pos++) { align1.add(optAln[blockNum][0][pos]); align2.add(optAln[blockNum][1][pos]); } List<List<Integer>> block = new ArrayList<List<Integer>>(2); block.add(align1); block.add(align2); blocks.add(block); } return blocks; }
/** Merge identified modified compounds if linked. */ private void mergeModComps(List<ModifiedCompound> modComps) { TreeSet<Integer> remove = new TreeSet<Integer>(); int n = modComps.size(); for (int icurr = 1; icurr < n; icurr++) { ModifiedCompound curr = modComps.get(icurr); String id = curr.getModification().getId(); if (ProteinModificationRegistry.getById(id).getCategory() != ModificationCategory.UNDEFINED) continue; // find linked compounds that before curr // List<Integer> merging = new ArrayList<Integer>(); int ipre = 0; for (; ipre < icurr; ipre++) { if (remove.contains(ipre)) continue; ModifiedCompound pre = modComps.get(ipre); if (!Collections.disjoint(pre.getGroups(false), curr.getGroups(false))) { break; } } if (ipre < icurr) { ModifiedCompound mcKeep = modComps.get(ipre); // merge modifications of the same type if (mcKeep.getModification().getId().equals(id)) { // merging the current one to the previous one mcKeep.addAtomLinkages(curr.getAtomLinkages()); remove.add(icurr); } } } Iterator<Integer> it = remove.descendingIterator(); while (it.hasNext()) { modComps.remove(it.next().intValue()); } }
/** * Applies an alignment k times. Eg if alignmentMap defines function f(x), this returns a function * f^k(x)=f(f(...f(x)...)). * * <p>To allow for functions with different domains and codomains, the identity function allows * converting back in a reasonable way. For instance, if alignmentMap represented an alignment * between two proteins with different numbering schemes, the identity function could calculate * the offset between residue numbers, eg I(x) = x-offset. * * <p>When an identity function is provided, the returned function calculates f^k(x) = f(I( f(I( * ... f(x) ... )) )). * * @param <S> * @param <T> * @param alignmentMap The input function, as a map (see {@link * AlignmentTools#alignmentAsMap(AFPChain)}) * @param identity An identity-like function providing the isomorphism between the codomain of * alignmentMap (of type <T>) and the domain (type <S>). * @param k The number of times to apply the alignment * @return A new alignment. If the input function is not automorphic (one-to-one), then some * inputs may map to null, indicating that the function is undefined for that input. */ public static <S, T> Map<S, T> applyAlignment(Map<S, T> alignmentMap, Map<T, S> identity, int k) { // This implementation simply applies the map k times. // If k were large, it would be more efficient to do this recursively, // (eg f^4 = (f^2)^2) but k will usually be small. if (k < 0) throw new IllegalArgumentException("k must be positive"); if (k == 1) { return new HashMap<S, T>(alignmentMap); } // Convert to lists to establish a fixed order List<S> preimage = new ArrayList<S>(alignmentMap.keySet()); // currently unmodified List<S> image = new ArrayList<S>(preimage); for (int n = 1; n < k; n++) { // apply alignment for (int i = 0; i < image.size(); i++) { S pre = image.get(i); T intermediate = (pre == null ? null : alignmentMap.get(pre)); S post = (intermediate == null ? 
null : identity.get(intermediate)); image.set(i, post); } } Map<S, T> imageMap = new HashMap<S, T>(alignmentMap.size()); // TODO handle nulls consistently. // assure that all the residues in the domain are valid keys /* for(int i=0;i<preimage.size();i++) { S pre = preimage.get(i); T intermediate = (pre==null?null: alignmentMap.get(pre)); S post = (intermediate==null?null: identity.get(intermediate)); imageMap.put(post, null); } */ // now populate with actual values for (int i = 0; i < preimage.size(); i++) { S pre = preimage.get(i); // image is currently f^k-1(x), so take the final step S preK1 = image.get(i); T postK = (preK1 == null ? null : alignmentMap.get(preK1)); imageMap.put(pre, postK); } return imageMap; }
/** * Record unidentifiable atom linkages in a chain. Only linkages between two residues or one * residue and one ligand will be recorded. */ private void recordUnidentifiableAtomLinkages( List<ModifiedCompound> modComps, List<Group> ligands) { // first put identified linkages in a map for fast query Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>(); for (ModifiedCompound mc : modComps) { identifiedLinkages.addAll(mc.getAtomLinkages()); } // record // cross link int nRes = residues.size(); for (int i = 0; i < nRes - 1; i++) { Group group1 = residues.get(i); for (int j = i + 1; j < nRes; j++) { Group group2 = residues.get(j); List<Atom[]> linkages = StructureUtil.findAtomLinkages(group1, group2, true, bondLengthTolerance); for (Atom[] atoms : linkages) { StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], true); unidentifiableAtomLinkages.add(link); } } } // attachment int nLig = ligands.size(); for (int i = 0; i < nRes; i++) { Group group1 = residues.get(i); for (int j = 0; j < nLig; j++) { Group group2 = ligands.get(j); if (group1.equals(group2)) { // overlap between residues and ligands continue; } List<Atom[]> linkages = StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance); for (Atom[] atoms : linkages) { StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], false); unidentifiableAtomLinkages.add(link); } } } }
/** * @param linkages * @param atomLinkages * @return true if atomLinkages satisfy the condition; false, otherwise. */ private boolean matchLinkages(List<ModificationLinkage> linkages, List<Atom[]> atomLinkages) { int nLink = linkages.size(); if (nLink != atomLinkages.size()) { return false; } for (int i = 0; i < nLink - 1; i++) { ModificationLinkage link1 = linkages.get(i); Atom[] atoms1 = atomLinkages.get(i); for (int j = i + 1; j < nLink; j++) { ModificationLinkage link2 = linkages.get(j); Atom[] atoms2 = atomLinkages.get(j); // check components if (((link1.getIndexOfComponent1() == link2.getIndexOfComponent1()) != (atoms1[0].getGroup().equals(atoms2[0].getGroup()))) || ((link1.getIndexOfComponent1() == link2.getIndexOfComponent2()) != (atoms1[0].getGroup().equals(atoms2[1].getGroup()))) || ((link1.getIndexOfComponent2() == link2.getIndexOfComponent1()) != (atoms1[1].getGroup().equals(atoms2[0].getGroup()))) || ((link1.getIndexOfComponent2() == link2.getIndexOfComponent2()) != (atoms1[1].getGroup().equals(atoms2[1].getGroup())))) { return false; } // check atoms String label11 = link1.getLabelOfAtomOnComponent1(); String label12 = link1.getLabelOfAtomOnComponent2(); String label21 = link2.getLabelOfAtomOnComponent1(); String label22 = link2.getLabelOfAtomOnComponent2(); if ((label11 != null && label21 != null && label11.equals(label21)) != (atoms1[0].equals(atoms2[0])) || (label11 != null && label22 != null && label11.equals(label22)) != (atoms1[0].equals(atoms2[1])) || (label12 != null && label21 != null && label12.equals(label21)) != (atoms1[1].equals(atoms2[0])) || (label12 != null && label22 != null && label12.equals(label22)) != (atoms1[1].equals(atoms2[1]))) { return false; } } } return true; }
/**
 * Uses two sequences each with a corresponding structure to create an AFPChain corresponding to
 * the alignment. Provided only for convenience since FastaReaders return such maps.
 *
 * <p>The sequences are taken in the iteration order of the map: the first entry is paired with
 * {@code structure1} and the second with {@code structure2}.
 *
 * @param sequences A Map containing exactly two entries from sequence names as Strings to gapped
 *     ProteinSequences; the name is ignored
 * @param structure1 structure belonging to the first sequence; must not be null
 * @param structure2 structure belonging to the second sequence; must not be null
 * @return the result of {@link #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure,
 *     Structure)}
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws StructureException
 * @throws IllegalArgumentException if there are not exactly two sequences or a structure is null
 */
public static AFPChain fastaToAfpChain(
    Map<String, ProteinSequence> sequences, Structure structure1, Structure structure2)
    throws StructureException {

  final int sequenceCount = sequences.size();
  if (sequenceCount != 2) {
    throw new IllegalArgumentException(
        "There must be exactly 2 sequences, but there were " + sequenceCount);
  }

  if (structure1 == null || structure2 == null) {
    throw new IllegalArgumentException("A structure is null");
  }

  // names are ignored; only the sequences, in map order, are needed
  List<ProteinSequence> ordered = new ArrayList<ProteinSequence>();
  for (Map.Entry<String, ProteinSequence> entry : sequences.entrySet()) {
    ordered.add(entry.getValue());
  }

  ProteinSequence first = ordered.get(0);
  ProteinSequence second = ordered.get(1);
  return fastaToAfpChain(first, second, structure1, structure2);
}
/** * Identify a set of modifications in a a list of chains. * * @param chains query {@link Chain}s. * @param potentialModifications query {@link ProteinModification}s. */ public void identify( final List<Chain> chains, final Set<ProteinModification> potentialModifications) { if (chains == null) { throw new IllegalArgumentException("Null structure."); } if (potentialModifications == null) { throw new IllegalArgumentException("Null potentialModifications."); } reset(); if (potentialModifications.isEmpty()) { return; } Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size()); residues = new ArrayList<Group>(); List<Group> ligands = new ArrayList<Group>(); Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>(); for (Chain chain : chains) { mapChainIdChain.put(chain.getChainID(), chain); List<Group> ress = StructureUtil.getAminoAcids(chain); // List<Group> ligs = chain.getAtomLigands(); List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); residues.addAll(ress); residues.removeAll(ligs); ligands.addAll(ligs); addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); } if (residues.isEmpty()) { String pdbId = "?"; if (chains.size() > 0) { Structure struc = chains.get(0).getParent(); if (struc != null) pdbId = struc.getPDBCode(); } logger.warn( "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); } List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>(); for (ProteinModification mod : potentialModifications) { ModificationCondition condition = mod.getCondition(); List<Component> components = condition.getComponents(); if (!mapCompGroups.keySet().containsAll(components)) { // not all components exist for this mod. 
continue; } int sizeComps = components.size(); if (sizeComps == 1) { processCrosslink1(mapCompGroups, modComps, mod, components); } else { processMultiCrosslink(mapCompGroups, modComps, mod, condition); } } if (recordAdditionalAttachments) { // identify additional groups that are not directly attached to amino acids. for (ModifiedCompound mc : modComps) { identifyAdditionalAttachments(mc, ligands, mapChainIdChain); } } mergeModComps(modComps); identifiedModifiedCompounds.addAll(modComps); // record unidentifiable linkage if (recordUnidentifiableModifiedCompounds) { recordUnidentifiableAtomLinkages(modComps, ligands); recordUnidentifiableModifiedResidues(modComps); } }
/**
 * Registers the given residues and ligands under every modification component they can stand
 * for, including wildcard ("*") components and N-/C-terminal components.
 *
 * @param modifications a set of {@link ProteinModification}s.
 * @param residues amino-acid groups of one chain, in chain order
 * @param ligands ligand groups of the same chain
 * @param saveTo map from component to the groups found for it; results are added to this map
 * @throws IllegalArgumentException if {@code modifications}, {@code residues} or {@code ligands}
 *     is null
 */
private void addModificationGroups(
    final Set<ProteinModification> modifications,
    final List<Group> residues,
    final List<Group> ligands,
    final Map<Component, Set<Group>> saveTo) {
  if (residues == null || ligands == null || modifications == null) {
    throw new IllegalArgumentException("Null argument(s).");
  }

  // index: single-id component -> every (possibly multi-id) component that lists that id
  Map<Component, Set<Component>> mapSingleMultiComps = new HashMap<Component, Set<Component>>();
  for (ProteinModification mod : modifications) {
    ModificationCondition condition = mod.getCondition();
    for (Component comp : condition.getComponents()) {
      for (String pdbccId : comp.getPdbccIds()) {
        Component single =
            Component.of(Collections.singleton(pdbccId), comp.isNTerminal(), comp.isCTerminal());
        Set<Component> mult = mapSingleMultiComps.get(single);
        if (mult == null) {
          mult = new HashSet<Component>();
          mapSingleMultiComps.put(single, mult);
        }
        mult.add(comp);
      }
    }
  }

  // the index is not modified below, so one wildcard lookup serves ligands and residues
  Set<Component> wildCard = mapSingleMultiComps.get(Component.of("*"));

  // ligands
  for (Group group : ligands) {
    String pdbccId = group.getPDBName().trim();
    Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
    addGroupToComponents(group, wildCard, comps, saveTo);
  }

  // residues
  if (residues.isEmpty()) {
    return;
  }

  // for all residues
  for (Group group : residues) {
    String pdbccId = group.getPDBName().trim();
    Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
    addGroupToComponents(group, wildCard, comps, saveTo);
  }

  // for N-terminal
  Set<Component> nTermWildCard = mapSingleMultiComps.get(Component.of("*", true, false));
  int nRes = residues.size();
  int iRes = 0;
  Group res;
  do {
    // for all ligands on N terminal and the first residue
    res = residues.get(iRes++);
    // trim, so that the terminal lookups use the same key form as the lookups above
    Set<Component> comps =
        mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), true, false));
    addGroupToComponents(res, nTermWildCard, comps, saveTo);
  } while (iRes < nRes && ligands.contains(res));

  // for C-terminal
  Set<Component> cTermWildCard = mapSingleMultiComps.get(Component.of("*", false, true));
  iRes = residues.size() - 1;
  do {
    // for all ligands on C terminal and the last residue
    res = residues.get(iRes--);
    Set<Component> comps =
        mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), false, true));
    addGroupToComponents(res, cTermWildCard, comps, saveTo);
  } while (iRes >= 0 && ligands.contains(res));
}

/** Adds {@code group} to the entry of every component in the union of the two component sets. */
private void addGroupToComponents(
    Group group,
    Set<Component> wildCardComps,
    Set<Component> specificComps,
    Map<Component, Set<Group>> saveTo) {
  for (Component comp : unionComponentSet(wildCardComps, specificComps)) {
    Set<Group> gs = saveTo.get(comp);
    if (gs == null) {
      gs = new LinkedHashSet<Group>();
      saveTo.put(comp, gs);
    }
    gs.add(group);
  }
}
/** Get matched atoms for all linkages. */ private List<List<Atom[]>> getMatchedAtomsOfLinkages( ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) { List<ModificationLinkage> linkages = condition.getLinkages(); int nLink = linkages.size(); List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { ModificationLinkage linkage = linkages.get(iLink); Component comp1 = linkage.getComponent1(); Component comp2 = linkage.getComponent2(); // boolean isAA1 = comp1.; // boolean isAA2 = comp2.getType()==true; Set<Group> groups1 = mapCompGroups.get(comp1); Set<Group> groups2 = mapCompGroups.get(comp2); List<Atom[]> list = new ArrayList<Atom[]>(); List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1(); for (String name : potentialNamesOfAtomOnGroup1) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup1 = null; // search all atoms break; } } List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2(); for (String name : potentialNamesOfAtomOnGroup2) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup2 = null; // search all atoms break; } } for (Group g1 : groups1) { for (Group g2 : groups2) { if (g1.equals(g2)) { continue; } // only for wildcard match of two residues boolean ignoreNCLinkage = potentialNamesOfAtomOnGroup1 == null && potentialNamesOfAtomOnGroup2 == null && residues.contains(g1) && residues.contains(g2); Atom[] atoms = StructureUtil.findNearestAtomLinkage( g1, g2, potentialNamesOfAtomOnGroup1, potentialNamesOfAtomOnGroup2, ignoreNCLinkage, bondLengthTolerance); if (atoms != null) { list.add(atoms); } } } if (list.isEmpty()) { // broken linkage break; } matchedAtomsOfLinkages.add(list); } return matchedAtomsOfLinkages; }
/** * @param a * @param ca1 * @param ca2 * @return * @throws StructureException if an error occurred during superposition */ public static AFPChain splitBlocksByTopology(AFPChain a, Atom[] ca1, Atom[] ca2) throws StructureException { int[][][] optAln = a.getOptAln(); int blockNum = a.getBlockNum(); int[] optLen = a.getOptLen(); // Determine block lengths // Split blocks if residue indices don't increase monotonically List<Integer> newBlkLen = new ArrayList<Integer>(); boolean blockChanged = false; for (int blk = 0; blk < blockNum; blk++) { int currLen = 1; for (int pos = 1; pos < optLen[blk]; pos++) { if (optAln[blk][0][pos] <= optAln[blk][0][pos - 1] || optAln[blk][1][pos] <= optAln[blk][1][pos - 1]) { // start a new block newBlkLen.add(currLen); currLen = 0; blockChanged = true; } currLen++; } if (optLen[blk] < 2) { newBlkLen.add(optLen[blk]); } else { newBlkLen.add(currLen); } } // Check if anything needs to be split if (!blockChanged) { return a; } // Split blocks List<int[][]> blocks = new ArrayList<int[][]>(newBlkLen.size()); int oldBlk = 0; int pos = 0; for (int blkLen : newBlkLen) { if (blkLen == optLen[oldBlk]) { assert (pos == 0); // should be the whole block // Use the old block blocks.add(optAln[oldBlk]); } else { int[][] newBlock = new int[2][blkLen]; assert (pos + blkLen <= optLen[oldBlk]); // don't overrun block for (int i = 0; i < blkLen; i++) { newBlock[0][i] = optAln[oldBlk][0][pos + i]; newBlock[1][i] = optAln[oldBlk][1][pos + i]; } pos += blkLen; blocks.add(newBlock); if (pos == optLen[oldBlk]) { // Finished this oldBlk, start the next oldBlk++; pos = 0; } } } // Store new blocks int[][][] newOptAln = blocks.toArray(new int[0][][]); int[] newBlockLens = new int[newBlkLen.size()]; for (int i = 0; i < newBlkLen.size(); i++) { newBlockLens[i] = newBlkLen.get(i); } return replaceOptAln(a, ca1, ca2, blocks.size(), newBlockLens, newOptAln); }
/** * Tries to detect symmetry in an alignment. * * <p>Conceptually, an alignment is a function f:A->B between two sets of integers. The function * may have simple topology (meaning that if two elements of A are close, then their images in B * will also be close), or may have more complex topology (such as a circular permutation). This * function checks <i>alignment</i> against a reference function <i>identity</i>, which should * have simple topology. It then tries to determine the symmetry order of <i>alignment</i> * relative to <i>identity</i>, up to a maximum order of <i>maxSymmetry</i>. * * <p><strong>Details</strong><br> * Considers the offset (in number of residues) which a residue moves after undergoing <i>n</i> * alternating transforms by alignment and identity. If <i>n</i> corresponds to the intrinsic * order of the alignment, this will be small. This algorithm tries increasing values of <i>n</i> * and looks for abrupt decreases in the root mean squared offset. If none are found at * <i>n</i><=maxSymmetry, the alignment is reported as non-symmetric. * * @param alignment The alignment to test for symmetry * @param identity An alignment with simple topology which approximates the sequential * relationship between the two proteins. Should map in the reverse direction from alignment. * @param maxSymmetry Maximum symmetry to consider. High values increase the calculation time and * can lead to overfitting. * @param minimumMetricChange Percent decrease in root mean squared offsets in order to declare * symmetry. 0.4f seems to work well for CeSymm. * @return The order of symmetry of alignment, or 1 if no order <= maxSymmetry is found. 
* @see IdentityMap For a simple identity function */ public static int getSymmetryOrder( Map<Integer, Integer> alignment, Map<Integer, Integer> identity, final int maxSymmetry, final float minimumMetricChange) { List<Integer> preimage = new ArrayList<Integer>(alignment.keySet()); // currently unmodified List<Integer> image = new ArrayList<Integer>(preimage); int bestSymmetry = 1; double bestMetric = Double.POSITIVE_INFINITY; // lower is better boolean foundSymmetry = false; if (debug) { logger.trace("Symm\tPos\tDelta"); } for (int n = 1; n <= maxSymmetry; n++) { int deltasSq = 0; int numDeltas = 0; // apply alignment for (int i = 0; i < image.size(); i++) { Integer pre = image.get(i); Integer intermediate = (pre == null ? null : alignment.get(pre)); Integer post = (intermediate == null ? null : identity.get(intermediate)); image.set(i, post); if (post != null) { int delta = post - preimage.get(i); deltasSq += delta * delta; numDeltas++; if (debug) { logger.debug("%d\t%d\t%d\n", n, preimage.get(i), delta); } } } // Metrics: RMS compensates for the trend of smaller numDeltas with higher order // Not normalizing by numDeltas favors smaller orders double metric = Math.sqrt((double) deltasSq / numDeltas); // root mean squared distance if (!foundSymmetry && metric < bestMetric * minimumMetricChange) { // n = 1 is never the best symmetry if (bestMetric < Double.POSITIVE_INFINITY) { foundSymmetry = true; } bestSymmetry = n; bestMetric = metric; } // When debugging need to loop over everything. Unneeded in production if (!debug && foundSymmetry) { break; } } if (foundSymmetry) { return bestSymmetry; } else { return 1; } }