/** * identify additional groups that are not directly attached to amino acids. * * @param mc {@link ModifiedCompound}. * @param chain a {@link Chain}. * @return a list of added groups. */ private void identifyAdditionalAttachments( ModifiedCompound mc, List<Group> ligands, Map<String, Chain> mapChainIdChain) { if (ligands.isEmpty()) { return; } // TODO: should the additional groups only be allowed to the identified // ligands or both amino acids and ligands? Currently only on ligands // ligands to amino acid bonds for same modification of unknown category // will be combined in mergeModComps() // TODO: how about chain-chain links? List<Group> identifiedGroups = new ArrayList<Group>(); for (StructureGroup num : mc.getGroups(false)) { Group group; try { // String numIns = "" + num.getResidueNumber(); // if (num.getInsCode() != null) { // numIns += num.getInsCode(); // } ResidueNumber resNum = new ResidueNumber(); resNum.setChainId(num.getChainId()); resNum.setSeqNum(num.getResidueNumber()); resNum.setInsCode(num.getInsCode()); // group = chain.getGroupByPDB(numIns); group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum); } catch (StructureException e) { logger.error("Exception: ", e); // should not happen continue; } identifiedGroups.add(group); } int start = 0; int n = identifiedGroups.size(); while (n > start) { for (Group group1 : ligands) { for (int i = start; i < n; i++) { Group group2 = identifiedGroups.get(i); if (!identifiedGroups.contains(group1)) { List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance); if (!linkedAtoms.isEmpty()) { for (Atom[] atoms : linkedAtoms) { mc.addAtomLinkage( StructureUtil.getStructureAtomLinkage(atoms[0], false, atoms[1], false)); } identifiedGroups.add(group1); break; } } } } start = n; n = identifiedGroups.size(); } }
/** * Record unidentifiable atom linkages in a chain. Only linkages between two residues or one * residue and one ligand will be recorded. */ private void recordUnidentifiableAtomLinkages( List<ModifiedCompound> modComps, List<Group> ligands) { // first put identified linkages in a map for fast query Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>(); for (ModifiedCompound mc : modComps) { identifiedLinkages.addAll(mc.getAtomLinkages()); } // record // cross link int nRes = residues.size(); for (int i = 0; i < nRes - 1; i++) { Group group1 = residues.get(i); for (int j = i + 1; j < nRes; j++) { Group group2 = residues.get(j); List<Atom[]> linkages = StructureUtil.findAtomLinkages(group1, group2, true, bondLengthTolerance); for (Atom[] atoms : linkages) { StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], true); unidentifiableAtomLinkages.add(link); } } } // attachment int nLig = ligands.size(); for (int i = 0; i < nRes; i++) { Group group1 = residues.get(i); for (int j = 0; j < nLig; j++) { Group group2 = ligands.get(j); if (group1.equals(group2)) { // overlap between residues and ligands continue; } List<Atom[]> linkages = StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance); for (Atom[] atoms : linkages) { StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], false); unidentifiableAtomLinkages.add(link); } } } }
/** * Assembly the matched linkages. * * @param matchedAtomsOfLinkages * @param mod * @param condition * @param ret ModifiedCompound will be stored here. */ private void assembleLinkages( List<List<Atom[]>> matchedAtomsOfLinkages, ProteinModification mod, List<ModifiedCompound> ret) { ModificationCondition condition = mod.getCondition(); List<ModificationLinkage> modLinks = condition.getLinkages(); int nLink = matchedAtomsOfLinkages.size(); int[] indices = new int[nLink]; Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>(); while (indices[0] < matchedAtomsOfLinkages.get(0).size()) { List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]); atomLinkages.add(atoms); } if (matchLinkages(modLinks, atomLinkages)) { // matched int n = atomLinkages.size(); List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n); for (int i = 0; i < n; i++) { Atom[] linkage = atomLinkages.get(i); StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage( linkage[0], residues.contains(linkage[0].getGroup()), linkage[1], residues.contains(linkage[1].getGroup())); linkages.add(link); } ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages); if (!identifiedCompounds.contains(mc)) { ret.add(mc); identifiedCompounds.add(mc); } } // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0]) int i = nLink - 1; while (i >= 0) { if (i == 0 || indices[i] < matchedAtomsOfLinkages.get(i).size() - 1) { indices[i]++; break; } else { indices[i] = 0; i--; } } } }
private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) { Set<StructureGroup> identifiedComps = new HashSet<StructureGroup>(); for (ModifiedCompound mc : modComps) { identifiedComps.addAll(mc.getGroups(true)); } // TODO: use the ModifiedAminoAcid after Andreas add that. for (Group group : residues) { if (group.getType().equals(GroupType.HETATM)) { StructureGroup strucGroup = StructureUtil.getStructureGroup(group, true); if (!identifiedComps.contains(strucGroup)) { unidentifiableModifiedResidues.add(strucGroup); } } } }
private void processCrosslink1( Map<Component, Set<Group>> mapCompGroups, List<ModifiedCompound> modComps, ProteinModification mod, List<Component> components) { // modified residue // TODO: is this the correct logic for CROSS_LINK_1? Set<Group> modifiedResidues = mapCompGroups.get(components.get(0)); if (modifiedResidues != null) { for (Group residue : modifiedResidues) { StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true); ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup); modComps.add(modRes); } } }
/** Get matched atoms for all linkages. */ private List<List<Atom[]>> getMatchedAtomsOfLinkages( ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) { List<ModificationLinkage> linkages = condition.getLinkages(); int nLink = linkages.size(); List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { ModificationLinkage linkage = linkages.get(iLink); Component comp1 = linkage.getComponent1(); Component comp2 = linkage.getComponent2(); // boolean isAA1 = comp1.; // boolean isAA2 = comp2.getType()==true; Set<Group> groups1 = mapCompGroups.get(comp1); Set<Group> groups2 = mapCompGroups.get(comp2); List<Atom[]> list = new ArrayList<Atom[]>(); List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1(); for (String name : potentialNamesOfAtomOnGroup1) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup1 = null; // search all atoms break; } } List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2(); for (String name : potentialNamesOfAtomOnGroup2) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup2 = null; // search all atoms break; } } for (Group g1 : groups1) { for (Group g2 : groups2) { if (g1.equals(g2)) { continue; } // only for wildcard match of two residues boolean ignoreNCLinkage = potentialNamesOfAtomOnGroup1 == null && potentialNamesOfAtomOnGroup2 == null && residues.contains(g1) && residues.contains(g2); Atom[] atoms = StructureUtil.findNearestAtomLinkage( g1, g2, potentialNamesOfAtomOnGroup1, potentialNamesOfAtomOnGroup2, ignoreNCLinkage, bondLengthTolerance); if (atoms != null) { list.add(atoms); } } } if (list.isEmpty()) { // broken linkage break; } matchedAtomsOfLinkages.add(list); } return matchedAtomsOfLinkages; }
/** * Identify a set of modifications in a a list of chains. * * @param chains query {@link Chain}s. * @param potentialModifications query {@link ProteinModification}s. */ public void identify( final List<Chain> chains, final Set<ProteinModification> potentialModifications) { if (chains == null) { throw new IllegalArgumentException("Null structure."); } if (potentialModifications == null) { throw new IllegalArgumentException("Null potentialModifications."); } reset(); if (potentialModifications.isEmpty()) { return; } Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size()); residues = new ArrayList<Group>(); List<Group> ligands = new ArrayList<Group>(); Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>(); for (Chain chain : chains) { mapChainIdChain.put(chain.getChainID(), chain); List<Group> ress = StructureUtil.getAminoAcids(chain); // List<Group> ligs = chain.getAtomLigands(); List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); residues.addAll(ress); residues.removeAll(ligs); ligands.addAll(ligs); addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); } if (residues.isEmpty()) { String pdbId = "?"; if (chains.size() > 0) { Structure struc = chains.get(0).getParent(); if (struc != null) pdbId = struc.getPDBCode(); } logger.warn( "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); } List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>(); for (ProteinModification mod : potentialModifications) { ModificationCondition condition = mod.getCondition(); List<Component> components = condition.getComponents(); if (!mapCompGroups.keySet().containsAll(components)) { // not all components exist for this mod. continue; } int sizeComps = components.size(); if (sizeComps == 1) { processCrosslink1(mapCompGroups, modComps, mod, components); } else { processMultiCrosslink(mapCompGroups, modComps, mod, condition); } } if (recordAdditionalAttachments) { // identify additional groups that are not directly attached to amino acids. for (ModifiedCompound mc : modComps) { identifyAdditionalAttachments(mc, ligands, mapChainIdChain); } } mergeModComps(modComps); identifiedModifiedCompounds.addAll(modComps); // record unidentifiable linkage if (recordUnidentifiableModifiedCompounds) { recordUnidentifiableAtomLinkages(modComps, ligands); recordUnidentifiableModifiedResidues(modComps); } }