/** * identify additional groups that are not directly attached to amino acids. * * @param mc {@link ModifiedCompound}. * @param chain a {@link Chain}. * @return a list of added groups. */ private void identifyAdditionalAttachments( ModifiedCompound mc, List<Group> ligands, Map<String, Chain> mapChainIdChain) { if (ligands.isEmpty()) { return; } // TODO: should the additional groups only be allowed to the identified // ligands or both amino acids and ligands? Currently only on ligands // ligands to amino acid bonds for same modification of unknown category // will be combined in mergeModComps() // TODO: how about chain-chain links? List<Group> identifiedGroups = new ArrayList<Group>(); for (StructureGroup num : mc.getGroups(false)) { Group group; try { // String numIns = "" + num.getResidueNumber(); // if (num.getInsCode() != null) { // numIns += num.getInsCode(); // } ResidueNumber resNum = new ResidueNumber(); resNum.setChainId(num.getChainId()); resNum.setSeqNum(num.getResidueNumber()); resNum.setInsCode(num.getInsCode()); // group = chain.getGroupByPDB(numIns); group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum); } catch (StructureException e) { logger.error("Exception: ", e); // should not happen continue; } identifiedGroups.add(group); } int start = 0; int n = identifiedGroups.size(); while (n > start) { for (Group group1 : ligands) { for (int i = start; i < n; i++) { Group group2 = identifiedGroups.get(i); if (!identifiedGroups.contains(group1)) { List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance); if (!linkedAtoms.isEmpty()) { for (Atom[] atoms : linkedAtoms) { mc.addAtomLinkage( StructureUtil.getStructureAtomLinkage(atoms[0], false, atoms[1], false)); } identifiedGroups.add(group1); break; } } } } start = n; n = identifiedGroups.size(); } }
/** * Assembly the matched linkages. * * @param matchedAtomsOfLinkages * @param mod * @param condition * @param ret ModifiedCompound will be stored here. */ private void assembleLinkages( List<List<Atom[]>> matchedAtomsOfLinkages, ProteinModification mod, List<ModifiedCompound> ret) { ModificationCondition condition = mod.getCondition(); List<ModificationLinkage> modLinks = condition.getLinkages(); int nLink = matchedAtomsOfLinkages.size(); int[] indices = new int[nLink]; Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>(); while (indices[0] < matchedAtomsOfLinkages.get(0).size()) { List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]); atomLinkages.add(atoms); } if (matchLinkages(modLinks, atomLinkages)) { // matched int n = atomLinkages.size(); List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n); for (int i = 0; i < n; i++) { Atom[] linkage = atomLinkages.get(i); StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage( linkage[0], residues.contains(linkage[0].getGroup()), linkage[1], residues.contains(linkage[1].getGroup())); linkages.add(link); } ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages); if (!identifiedCompounds.contains(mc)) { ret.add(mc); identifiedCompounds.add(mc); } } // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0]) int i = nLink - 1; while (i >= 0) { if (i == 0 || indices[i] < matchedAtomsOfLinkages.get(i).size() - 1) { indices[i]++; break; } else { indices[i] = 0; i--; } } } }
private void processCrosslink1( Map<Component, Set<Group>> mapCompGroups, List<ModifiedCompound> modComps, ProteinModification mod, List<Component> components) { // modified residue // TODO: is this the correct logic for CROSS_LINK_1? Set<Group> modifiedResidues = mapCompGroups.get(components.get(0)); if (modifiedResidues != null) { for (Group residue : modifiedResidues) { StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true); ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup); modComps.add(modRes); } } }
/** * Utility method to group child nodes by their names. * * @param parent parent node. * @return Map from name to child nodes. */ private static Map<String, List<Node>> getChildNodes(Node parent) { if (parent == null) return Collections.emptyMap(); Map<String, List<Node>> children = new HashMap<String, List<Node>>(); NodeList nodes = parent.getChildNodes(); int nNodes = nodes.getLength(); for (int i = 0; i < nNodes; i++) { Node node = nodes.item(i); if (node.getNodeType() != Node.ELEMENT_NODE) continue; String name = node.getNodeName(); List<Node> namesakes = children.get(name); if (namesakes == null) { namesakes = new ArrayList<Node>(); children.put(name, namesakes); } namesakes.add(node); } return children; }
/** * Read protein modifications from XML file and register them. * * @param isXml {@link InputStream} of the XML file. * @throws IOException if failed to read the XML file. * @throws ParserConfigurationException if parse errors occur. * @throws SAXException the {@link DocumentBuilder} cannot be created. */ public static void registerProteinModificationFromXml(InputStream isXml) throws IOException, ParserConfigurationException, SAXException { if (isXml == null) { throw new IllegalArgumentException("Null argument."); } DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(isXml); NodeList modNodes = doc.getElementsByTagName("Entry"); int modSize = modNodes.getLength(); List<Node> nodes; for (int iMod = 0; iMod < modSize; iMod++) { Node modNode = modNodes.item(iMod); Map<String, List<Node>> infoNodes = getChildNodes(modNode); // ID nodes = infoNodes.get("Id"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException("Each modification must have exact " + "one <Id> field."); } String id = nodes.get(0).getTextContent(); // modification category nodes = infoNodes.get("Category"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException( "Each modification must have exact " + "one <Category> field. See Modification " + id + "."); } ModificationCategory cat = ModificationCategory.getByLabel(nodes.get(0).getTextContent()); if (cat == null) { throw new RuntimeException( nodes.get(0).getTextContent() + " is not defined as an modification category." + " See Modification " + id + "."); } // occurrence type nodes = infoNodes.get("Occurrence"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException( "Each modification must have exact " + "one <Occurrence> field. See Modification " + id + "."); } ModificationOccurrenceType occType = ModificationOccurrenceType.getByLabel(nodes.get(0).getTextContent()); if (occType == null) { throw new RuntimeException( nodes.get(0).getTextContent() + " is not defined as an modification occurence type." + " See Modification " + id + "."); } // condition ModificationCondition condition = null; { nodes = infoNodes.get("Condition"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException( "Each modification must have exact " + "one <Condition> field. See Modification " + id + "."); } Node compsNode = nodes.get(0); // keep track of the labels of component indices Map<String, Integer> mapLabelComp = new HashMap<String, Integer>(); Map<String, List<Node>> compInfoNodes = getChildNodes(compsNode); // components List<Node> compNodes = compInfoNodes.get("Component"); int sizeComp = compNodes.size(); List<Component> comps = new ArrayList<Component>(sizeComp); for (int iComp = 0; iComp < sizeComp; iComp++) { Node compNode = compNodes.get(iComp); // comp label NamedNodeMap compNodeAttrs = compNode.getAttributes(); Node labelNode = compNodeAttrs.getNamedItem("component"); if (labelNode == null) { throw new RuntimeException( "Each component must have a label." + " See Modification " + id + "."); } String label = labelNode.getTextContent(); if (mapLabelComp.containsKey(label)) { throw new RuntimeException( "Each component must have a unique label." + " See Modification " + id + "."); } // comp PDBCC ID Set<String> compIds = new HashSet<String>(); List<Node> compIdNodes = getChildNodes(compNode).get("Id"); if (compIdNodes != null) { for (Node compIdNode : compIdNodes) { NamedNodeMap compIdNodeAttr = compIdNode.getAttributes(); Node compIdSource = compIdNodeAttr.getNamedItem("source"); if (compIdSource != null && compIdSource.getTextContent().equals("PDBCC")) { String strComps = compIdNode.getTextContent(); if (strComps.isEmpty()) { throw new RuntimeException("Empty component." + " See Modification " + id + "."); } compIds.addAll(Arrays.asList(strComps.split(","))); } } } if (compIds.isEmpty()) { throw new RuntimeException( "Each component must have a PDBCC ID." + " See Modification " + id + "."); } // terminal boolean nTerminal = false; boolean cTerminal = false; List<Node> compTermNode = getChildNodes(compNode).get("Terminal"); if (compTermNode != null) { if (compTermNode.size() != 1) { throw new RuntimeException( "Only one <Terminal> condition is allowed for " + "each component. See Modification " + id + "."); } String nc = compTermNode.get(0).getTextContent(); if (nc.equals("N")) { nTerminal = true; } else if (nc.equals("C")) { cTerminal = true; } else { throw new RuntimeException( "Only N or C is allowed for <Terminal>." + " See Modification " + id + "."); } } // register Component comp = Component.of(compIds, nTerminal, cTerminal); comps.add(comp); mapLabelComp.put(label, comps.size() - 1); } // bonds List<Node> bondNodes = compInfoNodes.get("Bond"); List<ModificationLinkage> linkages = null; if (bondNodes != null) { int sizeBonds = bondNodes.size(); linkages = new ArrayList<ModificationLinkage>(sizeBonds); for (int iBond = 0; iBond < sizeBonds; iBond++) { Node bondNode = bondNodes.get(iBond); Map<String, List<Node>> bondChildNodes = getChildNodes(bondNode); if (bondChildNodes == null) { throw new RuntimeException( "Each bond must contain two atoms" + " See Modification " + id + "."); } List<Node> atomNodes = bondChildNodes.get("Atom"); if (atomNodes == null || atomNodes.size() != 2) { throw new RuntimeException( "Each bond must contain two atoms" + " See Modification " + id + "."); } // atom 1 NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes(); Node compNode = atomNodeAttrs.getNamedItem("component"); if (compNode == null) { throw new RuntimeException( "Each atom must on a component." + " See Modification " + id + "."); } String labelComp1 = compNode.getTextContent(); int iComp1 = mapLabelComp.get(labelComp1); Node labelNode = atomNodeAttrs.getNamedItem("atom"); String labelAtom1 = labelNode == null ? null : labelNode.getTextContent(); String atom1 = atomNodes.get(0).getTextContent(); if (atom1.isEmpty()) { throw new RuntimeException( "Each atom must have a name. Please use wildcard * if unknown." + " See Modification " + id + "."); } List<String> potentialAtoms1 = Arrays.asList(atom1.split(",")); // atom 2 atomNodeAttrs = atomNodes.get(1).getAttributes(); compNode = atomNodeAttrs.getNamedItem("component"); if (compNode == null) { throw new RuntimeException( "Each atom must on a component." + " See Modification " + id + "."); } String labelComp2 = compNode.getTextContent(); int iComp2 = mapLabelComp.get(labelComp2); labelNode = atomNodeAttrs.getNamedItem("atom"); String labelAtom2 = labelNode == null ? null : labelNode.getTextContent(); String atom2 = atomNodes.get(1).getTextContent(); if (atom2.isEmpty()) { throw new RuntimeException( "Each atom must have a name. Please use wildcard * if unknown." + " See Modification " + id + "."); } List<String> potentialAtoms2 = Arrays.asList(atom2.split(",")); // add linkage ModificationLinkage linkage = new ModificationLinkage( comps, iComp1, potentialAtoms1, labelAtom1, iComp2, potentialAtoms2, labelAtom2); linkages.add(linkage); } } condition = new ModificationConditionImpl(comps, linkages); } // end of condition ProteinModificationImpl.Builder modBuilder = new ProteinModificationImpl.Builder(id, cat, occType, condition); // description nodes = infoNodes.get("Description"); if (nodes != null && !nodes.isEmpty()) { modBuilder.setDescription(nodes.get(0).getTextContent()); } // cross references nodes = infoNodes.get("CrossReference"); if (nodes != null) { for (Node node : nodes) { Map<String, List<Node>> xrefInfoNodes = getChildNodes(node); // source List<Node> xrefNode = xrefInfoNodes.get("Source"); if (xrefNode == null || xrefNode.size() != 1) { throw new RuntimeException( "Error in XML file: " + "a cross reference must contain exactly one <Source> field." + " See Modification " + id + "."); } String xrefDb = xrefNode.get(0).getTextContent(); // id xrefNode = xrefInfoNodes.get("Id"); if (xrefNode == null || xrefNode.size() != 1) { throw new RuntimeException( "Error in XML file: " + "a cross reference must contain exactly one <Id> field." + " See Modification " + id + "."); } String xrefId = xrefNode.get(0).getTextContent(); // name String xrefName = null; xrefNode = xrefInfoNodes.get("Name"); if (xrefNode != null && !xrefNode.isEmpty()) { xrefName = xrefNode.get(0).getTextContent(); } if (xrefDb.equals("PDBCC")) { modBuilder.setPdbccId(xrefId).setPdbccName(xrefName); } else if (xrefDb.equals("RESID")) { modBuilder.setResidId(xrefId).setResidName(xrefName); } else if (xrefDb.equals("PSI-MOD")) { modBuilder.setPsimodId(xrefId).setPsimodName(xrefName); } } } // end of cross references // formula nodes = infoNodes.get("Formula"); if (nodes != null && !nodes.isEmpty()) { modBuilder.setFormula(nodes.get(0).getTextContent()); } // keywords nodes = infoNodes.get("Keyword"); if (nodes != null && !nodes.isEmpty()) { for (Node node : nodes) { modBuilder.addKeyword(node.getTextContent()); } } ProteinModificationRegistry.register(modBuilder.build()); } }
/** Get matched atoms for all linkages. */ private List<List<Atom[]>> getMatchedAtomsOfLinkages( ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) { List<ModificationLinkage> linkages = condition.getLinkages(); int nLink = linkages.size(); List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { ModificationLinkage linkage = linkages.get(iLink); Component comp1 = linkage.getComponent1(); Component comp2 = linkage.getComponent2(); // boolean isAA1 = comp1.; // boolean isAA2 = comp2.getType()==true; Set<Group> groups1 = mapCompGroups.get(comp1); Set<Group> groups2 = mapCompGroups.get(comp2); List<Atom[]> list = new ArrayList<Atom[]>(); List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1(); for (String name : potentialNamesOfAtomOnGroup1) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup1 = null; // search all atoms break; } } List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2(); for (String name : potentialNamesOfAtomOnGroup2) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup2 = null; // search all atoms break; } } for (Group g1 : groups1) { for (Group g2 : groups2) { if (g1.equals(g2)) { continue; } // only for wildcard match of two residues boolean ignoreNCLinkage = potentialNamesOfAtomOnGroup1 == null && potentialNamesOfAtomOnGroup2 == null && residues.contains(g1) && residues.contains(g2); Atom[] atoms = StructureUtil.findNearestAtomLinkage( g1, g2, potentialNamesOfAtomOnGroup1, potentialNamesOfAtomOnGroup2, ignoreNCLinkage, bondLengthTolerance); if (atoms != null) { list.add(atoms); } } } if (list.isEmpty()) { // broken linkage break; } matchedAtomsOfLinkages.add(list); } return matchedAtomsOfLinkages; }