/** Merge identified modified compounds if linked. */ private void mergeModComps(List<ModifiedCompound> modComps) { TreeSet<Integer> remove = new TreeSet<Integer>(); int n = modComps.size(); for (int icurr = 1; icurr < n; icurr++) { ModifiedCompound curr = modComps.get(icurr); String id = curr.getModification().getId(); if (ProteinModificationRegistry.getById(id).getCategory() != ModificationCategory.UNDEFINED) continue; // find linked compounds that before curr // List<Integer> merging = new ArrayList<Integer>(); int ipre = 0; for (; ipre < icurr; ipre++) { if (remove.contains(ipre)) continue; ModifiedCompound pre = modComps.get(ipre); if (!Collections.disjoint(pre.getGroups(false), curr.getGroups(false))) { break; } } if (ipre < icurr) { ModifiedCompound mcKeep = modComps.get(ipre); // merge modifications of the same type if (mcKeep.getModification().getId().equals(id)) { // merging the current one to the previous one mcKeep.addAtomLinkages(curr.getAtomLinkages()); remove.add(icurr); } } } Iterator<Integer> it = remove.descendingIterator(); while (it.hasNext()) { modComps.remove(it.next().intValue()); } }
/** * Read protein modifications from XML file and register them. * * @param isXml {@link InputStream} of the XML file. * @throws IOException if failed to read the XML file. * @throws ParserConfigurationException if parse errors occur. * @throws SAXException the {@link DocumentBuilder} cannot be created. */ public static void registerProteinModificationFromXml(InputStream isXml) throws IOException, ParserConfigurationException, SAXException { if (isXml == null) { throw new IllegalArgumentException("Null argument."); } DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(isXml); NodeList modNodes = doc.getElementsByTagName("Entry"); int modSize = modNodes.getLength(); List<Node> nodes; for (int iMod = 0; iMod < modSize; iMod++) { Node modNode = modNodes.item(iMod); Map<String, List<Node>> infoNodes = getChildNodes(modNode); // ID nodes = infoNodes.get("Id"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException("Each modification must have exact " + "one <Id> field."); } String id = nodes.get(0).getTextContent(); // modification category nodes = infoNodes.get("Category"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException( "Each modification must have exact " + "one <Category> field. See Modification " + id + "."); } ModificationCategory cat = ModificationCategory.getByLabel(nodes.get(0).getTextContent()); if (cat == null) { throw new RuntimeException( nodes.get(0).getTextContent() + " is not defined as an modification category." + " See Modification " + id + "."); } // occurrence type nodes = infoNodes.get("Occurrence"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException( "Each modification must have exact " + "one <Occurrence> field. 
See Modification " + id + "."); } ModificationOccurrenceType occType = ModificationOccurrenceType.getByLabel(nodes.get(0).getTextContent()); if (occType == null) { throw new RuntimeException( nodes.get(0).getTextContent() + " is not defined as an modification occurence type." + " See Modification " + id + "."); } // condition ModificationCondition condition = null; { nodes = infoNodes.get("Condition"); if (nodes == null || nodes.size() != 1) { throw new RuntimeException( "Each modification must have exact " + "one <Condition> field. See Modification " + id + "."); } Node compsNode = nodes.get(0); // keep track of the labels of component indices Map<String, Integer> mapLabelComp = new HashMap<String, Integer>(); Map<String, List<Node>> compInfoNodes = getChildNodes(compsNode); // components List<Node> compNodes = compInfoNodes.get("Component"); int sizeComp = compNodes.size(); List<Component> comps = new ArrayList<Component>(sizeComp); for (int iComp = 0; iComp < sizeComp; iComp++) { Node compNode = compNodes.get(iComp); // comp label NamedNodeMap compNodeAttrs = compNode.getAttributes(); Node labelNode = compNodeAttrs.getNamedItem("component"); if (labelNode == null) { throw new RuntimeException( "Each component must have a label." + " See Modification " + id + "."); } String label = labelNode.getTextContent(); if (mapLabelComp.containsKey(label)) { throw new RuntimeException( "Each component must have a unique label." + " See Modification " + id + "."); } // comp PDBCC ID Set<String> compIds = new HashSet<String>(); List<Node> compIdNodes = getChildNodes(compNode).get("Id"); if (compIdNodes != null) { for (Node compIdNode : compIdNodes) { NamedNodeMap compIdNodeAttr = compIdNode.getAttributes(); Node compIdSource = compIdNodeAttr.getNamedItem("source"); if (compIdSource != null && compIdSource.getTextContent().equals("PDBCC")) { String strComps = compIdNode.getTextContent(); if (strComps.isEmpty()) { throw new RuntimeException("Empty component." 
+ " See Modification " + id + "."); } compIds.addAll(Arrays.asList(strComps.split(","))); } } } if (compIds.isEmpty()) { throw new RuntimeException( "Each component must have a PDBCC ID." + " See Modification " + id + "."); } // terminal boolean nTerminal = false; boolean cTerminal = false; List<Node> compTermNode = getChildNodes(compNode).get("Terminal"); if (compTermNode != null) { if (compTermNode.size() != 1) { throw new RuntimeException( "Only one <Terminal> condition is allowed for " + "each component. See Modification " + id + "."); } String nc = compTermNode.get(0).getTextContent(); if (nc.equals("N")) { nTerminal = true; } else if (nc.equals("C")) { cTerminal = true; } else { throw new RuntimeException( "Only N or C is allowed for <Terminal>." + " See Modification " + id + "."); } } // register Component comp = Component.of(compIds, nTerminal, cTerminal); comps.add(comp); mapLabelComp.put(label, comps.size() - 1); } // bonds List<Node> bondNodes = compInfoNodes.get("Bond"); List<ModificationLinkage> linkages = null; if (bondNodes != null) { int sizeBonds = bondNodes.size(); linkages = new ArrayList<ModificationLinkage>(sizeBonds); for (int iBond = 0; iBond < sizeBonds; iBond++) { Node bondNode = bondNodes.get(iBond); Map<String, List<Node>> bondChildNodes = getChildNodes(bondNode); if (bondChildNodes == null) { throw new RuntimeException( "Each bond must contain two atoms" + " See Modification " + id + "."); } List<Node> atomNodes = bondChildNodes.get("Atom"); if (atomNodes == null || atomNodes.size() != 2) { throw new RuntimeException( "Each bond must contain two atoms" + " See Modification " + id + "."); } // atom 1 NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes(); Node compNode = atomNodeAttrs.getNamedItem("component"); if (compNode == null) { throw new RuntimeException( "Each atom must on a component." 
+ " See Modification " + id + "."); } String labelComp1 = compNode.getTextContent(); int iComp1 = mapLabelComp.get(labelComp1); Node labelNode = atomNodeAttrs.getNamedItem("atom"); String labelAtom1 = labelNode == null ? null : labelNode.getTextContent(); String atom1 = atomNodes.get(0).getTextContent(); if (atom1.isEmpty()) { throw new RuntimeException( "Each atom must have a name. Please use wildcard * if unknown." + " See Modification " + id + "."); } List<String> potentialAtoms1 = Arrays.asList(atom1.split(",")); // atom 2 atomNodeAttrs = atomNodes.get(1).getAttributes(); compNode = atomNodeAttrs.getNamedItem("component"); if (compNode == null) { throw new RuntimeException( "Each atom must on a component." + " See Modification " + id + "."); } String labelComp2 = compNode.getTextContent(); int iComp2 = mapLabelComp.get(labelComp2); labelNode = atomNodeAttrs.getNamedItem("atom"); String labelAtom2 = labelNode == null ? null : labelNode.getTextContent(); String atom2 = atomNodes.get(1).getTextContent(); if (atom2.isEmpty()) { throw new RuntimeException( "Each atom must have a name. Please use wildcard * if unknown." 
+ " See Modification " + id + "."); } List<String> potentialAtoms2 = Arrays.asList(atom2.split(",")); // add linkage ModificationLinkage linkage = new ModificationLinkage( comps, iComp1, potentialAtoms1, labelAtom1, iComp2, potentialAtoms2, labelAtom2); linkages.add(linkage); } } condition = new ModificationConditionImpl(comps, linkages); } // end of condition ProteinModificationImpl.Builder modBuilder = new ProteinModificationImpl.Builder(id, cat, occType, condition); // description nodes = infoNodes.get("Description"); if (nodes != null && !nodes.isEmpty()) { modBuilder.setDescription(nodes.get(0).getTextContent()); } // cross references nodes = infoNodes.get("CrossReference"); if (nodes != null) { for (Node node : nodes) { Map<String, List<Node>> xrefInfoNodes = getChildNodes(node); // source List<Node> xrefNode = xrefInfoNodes.get("Source"); if (xrefNode == null || xrefNode.size() != 1) { throw new RuntimeException( "Error in XML file: " + "a cross reference must contain exactly one <Source> field." + " See Modification " + id + "."); } String xrefDb = xrefNode.get(0).getTextContent(); // id xrefNode = xrefInfoNodes.get("Id"); if (xrefNode == null || xrefNode.size() != 1) { throw new RuntimeException( "Error in XML file: " + "a cross reference must contain exactly one <Id> field." 
+ " See Modification " + id + "."); } String xrefId = xrefNode.get(0).getTextContent(); // name String xrefName = null; xrefNode = xrefInfoNodes.get("Name"); if (xrefNode != null && !xrefNode.isEmpty()) { xrefName = xrefNode.get(0).getTextContent(); } if (xrefDb.equals("PDBCC")) { modBuilder.setPdbccId(xrefId).setPdbccName(xrefName); } else if (xrefDb.equals("RESID")) { modBuilder.setResidId(xrefId).setResidName(xrefName); } else if (xrefDb.equals("PSI-MOD")) { modBuilder.setPsimodId(xrefId).setPsimodName(xrefName); } } } // end of cross references // formula nodes = infoNodes.get("Formula"); if (nodes != null && !nodes.isEmpty()) { modBuilder.setFormula(nodes.get(0).getTextContent()); } // keywords nodes = infoNodes.get("Keyword"); if (nodes != null && !nodes.isEmpty()) { for (Node node : nodes) { modBuilder.addKeyword(node.getTextContent()); } } ProteinModificationRegistry.register(modBuilder.build()); } }
/**
 * Identify all registered modifications in chains.
 *
 * <p>Convenience overload: forwards to the two-argument {@code identify}
 * with everything returned by
 * {@code ProteinModificationRegistry.allModifications()}.
 *
 * @param chains query {@link Chain}s.
 */
public void identify(final List<Chain> chains) {
    this.identify(
            chains,
            ProteinModificationRegistry.allModifications());
}
/**
 * Identify all registered modifications in a structure.
 *
 * <p>Convenience overload: forwards to the two-argument {@code identify}
 * with everything returned by
 * {@code ProteinModificationRegistry.allModifications()}.
 *
 * @param structure query {@link Structure}.
 */
public void identify(final Structure structure) {
    this.identify(
            structure,
            ProteinModificationRegistry.allModifications());
}