   * Identify additional groups that are not directly attached to amino acids.
   * @param mc {@link ModifiedCompound}.
   * @param ligands list of candidate ligand {@link Group}s.
   * @param mapChainIdChain map from chain ID to {@link Chain}.
   * @return a list of added groups.
  private void identifyAdditionalAttachments(
      ModifiedCompound mc, List<Group> ligands, Map<String, Chain> mapChainIdChain) {
    // Guard for the case where no candidate ligands were supplied.
    if (ligands.isEmpty()) {

    // TODO: should the additional groups only be allowed to the identified
    // ligands or both amino acids and ligands? Currently only on ligands
    // ligands to amino acid bonds for same modification of unknown category
    // will be combined in mergeModComps()
    // TODO: how about chain-chain links?
    List<Group> identifiedGroups = new ArrayList<Group>();
    // Resolve each StructureGroup of mc to a Group through the chain that is
    // looked up by the descriptor's chain ID.
    for (StructureGroup num : mc.getGroups(false)) {
      Group group;
      try {
        // String numIns = "" + num.getResidueNumber();
        // if (num.getInsCode() != null) {
        //	numIns += num.getInsCode();
        // }
        // NOTE(review): resNum is handed to getGroupByPDB below, but no field of it
        // is visibly populated from num here - confirm it is filled in before use.
        ResidueNumber resNum = new ResidueNumber();
        // group = chain.getGroupByPDB(numIns);
        // NOTE(review): Map.get returns null for an unknown chain ID, which would
        // raise a NullPointerException on this line - confirm callers guarantee the key.
        group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum);
      } catch (StructureException e) {
        logger.error("Exception: ", e);
        // should not happen

    // Iterative expansion: [start, n) is the window of identifiedGroups examined in
    // the current pass. After each pass the window moves to whatever was appended
    // during that pass; the loop stops once a pass leaves the list size unchanged.
    int start = 0;

    int n = identifiedGroups.size();
    while (n > start) {
      for (Group group1 : ligands) {
        for (int i = start; i < n; i++) {
          Group group2 = identifiedGroups.get(i);
          // Only ligands that are not yet in identifiedGroups are tested.
          if (!identifiedGroups.contains(group1)) {
            // Atom linkages between the candidate ligand and an identified group,
            // within bondLengthTolerance.
            List<Atom[]> linkedAtoms =
                StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance);
            if (!linkedAtoms.isEmpty()) {
              for (Atom[] atoms : linkedAtoms) {
                    StructureUtil.getStructureAtomLinkage(atoms[0], false, atoms[1], false));

      start = n;
      n = identifiedGroups.size();
   * Assemble the matched linkages.
   * @param matchedAtomsOfLinkages
   * @param mod
   * @param condition
   * @param ret ModifiedCompound will be stored here.
  private void assembleLinkages(
      List<List<Atom[]>> matchedAtomsOfLinkages,
      ProteinModification mod,
      List<ModifiedCompound> ret) {
    // The linkage definitions to satisfy come from the modification's own condition.
    ModificationCondition condition = mod.getCondition();
    List<ModificationLinkage> modLinks = condition.getLinkages();

    // indices[k] selects one candidate atom pair for linkage k; stepping the array
    // enumerates combinations of one candidate per linkage.
    int nLink = matchedAtomsOfLinkages.size();
    int[] indices = new int[nLink];
    // Used below to skip compounds that were already produced by an earlier combination.
    Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>();
    // NOTE(review): indices[0] and matchedAtomsOfLinkages.get(0) are read
    // unconditionally, so an empty matchedAtomsOfLinkages would fail here -
    // confirm callers never pass an empty list.
    while (indices[0] < matchedAtomsOfLinkages.get(0).size()) {
      List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink);
      for (int iLink = 0; iLink < nLink; iLink++) {
        Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]);
      if (matchLinkages(modLinks, atomLinkages)) {
        // matched

        int n = atomLinkages.size();
        List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n);
        for (int i = 0; i < n; i++) {
          Atom[] linkage = atomLinkages.get(i);
          StructureAtomLinkage link =
                  linkage[0], residues.contains(linkage[0].getGroup()),
                  linkage[1], residues.contains(linkage[1].getGroup()));

        ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages);
        if (!identifiedCompounds.contains(mc)) {

      // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0])
      // Walks from the last position towards the first. A position already at its
      // last candidate is reset to 0 in the else branch; position 0 always takes the
      // first branch and is never reset, which is what eventually makes the outer
      // loop condition false.
      int i = nLink - 1;
      while (i >= 0) {
        if (i == 0 || indices[i] < matchedAtomsOfLinkages.get(i).size() - 1) {
        } else {
          indices[i] = 0;
 private void processCrosslink1(
     Map<Component, Set<Group>> mapCompGroups,
     List<ModifiedCompound> modComps,
     ProteinModification mod,
     List<Component> components) {
   // modified residue
   // TODO: is this the correct logic for CROSS_LINK_1?
   // Only the first component is consulted; its matched groups are looked up in
   // mapCompGroups.
   // NOTE(review): components.get(0) throws IndexOutOfBoundsException for an empty
   // list - confirm callers always pass at least one component.
   Set<Group> modifiedResidues = mapCompGroups.get(components.get(0));
   // A null result means no group was recorded for that component; nothing is built then.
   if (modifiedResidues != null) {
     for (Group residue : modifiedResidues) {
       // One single-group ModifiedCompound is created per matched residue.
       StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true);
       ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup);
   * Utility method to group child nodes by their names.
   * @param parent parent node.
   * @return Map from name to child nodes.
  private static Map<String, List<Node>> getChildNodes(Node parent) {
    // A null parent yields the shared immutable empty map rather than null, so
    // callers can call get(...) on the result without a null check.
    if (parent == null) return Collections.emptyMap();

    // Tag name -> child nodes carrying that name.
    Map<String, List<Node>> children = new HashMap<String, List<Node>>();

    NodeList nodes = parent.getChildNodes();
    int nNodes = nodes.getLength();
    for (int i = 0; i < nNodes; i++) {
      Node node = nodes.item(i);
      // Only element children are grouped; other node types are skipped.
      if (node.getNodeType() != Node.ELEMENT_NODE) continue;

      String name = node.getNodeName();
      List<Node> namesakes = children.get(name);
      // First occurrence of this name: create and register its bucket.
      if (namesakes == null) {
        namesakes = new ArrayList<Node>();
        children.put(name, namesakes);

    return children;
   * Read protein modifications from XML file and register them.
   * @param isXml {@link InputStream} of the XML file.
   * @throws IOException if failed to read the XML file.
   * @throws ParserConfigurationException if the {@link DocumentBuilder} cannot be created.
   * @throws SAXException if parse errors occur.
  public static void registerProteinModificationFromXml(InputStream isXml)
      throws IOException, ParserConfigurationException, SAXException {
    // Parses the stream into a DOM document and walks every <Entry> element. For each
    // entry the Id, Category, Occurrence and Condition fields are validated, then the
    // optional Description, CrossReference, Formula and Keyword fields are read.
    // A null stream raises IllegalArgumentException; malformed entries raise
    // RuntimeException with a message naming the offending modification.
    if (isXml == null) {
      throw new IllegalArgumentException("Null argument.");

    // NOTE(review): the factory is used with its default settings. If isXml can come
    // from an untrusted source, DTD / external-entity processing should be disabled
    // to prevent XXE - confirm the trust level of the input.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document doc = builder.parse(isXml);

    NodeList modNodes = doc.getElementsByTagName("Entry");
    int modSize = modNodes.getLength();
    // Scratch reference reused for each field lookup below.
    List<Node> nodes;
    for (int iMod = 0; iMod < modSize; iMod++) {
      Node modNode = modNodes.item(iMod);
      // Element children of this entry, grouped by tag name.
      Map<String, List<Node>> infoNodes = getChildNodes(modNode);

      // ID
      nodes = infoNodes.get("Id");
      if (nodes == null || nodes.size() != 1) {
        throw new RuntimeException("Each modification must have exact " + "one <Id> field.");
      String id = nodes.get(0).getTextContent();

      // modification category
      nodes = infoNodes.get("Category");
      if (nodes == null || nodes.size() != 1) {
        throw new RuntimeException(
            "Each modification must have exact "
                + "one <Category> field. See Modification "
                + id
                + ".");
      // A label for which getByLabel returns null is rejected just below.
      ModificationCategory cat = ModificationCategory.getByLabel(nodes.get(0).getTextContent());
      if (cat == null) {
        throw new RuntimeException(
                + " is not defined as an modification category."
                + " See Modification "
                + id
                + ".");

      // occurrence type
      nodes = infoNodes.get("Occurrence");
      if (nodes == null || nodes.size() != 1) {
        throw new RuntimeException(
            "Each modification must have exact "
                + "one <Occurrence> field. See Modification "
                + id
                + ".");
      ModificationOccurrenceType occType =
      if (occType == null) {
        throw new RuntimeException(
                + " is not defined as an modification occurence type."
                + " See Modification "
                + id
                + ".");

      // condition
      ModificationCondition condition = null;
        nodes = infoNodes.get("Condition");
        if (nodes == null || nodes.size() != 1) {
          throw new RuntimeException(
              "Each modification must have exact "
                  + "one <Condition> field. See Modification "
                  + id
                  + ".");

        Node compsNode = nodes.get(0);

        // keep track of the labels of component indices
        Map<String, Integer> mapLabelComp = new HashMap<String, Integer>();

        Map<String, List<Node>> compInfoNodes = getChildNodes(compsNode);

        // components
        List<Node> compNodes = compInfoNodes.get("Component");
        // NOTE(review): compNodes is dereferenced without a null check; the lookup
        // above returns null when the <Condition> has no <Component> child - confirm
        // whether that case should be reported like the other missing fields.
        int sizeComp = compNodes.size();
        List<Component> comps = new ArrayList<Component>(sizeComp);
        for (int iComp = 0; iComp < sizeComp; iComp++) {
          Node compNode = compNodes.get(iComp);
          // comp label
          // The label is read from the "component" attribute and must be unique
          // within this condition.
          NamedNodeMap compNodeAttrs = compNode.getAttributes();
          Node labelNode = compNodeAttrs.getNamedItem("component");
          if (labelNode == null) {
            throw new RuntimeException(
                "Each component must have a label." + " See Modification " + id + ".");
          String label = labelNode.getTextContent();

          if (mapLabelComp.containsKey(label)) {
            throw new RuntimeException(
                "Each component must have a unique label." + " See Modification " + id + ".");

          // comp PDBCC ID
          // Only <Id> children whose "source" attribute equals "PDBCC" are considered.
          Set<String> compIds = new HashSet<String>();
          List<Node> compIdNodes = getChildNodes(compNode).get("Id");
          if (compIdNodes != null) {
            for (Node compIdNode : compIdNodes) {
              NamedNodeMap compIdNodeAttr = compIdNode.getAttributes();
              Node compIdSource = compIdNodeAttr.getNamedItem("source");
              if (compIdSource != null && compIdSource.getTextContent().equals("PDBCC")) {
                String strComps = compIdNode.getTextContent();
                if (strComps.isEmpty()) {
                  throw new RuntimeException("Empty component." + " See Modification " + id + ".");

          if (compIds.isEmpty()) {
            throw new RuntimeException(
                "Each component must have a PDBCC ID." + " See Modification " + id + ".");

          // terminal
          // An optional single <Terminal> child whose text must be exactly "N" or "C".
          boolean nTerminal = false;
          boolean cTerminal = false;
          List<Node> compTermNode = getChildNodes(compNode).get("Terminal");
          if (compTermNode != null) {
            if (compTermNode.size() != 1) {
              throw new RuntimeException(
                  "Only one <Terminal> condition is allowed for "
                      + "each component. See Modification "
                      + id
                      + ".");
            String nc = compTermNode.get(0).getTextContent();
            if (nc.equals("N")) {
              nTerminal = true;
            } else if (nc.equals("C")) {
              cTerminal = true;
            } else {
              throw new RuntimeException(
                  "Only N or C is allowed for <Terminal>." + " See Modification " + id + ".");

          // register
          Component comp = Component.of(compIds, nTerminal, cTerminal);
          // NOTE(review): the index stored for the label is comps.size() - 1, but no
          // comps.add(comp) is visible before this line - confirm comp is appended
          // first; otherwise the stored index is off by one (-1 for the first component).
          mapLabelComp.put(label, comps.size() - 1);

        // bonds
        List<Node> bondNodes = compInfoNodes.get("Bond");
        // linkages stays null when the condition declares no <Bond> element.
        List<ModificationLinkage> linkages = null;
        if (bondNodes != null) {
          int sizeBonds = bondNodes.size();
          linkages = new ArrayList<ModificationLinkage>(sizeBonds);
          for (int iBond = 0; iBond < sizeBonds; iBond++) {
            Node bondNode = bondNodes.get(iBond);
            Map<String, List<Node>> bondChildNodes = getChildNodes(bondNode);
            // NOTE(review): getChildNodes in this file returns Collections.emptyMap()
            // or a HashMap, never null, so this check looks unreachable; the atom
            // count check below covers the empty case.
            if (bondChildNodes == null) {
              throw new RuntimeException(
                  "Each bond must contain two atoms" + " See Modification " + id + ".");

            List<Node> atomNodes = bondChildNodes.get("Atom");
            if (atomNodes == null || atomNodes.size() != 2) {
              throw new RuntimeException(
                  "Each bond must contain two atoms" + " See Modification " + id + ".");

            // atom 1
            NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes();
            Node compNode = atomNodeAttrs.getNamedItem("component");
            if (compNode == null) {
              throw new RuntimeException(
                  "Each atom must on a component." + " See Modification " + id + ".");
            String labelComp1 = compNode.getTextContent();
            // NOTE(review): auto-unboxing throws NullPointerException when the label
            // was not registered in mapLabelComp - confirm whether an explicit error
            // naming the modification is wanted instead.
            int iComp1 = mapLabelComp.get(labelComp1);

            // The "atom" attribute is optional; null is kept when it is absent.
            Node labelNode = atomNodeAttrs.getNamedItem("atom");
            String labelAtom1 = labelNode == null ? null : labelNode.getTextContent();

            String atom1 = atomNodes.get(0).getTextContent();
            if (atom1.isEmpty()) {
              throw new RuntimeException(
                  "Each atom must have a name. Please use wildcard * if unknown."
                      + " See Modification "
                      + id
                      + ".");
            // The element text is a comma-separated list of candidate atom names.
            List<String> potentialAtoms1 = Arrays.asList(atom1.split(","));

            // atom 2
            atomNodeAttrs = atomNodes.get(1).getAttributes();
            compNode = atomNodeAttrs.getNamedItem("component");
            if (compNode == null) {
              throw new RuntimeException(
                  "Each atom must on a component." + " See Modification " + id + ".");
            String labelComp2 = compNode.getTextContent();
            // NOTE(review): same auto-unboxing NullPointerException risk as for atom 1.
            int iComp2 = mapLabelComp.get(labelComp2);

            labelNode = atomNodeAttrs.getNamedItem("atom");
            String labelAtom2 = labelNode == null ? null : labelNode.getTextContent();

            String atom2 = atomNodes.get(1).getTextContent();
            if (atom2.isEmpty()) {
              throw new RuntimeException(
                  "Each atom must have a name. Please use wildcard * if unknown."
                      + " See Modification "
                      + id
                      + ".");
            List<String> potentialAtoms2 = Arrays.asList(atom2.split(","));

            // add linkage
            ModificationLinkage linkage =
                new ModificationLinkage(

        condition = new ModificationConditionImpl(comps, linkages);
      } // end of condition

      ProteinModificationImpl.Builder modBuilder =
          new ProteinModificationImpl.Builder(id, cat, occType, condition);

      // description
      nodes = infoNodes.get("Description");
      if (nodes != null && !nodes.isEmpty()) {

      // cross references
      nodes = infoNodes.get("CrossReference");
      if (nodes != null) {
        for (Node node : nodes) {
          Map<String, List<Node>> xrefInfoNodes = getChildNodes(node);

          // source
          List<Node> xrefNode = xrefInfoNodes.get("Source");
          if (xrefNode == null || xrefNode.size() != 1) {
            throw new RuntimeException(
                "Error in XML file: "
                    + "a cross reference must contain exactly one <Source> field."
                    + " See Modification "
                    + id
                    + ".");
          String xrefDb = xrefNode.get(0).getTextContent();

          // id
          xrefNode = xrefInfoNodes.get("Id");
          if (xrefNode == null || xrefNode.size() != 1) {
            throw new RuntimeException(
                "Error in XML file: "
                    + "a cross reference must contain exactly one <Id> field."
                    + " See Modification "
                    + id
                    + ".");
          String xrefId = xrefNode.get(0).getTextContent();

          // name
          // Optional; stays null when the cross reference has no <Name> child.
          String xrefName = null;
          xrefNode = xrefInfoNodes.get("Name");
          if (xrefNode != null && !xrefNode.isEmpty()) {
            xrefName = xrefNode.get(0).getTextContent();

          // Dispatch on the cross-reference database name.
          if (xrefDb.equals("PDBCC")) {
          } else if (xrefDb.equals("RESID")) {
          } else if (xrefDb.equals("PSI-MOD")) {
      } // end of cross references

      // formula
      nodes = infoNodes.get("Formula");
      if (nodes != null && !nodes.isEmpty()) {

      // keywords
      nodes = infoNodes.get("Keyword");
      if (nodes != null && !nodes.isEmpty()) {
        for (Node node : nodes) {

  /**
   * Get matched atoms for all linkages of a condition.
   *
   * @param condition condition whose linkages are examined, in order.
   * @param mapCompGroups groups looked up for the two components of each linkage.
   * @return the list of per-linkage atom pair lists built by this method.
   */
  private List<List<Atom[]>> getMatchedAtomsOfLinkages(
      ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) {
    List<ModificationLinkage> linkages = condition.getLinkages();
    int nLink = linkages.size();

    List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink);

    for (int iLink = 0; iLink < nLink; iLink++) {
      ModificationLinkage linkage = linkages.get(iLink);
      Component comp1 = linkage.getComponent1();
      Component comp2 = linkage.getComponent2();

      //			boolean isAA1 = comp1.;
      //			boolean isAA2 = comp2.getType()==true;

      // NOTE(review): either lookup returns null when a component has no entry in
      // mapCompGroups, and both sets are iterated below without a visible null
      // check - confirm callers guarantee both keys are present.
      Set<Group> groups1 = mapCompGroups.get(comp1);
      Set<Group> groups2 = mapCompGroups.get(comp2);

      // Atom pairs collected for this linkage.
      List<Atom[]> list = new ArrayList<Atom[]>();

      // A "*" entry among the candidate names switches the name filter off;
      // null is used as the marker for "no filter".
      List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1();
      for (String name : potentialNamesOfAtomOnGroup1) {
        if (name.equals("*")) {
          // wildcard
          potentialNamesOfAtomOnGroup1 = null; // search all atoms

      List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2();
      for (String name : potentialNamesOfAtomOnGroup2) {
        if (name.equals("*")) {
          // wildcard
          potentialNamesOfAtomOnGroup2 = null; // search all atoms

      // Every pairing of a group from component 1 with a group from component 2 is
      // examined.
      for (Group g1 : groups1) {
        for (Group g2 : groups2) {
          // Pairing a group with itself is special-cased (presumably skipped - confirm).
          if (g1.equals(g2)) {

          // only for wildcard match of two residues
          // True only when both sides are unfiltered (wildcard) and both groups are
          // contained in residues.
          boolean ignoreNCLinkage =
              potentialNamesOfAtomOnGroup1 == null
                  && potentialNamesOfAtomOnGroup2 == null
                  && residues.contains(g1)
                  && residues.contains(g2);

          Atom[] atoms =
          if (atoms != null) {

      if (list.isEmpty()) {
        // broken linkage


    return matchedAtomsOfLinkages;