コード例 #1
0
  /**
   * Read protein modifications from XML file and register them.
   *
   * <p>Each {@code <Entry>} element of the document is turned into one modification and handed
   * to {@code ProteinModificationRegistry.register}. For every entry the following child
   * elements are read:
   *
   * <ul>
   *   <li>exactly one {@code <Id>}, {@code <Category>}, {@code <Occurrence>} and
   *       {@code <Condition>};
   *   <li>inside {@code <Condition>}: one or more {@code <Component>} elements (each with a
   *       unique {@code component} attribute, at least one {@code <Id source="PDBCC">} and an
   *       optional {@code <Terminal>} of {@code N} or {@code C}) and optional {@code <Bond>}
   *       elements, each holding exactly two {@code <Atom>} elements;
   *   <li>optional {@code <Description>}, {@code <CrossReference>}, {@code <Formula>} and
   *       {@code <Keyword>} elements.
   * </ul>
   *
   * <p>The stream is not closed by this method.
   *
   * @param isXml {@link InputStream} of the XML file.
   * @throws IOException if failed to read the XML file.
   * @throws ParserConfigurationException if the {@link DocumentBuilder} cannot be created.
   * @throws SAXException if parse errors occur.
   * @throws IllegalArgumentException if {@code isXml} is null.
   * @throws RuntimeException if the content of the XML file does not follow the structure
   *     described above; the message names the offending modification where its ID is known.
   */
  public static void registerProteinModificationFromXml(InputStream isXml)
      throws IOException, ParserConfigurationException, SAXException {
    if (isXml == null) {
      throw new IllegalArgumentException("Null argument.");
    }

    // NOTE(review): the parser is created from a default-configured factory. If isXml can ever
    // come from an untrusted source, DTDs / external entities should be disabled here.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document doc = builder.parse(isXml);

    NodeList modNodes = doc.getElementsByTagName("Entry");
    int modSize = modNodes.getLength();
    List<Node> nodes;
    for (int iMod = 0; iMod < modSize; iMod++) {
      Node modNode = modNodes.item(iMod);
      Map<String, List<Node>> infoNodes = getChildNodes(modNode);

      // ID
      nodes = infoNodes.get("Id");
      if (nodes == null || nodes.size() != 1) {
        throw new RuntimeException("Each modification must have exact " + "one <Id> field.");
      }
      String id = nodes.get(0).getTextContent();

      // modification category
      nodes = infoNodes.get("Category");
      if (nodes == null || nodes.size() != 1) {
        throw new RuntimeException(
            "Each modification must have exact "
                + "one <Category> field. See Modification "
                + id
                + ".");
      }
      ModificationCategory cat = ModificationCategory.getByLabel(nodes.get(0).getTextContent());
      if (cat == null) {
        throw new RuntimeException(
            nodes.get(0).getTextContent()
                + " is not defined as an modification category."
                + " See Modification "
                + id
                + ".");
      }

      // occurrence type
      nodes = infoNodes.get("Occurrence");
      if (nodes == null || nodes.size() != 1) {
        throw new RuntimeException(
            "Each modification must have exact "
                + "one <Occurrence> field. See Modification "
                + id
                + ".");
      }
      ModificationOccurrenceType occType =
          ModificationOccurrenceType.getByLabel(nodes.get(0).getTextContent());
      if (occType == null) {
        throw new RuntimeException(
            nodes.get(0).getTextContent()
                + " is not defined as an modification occurence type."
                + " See Modification "
                + id
                + ".");
      }

      // condition
      ModificationCondition condition = null;
      {
        nodes = infoNodes.get("Condition");
        if (nodes == null || nodes.size() != 1) {
          throw new RuntimeException(
              "Each modification must have exact "
                  + "one <Condition> field. See Modification "
                  + id
                  + ".");
        }

        Node compsNode = nodes.get(0);

        // maps each component label to the index of that component in comps
        Map<String, Integer> mapLabelComp = new HashMap<String, Integer>();

        Map<String, List<Node>> compInfoNodes = getChildNodes(compsNode);

        // components
        List<Node> compNodes = compInfoNodes.get("Component");
        if (compNodes == null) {
          // without this check a <Condition> lacking <Component> children fails with a bare NPE
          throw new RuntimeException(
              "Each condition must have at least one <Component> field."
                  + " See Modification "
                  + id
                  + ".");
        }
        int sizeComp = compNodes.size();
        List<Component> comps = new ArrayList<Component>(sizeComp);
        for (int iComp = 0; iComp < sizeComp; iComp++) {
          Node compNode = compNodes.get(iComp);
          // comp label
          NamedNodeMap compNodeAttrs = compNode.getAttributes();
          Node labelNode = compNodeAttrs.getNamedItem("component");
          if (labelNode == null) {
            throw new RuntimeException(
                "Each component must have a label." + " See Modification " + id + ".");
          }
          String label = labelNode.getTextContent();

          if (mapLabelComp.containsKey(label)) {
            throw new RuntimeException(
                "Each component must have a unique label." + " See Modification " + id + ".");
          }

          // children of the component, shared by the ID and terminal lookups below
          Map<String, List<Node>> compChildNodes = getChildNodes(compNode);

          // comp PDBCC ID; only <Id> elements whose source attribute is PDBCC are used
          Set<String> compIds = new HashSet<String>();
          List<Node> compIdNodes = compChildNodes.get("Id");
          if (compIdNodes != null) {
            for (Node compIdNode : compIdNodes) {
              NamedNodeMap compIdNodeAttr = compIdNode.getAttributes();
              Node compIdSource = compIdNodeAttr.getNamedItem("source");
              if (compIdSource != null && compIdSource.getTextContent().equals("PDBCC")) {
                String strComps = compIdNode.getTextContent();
                if (strComps.isEmpty()) {
                  throw new RuntimeException("Empty component." + " See Modification " + id + ".");
                }
                // one <Id> may list several comma-separated IDs
                compIds.addAll(Arrays.asList(strComps.split(",")));
              }
            }
          }

          if (compIds.isEmpty()) {
            throw new RuntimeException(
                "Each component must have a PDBCC ID." + " See Modification " + id + ".");
          }

          // terminal
          boolean nTerminal = false;
          boolean cTerminal = false;
          List<Node> compTermNode = compChildNodes.get("Terminal");
          if (compTermNode != null) {
            if (compTermNode.size() != 1) {
              throw new RuntimeException(
                  "Only one <Terminal> condition is allowed for "
                      + "each component. See Modification "
                      + id
                      + ".");
            }
            String nc = compTermNode.get(0).getTextContent();
            if (nc.equals("N")) {
              nTerminal = true;
            } else if (nc.equals("C")) {
              cTerminal = true;
            } else {
              throw new RuntimeException(
                  "Only N or C is allowed for <Terminal>." + " See Modification " + id + ".");
            }
          }

          // register
          Component comp = Component.of(compIds, nTerminal, cTerminal);
          comps.add(comp);
          mapLabelComp.put(label, comps.size() - 1);
        }

        // bonds
        List<Node> bondNodes = compInfoNodes.get("Bond");
        List<ModificationLinkage> linkages = null;
        if (bondNodes != null) {
          int sizeBonds = bondNodes.size();
          linkages = new ArrayList<ModificationLinkage>(sizeBonds);
          for (int iBond = 0; iBond < sizeBonds; iBond++) {
            Node bondNode = bondNodes.get(iBond);
            Map<String, List<Node>> bondChildNodes = getChildNodes(bondNode);
            if (bondChildNodes == null) {
              throw new RuntimeException(
                  "Each bond must contain two atoms" + " See Modification " + id + ".");
            }

            List<Node> atomNodes = bondChildNodes.get("Atom");
            if (atomNodes == null || atomNodes.size() != 2) {
              throw new RuntimeException(
                  "Each bond must contain two atoms" + " See Modification " + id + ".");
            }

            // atom 1
            NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes();
            Node compNode = atomNodeAttrs.getNamedItem("component");
            if (compNode == null) {
              throw new RuntimeException(
                  "Each atom must on a component." + " See Modification " + id + ".");
            }
            String labelComp1 = compNode.getTextContent();
            // look the label up as Integer first: unboxing a missing entry would throw an NPE
            Integer indexComp1 = mapLabelComp.get(labelComp1);
            if (indexComp1 == null) {
              throw new RuntimeException(
                  "Atom refers to an undefined component label "
                      + labelComp1
                      + "."
                      + " See Modification "
                      + id
                      + ".");
            }
            int iComp1 = indexComp1.intValue();

            Node labelNode = atomNodeAttrs.getNamedItem("atom");
            String labelAtom1 = labelNode == null ? null : labelNode.getTextContent();

            String atom1 = atomNodes.get(0).getTextContent();
            if (atom1.isEmpty()) {
              throw new RuntimeException(
                  "Each atom must have a name. Please use wildcard * if unknown."
                      + " See Modification "
                      + id
                      + ".");
            }
            List<String> potentialAtoms1 = Arrays.asList(atom1.split(","));

            // atom 2
            atomNodeAttrs = atomNodes.get(1).getAttributes();
            compNode = atomNodeAttrs.getNamedItem("component");
            if (compNode == null) {
              throw new RuntimeException(
                  "Each atom must on a component." + " See Modification " + id + ".");
            }
            String labelComp2 = compNode.getTextContent();
            Integer indexComp2 = mapLabelComp.get(labelComp2);
            if (indexComp2 == null) {
              throw new RuntimeException(
                  "Atom refers to an undefined component label "
                      + labelComp2
                      + "."
                      + " See Modification "
                      + id
                      + ".");
            }
            int iComp2 = indexComp2.intValue();

            labelNode = atomNodeAttrs.getNamedItem("atom");
            String labelAtom2 = labelNode == null ? null : labelNode.getTextContent();

            String atom2 = atomNodes.get(1).getTextContent();
            if (atom2.isEmpty()) {
              throw new RuntimeException(
                  "Each atom must have a name. Please use wildcard * if unknown."
                      + " See Modification "
                      + id
                      + ".");
            }
            List<String> potentialAtoms2 = Arrays.asList(atom2.split(","));

            // add linkage
            ModificationLinkage linkage =
                new ModificationLinkage(
                    comps,
                    iComp1,
                    potentialAtoms1,
                    labelAtom1,
                    iComp2,
                    potentialAtoms2,
                    labelAtom2);
            linkages.add(linkage);
          }
        }

        // linkages stays null when the condition has no <Bond> elements
        condition = new ModificationConditionImpl(comps, linkages);
      } // end of condition

      ProteinModificationImpl.Builder modBuilder =
          new ProteinModificationImpl.Builder(id, cat, occType, condition);

      // description (only the first <Description> is used)
      nodes = infoNodes.get("Description");
      if (nodes != null && !nodes.isEmpty()) {
        modBuilder.setDescription(nodes.get(0).getTextContent());
      }

      // cross references
      nodes = infoNodes.get("CrossReference");
      if (nodes != null) {
        for (Node node : nodes) {
          Map<String, List<Node>> xrefInfoNodes = getChildNodes(node);

          // source
          List<Node> xrefNode = xrefInfoNodes.get("Source");
          if (xrefNode == null || xrefNode.size() != 1) {
            throw new RuntimeException(
                "Error in XML file: "
                    + "a cross reference must contain exactly one <Source> field."
                    + " See Modification "
                    + id
                    + ".");
          }
          String xrefDb = xrefNode.get(0).getTextContent();

          // id
          xrefNode = xrefInfoNodes.get("Id");
          if (xrefNode == null || xrefNode.size() != 1) {
            throw new RuntimeException(
                "Error in XML file: "
                    + "a cross reference must contain exactly one <Id> field."
                    + " See Modification "
                    + id
                    + ".");
          }
          String xrefId = xrefNode.get(0).getTextContent();

          // name (optional)
          String xrefName = null;
          xrefNode = xrefInfoNodes.get("Name");
          if (xrefNode != null && !xrefNode.isEmpty()) {
            xrefName = xrefNode.get(0).getTextContent();
          }

          // cross references to any other source are ignored
          if (xrefDb.equals("PDBCC")) {
            modBuilder.setPdbccId(xrefId).setPdbccName(xrefName);
          } else if (xrefDb.equals("RESID")) {
            modBuilder.setResidId(xrefId).setResidName(xrefName);
          } else if (xrefDb.equals("PSI-MOD")) {
            modBuilder.setPsimodId(xrefId).setPsimodName(xrefName);
          }
        }
      } // end of cross references

      // formula (only the first <Formula> is used)
      nodes = infoNodes.get("Formula");
      if (nodes != null && !nodes.isEmpty()) {
        modBuilder.setFormula(nodes.get(0).getTextContent());
      }

      // keywords
      nodes = infoNodes.get("Keyword");
      if (nodes != null && !nodes.isEmpty()) {
        for (Node node : nodes) {
          modBuilder.addKeyword(node.getTextContent());
        }
      }

      ProteinModificationRegistry.register(modBuilder.build());
    }
  }
コード例 #2
0
  /**
   * Collect, for each component used by the given modifications, the groups that match it.
   *
   * <p>Every component is first indexed by each of its PDBCC IDs (keeping its N-/C-terminal
   * flags). Groups are then matched by their trimmed PDB name, together with any component
   * registered under the wildcard ID {@code "*"}:
   *
   * <ul>
   *   <li>all ligands and all residues are matched against non-terminal components;
   *   <li>starting at the first residue and walking forward, residues are matched against
   *       N-terminal components for as long as the residue just handled is also contained in
   *       {@code ligands};
   *   <li>starting at the last residue and walking backward, the same is done for C-terminal
   *       components.
   * </ul>
   *
   * <p>Nothing is returned; matches are accumulated in {@code saveTo}.
   *
   * @param modifications a set of {@link ProteinModification}s.
   * @param residues residues to match; the first and last elements are treated as the
   *     N- and C-terminal ends.
   * @param ligands ligands to match.
   * @param saveTo map from component to the groups matching it; existing entries are extended.
   * @throws IllegalArgumentException if any argument is null.
   */
  private void addModificationGroups(
      final Set<ProteinModification> modifications,
      final List<Group> residues,
      final List<Group> ligands,
      final Map<Component, Set<Group>> saveTo) {
    if (residues == null || ligands == null || modifications == null || saveTo == null) {
      throw new IllegalArgumentException("Null argument(s).");
    }

    // index: single-ID component -> all (possibly multi-ID) components containing that ID
    Map<Component, Set<Component>> mapSingleMultiComps = new HashMap<Component, Set<Component>>();
    for (ProteinModification mod : modifications) {
      ModificationCondition condition = mod.getCondition();
      for (Component comp : condition.getComponents()) {
        for (String pdbccId : comp.getPdbccIds()) {
          Component single =
              Component.of(Collections.singleton(pdbccId), comp.isNTerminal(), comp.isCTerminal());
          Set<Component> mult = mapSingleMultiComps.get(single);
          if (mult == null) {
            mult = new HashSet<Component>();
            mapSingleMultiComps.put(single, mult);
          }
          mult.add(comp);
        }
      }
    }

    // the index is not modified below, so the wildcard lookup is shared by ligands and residues
    Set<Component> wildCard = mapSingleMultiComps.get(Component.of("*"));

    // ligands
    for (Group group : ligands) {
      String pdbccId = group.getPDBName().trim();
      Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
      saveGroupForComponents(wildCard, comps, group, saveTo);
    }

    // residues
    if (residues.isEmpty()) {
      return;
    }

    // for all residues
    for (Group group : residues) {
      String pdbccId = group.getPDBName().trim();
      Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
      saveGroupForComponents(wildCard, comps, group, saveTo);
    }

    // for N-terminal
    Set<Component> nTermWildCard = mapSingleMultiComps.get(Component.of("*", true, false));
    int nRes = residues.size();
    int iRes = 0;
    Group res;
    do {
      // for all ligands on N terminal and the first residue
      res = residues.get(iRes++);

      // trim the name like the non-terminal lookups above do, so padded names still match
      Set<Component> comps =
          mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), true, false));
      saveGroupForComponents(nTermWildCard, comps, res, saveTo);
    } while (iRes < nRes && ligands.contains(res));

    // for C-terminal
    Set<Component> cTermWildCard = mapSingleMultiComps.get(Component.of("*", false, true));
    iRes = residues.size() - 1;
    do {
      // for all ligands on C terminal and the last residue
      res = residues.get(iRes--);

      Set<Component> comps =
          mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), false, true));
      saveGroupForComponents(cTermWildCard, comps, res, saveTo);
    } while (iRes >= 0 && ligands.contains(res));
  }

  /**
   * Add a group to the entry of every component in the union of the two given component sets,
   * creating the entry (as an insertion-ordered set) when the component has none yet.
   *
   * @param wildCardComps components matched through the wildcard ID; passed on to
   *     {@code unionComponentSet} as is.
   * @param namedComps components matched through the group's own ID; passed on to
   *     {@code unionComponentSet} as is.
   * @param group the group to record.
   * @param saveTo map from component to the groups matching it.
   */
  private void saveGroupForComponents(
      final Set<Component> wildCardComps,
      final Set<Component> namedComps,
      final Group group,
      final Map<Component, Set<Group>> saveTo) {
    for (Component comp : unionComponentSet(wildCardComps, namedComps)) {
      Set<Group> gs = saveTo.get(comp);
      if (gs == null) {
        gs = new LinkedHashSet<Group>();
        saveTo.put(comp, gs);
      }
      gs.add(group);
    }
  }