Example #1
1
  /**
   * Prints an alignment map in a concise representation. Edges are given as two nodes separated by
   * '>'. They are chained together where possible, or separated by spaces where disjoint or
   * branched.
   *
   * <p>Note that more concise representations may be possible. Examples:
   *
   * <ul>
   *   <li>{@code 1>2>3>1}
   *   <li>{@code 1>2>3>2 4>3}
   * </ul>
   *
   * @param alignment The input function, as a map (see {@link
   *     AlignmentTools#alignmentAsMap(AFPChain)}). The map itself is not modified.
   * @param identity An identity-like function providing the isomorphism between the codomain of
   *     alignment (of type {@code T}) and the domain (type {@code S}).
   * @return The chained edges as a string; empty if {@code alignment} has no entries
   */
  public static <S, T> String toConciseAlignmentString(Map<S, T> alignment, Map<T, S> identity) {
    // Clone input to prevent changes
    Map<S, T> alig = new HashMap<S, T>(alignment);

    // Generate inverse alignment: target node -> every source node with an edge into it
    Map<S, List<S>> inverse = new HashMap<S, List<S>>();
    for (Entry<S, T> e : alig.entrySet()) {
      S val = identity.get(e.getValue());
      List<S> sources = inverse.get(val);
      if (sources == null) {
        sources = new ArrayList<S>();
        inverse.put(val, sources);
      }
      sources.add(e.getKey());
    }

    StringBuilder str = new StringBuilder();

    while (!alig.isEmpty()) {
      // Pick an edge and work upstream to a root or cycle
      S seedNode = alig.keySet().iterator().next();
      S node = seedNode;
      if (inverse.containsKey(seedNode)) {
        node = inverse.get(seedNode).iterator().next();
        // Reference comparison is sufficient: both sides are key objects of alig
        while (node != seedNode && inverse.containsKey(node)) {
          node = inverse.get(node).iterator().next();
        }
      }

      // Now work downstream, deleting edges as we go
      seedNode = node;
      str.append(node);

      while (alig.containsKey(node)) {
        S lastNode = node;
        node = identity.get(alig.get(lastNode));

        // Output
        str.append('>');
        str.append(node);

        // Remove the edge lastNode -> node from both the forward and the inverse map
        alig.remove(lastNode);
        List<S> inv = inverse.get(node);
        if (inv.size() > 1) {
          // The source of the deleted edge is lastNode, so that is the entry to drop
          inv.remove(lastNode);
        } else {
          inverse.remove(node);
        }
      }
      if (!alig.isEmpty()) {
        str.append(' ');
      }
    }

    return str.toString();
  }
 /**
  * Builds a per-character case mask for a protein sequence string written with capital and
  * lowercase letters: the entry is {@code true} where the character is uppercase (aligned) and
  * {@code false} for every other character (e.g. lowercase, i.e. unaligned).
  *
  * @param sequence Make sure <em>not</em> to use {@link ProteinSequence#getSequenceAsString()} for
  *     this, as it won't preserve upper- and lower-case
  * @return a new list with one {@link Boolean} per character of {@code sequence}, in order;
  *     intended for use as a {@link ProteinSequence#getUserCollection() user collection}
  */
 public static List<Object> getAlignedUserCollection(String sequence) {
   final int length = sequence.length();
   List<Object> upperCaseFlags = new ArrayList<Object>(length);
   for (int i = 0; i < length; i++) {
     upperCaseFlags.add(Character.isUpperCase(sequence.charAt(i)));
   }
   return upperCaseFlags;
 }
  /**
   * Identifies additional groups that are not directly attached to amino acids and records their
   * atom linkages on the given compound.
   *
   * <p>The groups already belonging to {@code mc} are looked up first. Ligands linked to any of
   * them are then added, followed by ligands linked to those newly added ones, and so on until a
   * pass finds nothing new.
   *
   * @param mc the {@link ModifiedCompound} to extend; atom linkages are added to it in place.
   * @param ligands candidate ligand groups; nothing is done when this list is empty.
   * @param mapChainIdChain map from chain ID to {@link Chain}, used to look up the groups of {@code
   *     mc}.
   */
  private void identifyAdditionalAttachments(
      ModifiedCompound mc, List<Group> ligands, Map<String, Chain> mapChainIdChain) {
    if (ligands.isEmpty()) {
      return;
    }

    // TODO: should the additional groups only be allowed to the identified
    // ligands or both amino acids and ligands? Currently only on ligands
    // ligands to amino acid bonds for same modification of unknown category
    // will be combined in mergeModComps()
    // TODO: how about chain-chain links?
    List<Group> identifiedGroups = new ArrayList<Group>();
    for (StructureGroup member : mc.getGroups(false)) {
      try {
        ResidueNumber resNum = new ResidueNumber();
        resNum.setChainId(member.getChainId());
        resNum.setSeqNum(member.getResidueNumber());
        resNum.setInsCode(member.getInsCode());
        identifiedGroups.add(mapChainIdChain.get(member.getChainId()).getGroupByPDB(resNum));
      } catch (StructureException e) {
        // should not happen; the group is skipped
        logger.error("Exception: ", e);
      }
    }

    // [from, to) is the slice of identifiedGroups added by the previous pass
    int from = 0;
    int to = identifiedGroups.size();
    while (from < to) {
      for (Group candidate : ligands) {
        if (identifiedGroups.contains(candidate)) {
          continue;
        }
        for (int i = from; i < to; i++) {
          List<Atom[]> linkedAtoms =
              StructureUtil.findAtomLinkages(
                  candidate, identifiedGroups.get(i), false, bondLengthTolerance);
          if (linkedAtoms.isEmpty()) {
            continue;
          }
          for (Atom[] atoms : linkedAtoms) {
            mc.addAtomLinkage(
                StructureUtil.getStructureAtomLinkage(atoms[0], false, atoms[1], false));
          }
          // Appended past 'to', so it is only examined in the next pass
          identifiedGroups.add(candidate);
          break;
        }
      }

      from = to;
      to = identifiedGroups.size();
    }
  }
Example #4
0
  /**
   * Takes an AFPChain and replaces the optimal alignment based on an alignment map
   *
   * <p>Parameters are filled with defaults (often null) or sometimes calculated.
   *
   * <p>The map is converted to blocks: keys (indices into ca1) are sorted, and a new block is
   * started whenever the mapped values (indices into ca2) stop increasing. The result is passed to
   * the block-based {@code replaceOptAln} overload.
   *
   * <p>For a way to create a new AFPChain, see {@link AlignmentTools#createAFPChain(Atom[], Atom[],
   * ResidueNumber[], ResidueNumber[])}
   *
   * @param afpChain The alignment to be modified
   * @param ca1 Atoms of the first structure
   * @param ca2 Atoms of the second structure
   * @param alignment The new alignment, as a Map from ca1 indices to ca2 indices; must not be empty
   * @return the AFPChain returned by the block-based {@code replaceOptAln} overload
   * @throws IllegalArgumentException if {@code alignment} contains no entries
   * @throws StructureException if an error occurred during superposition
   * @see AlignmentTools#createAFPChain(Atom[], Atom[], ResidueNumber[], ResidueNumber[])
   */
  public static AFPChain replaceOptAln(
      AFPChain afpChain, Atom[] ca1, Atom[] ca2, Map<Integer, Integer> alignment)
      throws StructureException {

    // Determine block lengths
    // Sort ca1 indices, then start a new block whenever ca2 indices aren't
    // increasing monotonically.
    Integer[] res1 = alignment.keySet().toArray(new Integer[0]);
    if (res1.length == 0) {
      // Without this guard the res1[0] access below fails with ArrayIndexOutOfBoundsException
      throw new IllegalArgumentException("alignment must contain at least one aligned pair");
    }
    Arrays.sort(res1);
    List<Integer> blockLens = new ArrayList<Integer>(2);
    int optLength = 0;
    Integer lastRes = alignment.get(res1[0]);
    int blkLen = lastRes == null ? 0 : 1;
    for (int i = 1; i < res1.length; i++) {
      Integer currRes = alignment.get(res1[i]); // res2 index
      assert (currRes
          != null); // could be converted to if statement if assertion doesn't hold; just modify
                    // below as well.
      if (lastRes < currRes) {
        blkLen++;
      } else {
        // CP!
        blockLens.add(blkLen);
        optLength += blkLen;
        blkLen = 1;
      }
      lastRes = currRes;
    }
    // Close the final block
    blockLens.add(blkLen);
    optLength += blkLen;

    // Create array structure for alignment
    int[][][] optAln = new int[blockLens.size()][][];
    int pos1 = 0; // index into res1
    for (int blk = 0; blk < blockLens.size(); blk++) {
      optAln[blk] = new int[2][];
      blkLen = blockLens.get(blk);
      optAln[blk][0] = new int[blkLen];
      optAln[blk][1] = new int[blkLen];
      int pos = 0; // index into optAln
      while (pos < blkLen) {
        optAln[blk][0][pos] = res1[pos1];
        Integer currRes = alignment.get(res1[pos1]);
        optAln[blk][1][pos] = currRes;
        pos++;
        pos1++;
      }
    }
    assert (pos1 == optLength);

    // Create length array
    int[] optLens = new int[blockLens.size()];
    for (int i = 0; i < blockLens.size(); i++) {
      optLens[i] = blockLens.get(i);
    }

    return replaceOptAln(afpChain, ca1, ca2, blockLens.size(), optLens, optAln);
  }
  /**
   * Builds an {@link AFPChain} from already-matched arrays of atoms and residues.
   *
   * <p>Positions where either residue array holds {@code null} are treated as gaps and dropped.
   * The remaining pairs are handed to {@link AlignmentTools#createAFPChain(Atom[], Atom[],
   * ResidueNumber[], ResidueNumber[])}, the superposition is updated, and the chain is recorded as
   * a single block.
   *
   * @param ca1 An array of atoms in the first structure
   * @param ca2 An array of atoms in the second structure
   * @param residues1 An array of {@link ResidueNumber ResidueNumbers} in the first structure that
   *     are aligned. Only null ResidueNumbers are considered to be unaligned
   * @param residues2 An array of {@link ResidueNumber ResidueNumbers} in the second structure that
   *     are aligned. Only null ResidueNumbers are considered to be unaligned
   * @return the newly created alignment, with algorithm name {@code "unknown"}
   * @throws StructureException
   */
  private static AFPChain buildAlignment(
      Atom[] ca1, Atom[] ca2, ResidueNumber[] residues1, ResidueNumber[] residues2)
      throws StructureException {

    // Keep only the positions that are aligned in both structures (i.e. remove any gap)
    List<ResidueNumber> kept1 = new ArrayList<ResidueNumber>();
    List<ResidueNumber> kept2 = new ArrayList<ResidueNumber>();
    for (int col = 0; col < residues1.length; col++) {
      ResidueNumber first = residues1[col];
      if (first == null) {
        continue;
      }
      ResidueNumber second = residues2[col];
      if (second == null) {
        continue;
      }
      kept1.add(first);
      kept2.add(second);
    }

    ResidueNumber[] alignedResidues1 = kept1.toArray(new ResidueNumber[0]);
    ResidueNumber[] alignedResidues2 = kept2.toArray(new ResidueNumber[0]);

    AFPChain afpChain = AlignmentTools.createAFPChain(ca1, ca2, alignedResidues1, alignedResidues2);
    afpChain.setAlgorithmName("unknown");

    AlignmentTools.updateSuperposition(afpChain, ca1, ca2);

    // Describe the whole alignment as one block
    int[] blockSize = {afpChain.getNrEQR()};
    afpChain.setBlockSize(blockSize);
    double[] blockRmsd = {afpChain.getTotalRmsdOpt()};
    afpChain.setBlockRmsd(blockRmsd);
    int[] blockGap = {afpChain.getGapLen()};
    afpChain.setBlockGap(blockGap);

    return afpChain;
  }
 /**
  * Handles a modification with a single component (a modified residue): each group mapped to that
  * component is wrapped in its own {@link ModifiedCompound} and appended to {@code modComps}.
  *
  * @param mapCompGroups groups found for each component
  * @param modComps list the new compounds are appended to
  * @param mod the modification being processed
  * @param components components of the modification; only the first one is used
  */
 private void processCrosslink1(
     Map<Component, Set<Group>> mapCompGroups,
     List<ModifiedCompound> modComps,
     ProteinModification mod,
     List<Component> components) {
   // TODO: is this the correct logic for CROSS_LINK_1?
   Set<Group> modifiedResidues = mapCompGroups.get(components.get(0));
   if (modifiedResidues == null) {
     // no group matched the component
     return;
   }

   for (Group residue : modifiedResidues) {
     StructureGroup wrapped = StructureUtil.getStructureGroup(residue, true);
     modComps.add(new ModifiedCompoundImpl(mod, wrapped));
   }
 }
  /**
   * Assembles the matched linkages into modified compounds.
   *
   * <p>Every combination that takes one candidate atom pair per linkage is tried. Combinations
   * accepted by {@code matchLinkages} are turned into a {@link ModifiedCompound}; each distinct
   * compound is stored once.
   *
   * @param matchedAtomsOfLinkages for each linkage of the modification, the candidate atom pairs
   * @param mod the modification whose condition supplies the linkages to match
   * @param ret ModifiedCompound will be stored here.
   */
  private void assembleLinkages(
      List<List<Atom[]>> matchedAtomsOfLinkages,
      ProteinModification mod,
      List<ModifiedCompound> ret) {
    List<ModificationLinkage> modLinks = mod.getCondition().getLinkages();

    final int nLink = matchedAtomsOfLinkages.size();
    // indices[k] is the candidate currently selected for linkage k
    int[] indices = new int[nLink];
    Set<ModifiedCompound> seen = new HashSet<ModifiedCompound>();

    while (indices[0] < matchedAtomsOfLinkages.get(0).size()) {
      List<Atom[]> combination = new ArrayList<Atom[]>(nLink);
      for (int k = 0; k < nLink; k++) {
        combination.add(matchedAtomsOfLinkages.get(k).get(indices[k]));
      }

      if (matchLinkages(modLinks, combination)) {
        List<StructureAtomLinkage> linkages =
            new ArrayList<StructureAtomLinkage>(combination.size());
        for (Atom[] pair : combination) {
          boolean firstInResidues = residues.contains(pair[0].getGroup());
          boolean secondInResidues = residues.contains(pair[1].getGroup());
          linkages.add(
              StructureUtil.getStructureAtomLinkage(
                  pair[0], firstInResidues, pair[1], secondInResidues));
        }

        ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages);
        // Set.add reports whether the compound was new
        if (seen.add(mc)) {
          ret.add(mc);
        }
      }

      // Advance to the next combination, odometer style, rightmost position first.
      // Position 0 is never wrapped: once it runs past its list the outer loop ends.
      int pos = nLink - 1;
      while (pos > 0 && indices[pos] >= matchedAtomsOfLinkages.get(pos).size() - 1) {
        indices[pos] = 0;
        pos--;
      }
      indices[pos]++;
    }
  }
Example #8
0
  /**
   * Copies the optimum alignment of an AFPChain into nested java collections. The result is
   * indexed in the same way as {@link AFPChain#getOptAln()}, but each innermost list holds exactly
   * as many positions as {@link AFPChain#getOptLen()} reports for its block, so {@code size()} is
   * meaningful.
   *
   * <pre>{@code
   * List<List<List<Integer>>> aln = getOptAlnAsList(afpChain);
   * aln.get(blockNum).get(structureNum).get(pos)   // structureNum is 0 or 1
   * }</pre>
   *
   * @param afpChain the alignment to read
   * @return a new list of blocks; each block is a list of two position lists
   */
  public static List<List<List<Integer>>> getOptAlnAsList(AFPChain afpChain) {
    int[][][] optAln = afpChain.getOptAln();
    int[] optLen = afpChain.getOptLen();
    List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum());
    for (int blk = 0; blk < afpChain.getBlockNum(); blk++) {
      // TODO could improve speed and memory by wrapping the arrays with
      // an unmodifiable list, similar to Arrays.asList(...) but with the
      // correct size parameter.
      final int len = optLen[blk];
      List<List<Integer>> block = new ArrayList<List<Integer>>(2);
      for (int structure = 0; structure < 2; structure++) {
        List<Integer> positions = new ArrayList<Integer>(len);
        for (int pos = 0; pos < len; pos++) {
          positions.add(optAln[blk][structure][pos]);
        }
        block.add(positions);
      }
      blocks.add(block);
    }

    return blocks;
  }
  /**
   * Merge identified modified compounds if linked.
   *
   * <p>Only compounds whose modification category is {@link ModificationCategory#UNDEFINED} are
   * candidates for merging. Such a compound is folded into the first earlier, not yet merged
   * compound that shares a group with it, provided both carry the same modification ID. Merged
   * compounds are removed from {@code modComps}.
   */
  private void mergeModComps(List<ModifiedCompound> modComps) {
    TreeSet<Integer> mergedAway = new TreeSet<Integer>();
    final int total = modComps.size();
    for (int cur = 1; cur < total; cur++) {
      ModifiedCompound current = modComps.get(cur);

      String id = current.getModification().getId();
      if (ProteinModificationRegistry.getById(id).getCategory() != ModificationCategory.UNDEFINED) {
        continue;
      }

      // Find the first surviving compound before 'current' that shares a group with it
      ModifiedCompound keep = null;
      for (int prev = 0; prev < cur && keep == null; prev++) {
        if (mergedAway.contains(prev)) {
          continue;
        }
        ModifiedCompound candidate = modComps.get(prev);
        if (!Collections.disjoint(candidate.getGroups(false), current.getGroups(false))) {
          keep = candidate;
        }
      }

      // Merge only modifications of the same type
      if (keep != null && keep.getModification().getId().equals(id)) {
        keep.addAtomLinkages(current.getAtomLinkages());
        mergedAway.add(cur);
      }
    }

    // Remove by index from the highest down so the remaining indices stay valid
    for (Integer index : mergedAway.descendingSet()) {
      modComps.remove(index.intValue());
    }
  }
Example #10
0
  /**
   * Applies an alignment k times. Eg if alignmentMap defines function f(x), this returns a function
   * f^k(x)=f(f(...f(x)...)).
   *
   * <p>To allow for functions with different domains and codomains, the identity function allows
   * converting back in a reasonable way. For instance, if alignmentMap represented an alignment
   * between two proteins with different numbering schemes, the identity function could calculate
   * the offset between residue numbers, eg I(x) = x-offset.
   *
   * <p>When an identity function is provided, the returned function calculates f^k(x) = f(I( f(I(
   * ... f(x) ... )) )).
   *
   * @param <S> type of the domain of the alignment
   * @param <T> type of the codomain of the alignment
   * @param alignmentMap The input function, as a map (see {@link
   *     AlignmentTools#alignmentAsMap(AFPChain)})
   * @param identity An identity-like function providing the isomorphism between the codomain of
   *     alignmentMap (of type {@code T}) and the domain (type {@code S}).
   * @param k The number of times to apply the alignment; must be at least 1
   * @return A new alignment with the same keys as {@code alignmentMap}. If the input function is
   *     not automorphic (one-to-one), then some inputs may map to null, indicating that the
   *     function is undefined for that input.
   * @throws IllegalArgumentException if {@code k} is less than 1
   */
  public static <S, T> Map<S, T> applyAlignment(Map<S, T> alignmentMap, Map<T, S> identity, int k) {

    // This implementation simply applies the map k times.
    // If k were large, it would be more efficient to do this recursively,
    // (eg f^4 = (f^2)^2) but k will usually be small.

    // k == 0 is rejected as well: f^0 cannot be expressed as a Map<S, T>, and letting it through
    // would silently return f^1.
    if (k < 1) {
      throw new IllegalArgumentException("k must be positive");
    }
    if (k == 1) {
      return new HashMap<S, T>(alignmentMap);
    }

    Map<S, T> imageMap = new HashMap<S, T>(alignmentMap.size());

    // TODO handle nulls consistently.
    for (S pre : alignmentMap.keySet()) {
      // Compute I(f(...)) k-1 times; null means the function is undefined from here on
      S current = pre;
      for (int n = 1; n < k && current != null; n++) {
        T intermediate = alignmentMap.get(current);
        current = (intermediate == null ? null : identity.get(intermediate));
      }

      // current is f^(k-1)(pre) mapped back into the domain, so take the final step
      T postK = (current == null ? null : alignmentMap.get(current));
      imageMap.put(pre, postK);
    }
    return imageMap;
  }
  /**
   * Record unidentifiable atom linkages in a chain. Only linkages between two residues or one
   * residue and one ligand will be recorded.
   *
   * <p>Linkages that already belong to one of the identified compounds are skipped. NOTE(review):
   * this relies on {@code StructureAtomLinkage} implementing {@code equals}/{@code hashCode} by
   * value - confirm.
   *
   * @param modComps the identified compounds whose linkages must not be recorded again
   * @param ligands ligand groups to test against every residue
   */
  private void recordUnidentifiableAtomLinkages(
      List<ModifiedCompound> modComps, List<Group> ligands) {

    // first put identified linkages in a set for fast query
    Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>();
    for (ModifiedCompound mc : modComps) {
      identifiedLinkages.addAll(mc.getAtomLinkages());
    }

    // record
    // cross link: every unordered pair of residues
    int nRes = residues.size();
    for (int i = 0; i < nRes - 1; i++) {
      Group group1 = residues.get(i);
      for (int j = i + 1; j < nRes; j++) {
        Group group2 = residues.get(j);
        List<Atom[]> linkages =
            StructureUtil.findAtomLinkages(group1, group2, true, bondLengthTolerance);
        for (Atom[] atoms : linkages) {
          StructureAtomLinkage link =
              StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], true);
          if (!identifiedLinkages.contains(link)) {
            unidentifiableAtomLinkages.add(link);
          }
        }
      }
    }

    // attachment: every residue against every ligand
    int nLig = ligands.size();
    for (int i = 0; i < nRes; i++) {
      Group group1 = residues.get(i);
      for (int j = 0; j < nLig; j++) {
        Group group2 = ligands.get(j);
        if (group1.equals(group2)) { // overlap between residues and ligands
          continue;
        }
        List<Atom[]> linkages =
            StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance);
        for (Atom[] atoms : linkages) {
          StructureAtomLinkage link =
              StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], false);
          if (!identifiedLinkages.contains(link)) {
            unidentifiableAtomLinkages.add(link);
          }
        }
      }
    }
  }
  /**
   * Checks whether a combination of atom pairs is consistent with the linkages of a modification.
   *
   * <p>For every pair of linkages, two linkage ends must refer to the same component index exactly
   * when the corresponding atoms belong to the same group, and must carry the same (non-null)
   * atom label exactly when the corresponding atoms are equal.
   *
   * @param linkages the linkages required by the modification
   * @param atomLinkages one atom pair per linkage, in the same order
   * @return true if atomLinkages satisfy the condition; false, otherwise.
   */
  private boolean matchLinkages(List<ModificationLinkage> linkages, List<Atom[]> atomLinkages) {
    final int nLink = linkages.size();
    if (atomLinkages.size() != nLink) {
      return false;
    }

    for (int i = 0; i < nLink - 1; i++) {
      ModificationLinkage link1 = linkages.get(i);
      Atom[] atoms1 = atomLinkages.get(i);
      for (int j = i + 1; j < nLink; j++) {
        ModificationLinkage link2 = linkages.get(j);
        Atom[] atoms2 = atomLinkages.get(j);

        // check components: same component index <=> same group, for all four end pairings
        boolean componentsConsistent =
            ((link1.getIndexOfComponent1() == link2.getIndexOfComponent1())
                    == atoms1[0].getGroup().equals(atoms2[0].getGroup()))
                && ((link1.getIndexOfComponent1() == link2.getIndexOfComponent2())
                    == atoms1[0].getGroup().equals(atoms2[1].getGroup()))
                && ((link1.getIndexOfComponent2() == link2.getIndexOfComponent1())
                    == atoms1[1].getGroup().equals(atoms2[0].getGroup()))
                && ((link1.getIndexOfComponent2() == link2.getIndexOfComponent2())
                    == atoms1[1].getGroup().equals(atoms2[1].getGroup()));
        if (!componentsConsistent) {
          return false;
        }

        // check atoms: same label <=> same atom, for all four end pairings
        String label11 = link1.getLabelOfAtomOnComponent1();
        String label12 = link1.getLabelOfAtomOnComponent2();
        String label21 = link2.getLabelOfAtomOnComponent1();
        String label22 = link2.getLabelOfAtomOnComponent2();
        boolean atomsConsistent =
            (sameNonNullLabel(label11, label21) == atoms1[0].equals(atoms2[0]))
                && (sameNonNullLabel(label11, label22) == atoms1[0].equals(atoms2[1]))
                && (sameNonNullLabel(label12, label21) == atoms1[1].equals(atoms2[0]))
                && (sameNonNullLabel(label12, label22) == atoms1[1].equals(atoms2[1]));
        if (!atomsConsistent) {
          return false;
        }
      }
    }

    return true;
  }

  /** Returns true only if both labels are non-null and equal. */
  private static boolean sameNonNullLabel(String first, String second) {
    return first != null && second != null && first.equals(second);
  }
  /**
   * Uses two sequences each with a corresponding structure to create an AFPChain corresponding to
   * the alignment. Provided only for convenience since FastaReaders return such maps.
   *
   * <p>The sequences are taken in the iteration order of {@code sequences}: the first one is
   * paired with {@code structure1} and the second with {@code structure2}. Callers should
   * therefore pass a map with a predictable iteration order.
   *
   * @param sequences A Map containing exactly two entries from sequence names as Strings to gapped
   *     ProteinSequences; the name is ignored
   * @param structure1 the structure belonging to the first sequence; must not be null
   * @param structure2 the structure belonging to the second sequence; must not be null
   * @return the result of {@link #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure,
   *     Structure)} for the two sequences
   * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
   * @throws IllegalArgumentException if {@code sequences} does not hold exactly two entries or a
   *     structure is null
   * @throws StructureException
   */
  public static AFPChain fastaToAfpChain(
      Map<String, ProteinSequence> sequences, Structure structure1, Structure structure2)
      throws StructureException {

    if (sequences.size() != 2) {
      throw new IllegalArgumentException(
          "There must be exactly 2 sequences, but there were " + sequences.size());
    }

    if (structure1 == null || structure2 == null) {
      throw new IllegalArgumentException("A structure is null");
    }

    // The names (keys) are ignored; only the two sequences, in iteration order, are needed
    List<ProteinSequence> seqs = new ArrayList<ProteinSequence>(sequences.values());

    return fastaToAfpChain(seqs.get(0), seqs.get(1), structure1, structure2);
  }
  /**
   * Identifies a set of modifications in a list of chains.
   *
   * <p>Any previous state is cleared via {@code reset()} first. The compounds found are added to
   * the identified modified compounds. When the corresponding options are enabled, additional
   * attachments and unidentifiable linkages/residues are recorded as well.
   *
   * @param chains query {@link Chain}s.
   * @param potentialModifications query {@link ProteinModification}s; if empty, nothing is
   *     identified.
   * @throws IllegalArgumentException if either argument is null
   */
  public void identify(
      final List<Chain> chains, final Set<ProteinModification> potentialModifications) {

    if (chains == null) {
      throw new IllegalArgumentException("Null structure.");
    }
    if (potentialModifications == null) {
      throw new IllegalArgumentException("Null potentialModifications.");
    }

    reset();

    if (potentialModifications.isEmpty()) {
      return;
    }

    Map<String, Chain> chainsById = new HashMap<String, Chain>(chains.size());
    residues = new ArrayList<Group>();
    List<Group> ligands = new ArrayList<Group>();
    Map<Component, Set<Group>> groupsByComponent = new HashMap<Component, Set<Group>>();

    // Collect amino acids and ligands chain by chain
    for (Chain chain : chains) {
      chainsById.put(chain.getChainID(), chain);

      List<Group> chainResidues = StructureUtil.getAminoAcids(chain);
      List<Group> chainLigands = StructureTools.filterLigands(chain.getAtomGroups());

      residues.addAll(chainResidues);
      residues.removeAll(chainLigands);
      ligands.addAll(chainLigands);
      addModificationGroups(potentialModifications, chainResidues, chainLigands, groupsByComponent);
    }

    if (residues.isEmpty()) {
      String pdbId = "?";
      if (!chains.isEmpty()) {
        Structure struc = chains.get(0).getParent();
        if (struc != null) {
          pdbId = struc.getPDBCode();
        }
      }
      logger.warn(
          "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.",
          pdbId);
    }

    List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>();

    for (ProteinModification mod : potentialModifications) {
      ModificationCondition condition = mod.getCondition();
      List<Component> components = condition.getComponents();
      if (groupsByComponent.keySet().containsAll(components)) {
        // all components exist for this mod
        if (components.size() == 1) {
          processCrosslink1(groupsByComponent, modComps, mod, components);
        } else {
          processMultiCrosslink(groupsByComponent, modComps, mod, condition);
        }
      }
    }

    if (recordAdditionalAttachments) {
      // identify additional groups that are not directly attached to amino acids.
      for (ModifiedCompound mc : modComps) {
        identifyAdditionalAttachments(mc, ligands, chainsById);
      }
    }

    mergeModComps(modComps);

    identifiedModifiedCompounds.addAll(modComps);

    // record unidentifiable linkage
    if (recordUnidentifiableModifiedCompounds) {
      recordUnidentifiableAtomLinkages(modComps, ligands);
      recordUnidentifiableModifiedResidues(modComps);
    }
  }
  /**
   * Collects, for every component used by the given modifications, the groups of one chain that
   * match it, and stores them in {@code saveTo}.
   *
   * <p>Ligands and residues are matched by their trimmed PDB name or by the wildcard {@code "*"}.
   * N-terminal and C-terminal components are matched against the first and last residue; the
   * search moves inwards for as long as the residue just examined is also listed in {@code
   * ligands}.
   *
   * @param modifications a set of {@link ProteinModification}s.
   * @param residues residues of the chain
   * @param ligands ligands of the chain
   * @param saveTo save result to: map from component to the corresponding groups; existing
   *     entries are extended
   * @throws IllegalArgumentException if {@code modifications}, {@code residues} or {@code ligands}
   *     is null
   */
  private void addModificationGroups(
      final Set<ProteinModification> modifications,
      final List<Group> residues,
      final List<Group> ligands,
      final Map<Component, Set<Group>> saveTo) {
    if (residues == null || ligands == null || modifications == null) {
      throw new IllegalArgumentException("Null argument(s).");
    }

    // Index each component under every single-ID component (same terminal flags) it covers
    Map<Component, Set<Component>> mapSingleMultiComps = new HashMap<Component, Set<Component>>();
    for (ProteinModification mod : modifications) {
      ModificationCondition condition = mod.getCondition();
      for (Component comp : condition.getComponents()) {
        for (String pdbccId : comp.getPdbccIds()) {
          Component single =
              Component.of(Collections.singleton(pdbccId), comp.isNTerminal(), comp.isCTerminal());
          Set<Component> mult = mapSingleMultiComps.get(single);
          if (mult == null) {
            mult = new HashSet<Component>();
            mapSingleMultiComps.put(single, mult);
          }
          mult.add(comp);
        }
      }
    }

    // The non-terminal wildcard applies to ligands and residues alike
    Set<Component> wildCard = mapSingleMultiComps.get(Component.of("*"));

    // ligands
    for (Group group : ligands) {
      String pdbccId = group.getPDBName().trim();
      addGroupToComponents(
          group, wildCard, mapSingleMultiComps.get(Component.of(pdbccId)), saveTo);
    }

    // residues
    if (residues.isEmpty()) {
      return;
    }

    // for all residues
    for (Group group : residues) {
      String pdbccId = group.getPDBName().trim();
      addGroupToComponents(
          group, wildCard, mapSingleMultiComps.get(Component.of(pdbccId)), saveTo);
    }

    // for N-terminal
    Set<Component> nTermWildCard = mapSingleMultiComps.get(Component.of("*", true, false));
    int nRes = residues.size();
    int iRes = 0;
    Group res;
    do {
      // for all ligands on N terminal and the first residue
      res = residues.get(iRes++);

      // Trimmed like the lookups above so that padded PDB names match as well
      Set<Component> comps =
          mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), true, false));
      addGroupToComponents(res, nTermWildCard, comps, saveTo);
    } while (iRes < nRes && ligands.contains(res));

    // for C-terminal
    Set<Component> cTermWildCard = mapSingleMultiComps.get(Component.of("*", false, true));
    iRes = residues.size() - 1;
    do {
      // for all ligands on C terminal and the last residue
      res = residues.get(iRes--);

      Set<Component> comps =
          mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), false, true));
      addGroupToComponents(res, cTermWildCard, comps, saveTo);
    } while (iRes >= 0 && ligands.contains(res));
  }

  /**
   * Adds {@code group} to the group set of every component in the union of {@code wildCard} and
   * {@code comps}, creating the set in {@code saveTo} on first use.
   */
  private void addGroupToComponents(
      Group group,
      Set<Component> wildCard,
      Set<Component> comps,
      Map<Component, Set<Group>> saveTo) {
    for (Component comp : unionComponentSet(wildCard, comps)) {
      Set<Group> gs = saveTo.get(comp);
      if (gs == null) {
        gs = new LinkedHashSet<Group>();
        saveTo.put(comp, gs);
      }
      gs.add(group);
    }
  }
  /**
   * Get matched atoms for all linkages.
   *
   * <p>For each linkage of the condition, every pair of distinct groups mapped to its two
   * components is searched for the nearest atom linkage. The search stops at the first linkage
   * without any match, so the result may hold fewer lists than the condition has linkages.
   *
   * @param condition the condition whose linkages are matched
   * @param mapCompGroups groups found for each component
   * @return one list of matched atom pairs per linkage, in linkage order
   */
  private List<List<Atom[]>> getMatchedAtomsOfLinkages(
      ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) {
    List<ModificationLinkage> linkages = condition.getLinkages();
    final int nLink = linkages.size();

    List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink);

    for (int iLink = 0; iLink < nLink; iLink++) {
      ModificationLinkage linkage = linkages.get(iLink);

      Set<Group> groups1 = mapCompGroups.get(linkage.getComponent1());
      Set<Group> groups2 = mapCompGroups.get(linkage.getComponent2());

      // null means "search all atoms"
      List<String> atomNames1 = wildcardAsNull(linkage.getPDBNameOfPotentialAtomsOnComponent1());
      List<String> atomNames2 = wildcardAsNull(linkage.getPDBNameOfPotentialAtomsOnComponent2());
      boolean bothWildcard = atomNames1 == null && atomNames2 == null;

      List<Atom[]> matches = new ArrayList<Atom[]>();
      for (Group g1 : groups1) {
        for (Group g2 : groups2) {
          if (g1.equals(g2)) {
            continue;
          }

          // only for wildcard match of two residues
          boolean ignoreNCLinkage =
              bothWildcard && residues.contains(g1) && residues.contains(g2);

          Atom[] atoms =
              StructureUtil.findNearestAtomLinkage(
                  g1, g2, atomNames1, atomNames2, ignoreNCLinkage, bondLengthTolerance);
          if (atoms != null) {
            matches.add(atoms);
          }
        }
      }

      if (matches.isEmpty()) {
        // broken linkage
        break;
      }

      matchedAtomsOfLinkages.add(matches);
    }

    return matchedAtomsOfLinkages;
  }

  /** Returns null if the list contains the wildcard name "*", otherwise the list itself. */
  private static List<String> wildcardAsNull(List<String> atomNames) {
    for (String name : atomNames) {
      if (name.equals("*")) {
        return null;
      }
    }
    return atomNames;
  }
Example #17
0
  /**
   * Splits the optimal-alignment blocks of an alignment wherever the aligned indices stop
   * increasing strictly.
   *
   * <p>Each block is scanned position by position. Whenever the index in either row is less than
   * or equal to the index at the previous position, a new block is started at that position.
   * Blocks that need no split are reused as they are (the array is not copied); split blocks are
   * replaced by freshly allocated pieces.
   *
   * @param a the alignment to examine; its optAln, optLen and blockNum are read
   * @param ca1 atom array forwarded unchanged to {@code replaceOptAln} (presumably the atoms of
   *     the first aligned structure)
   * @param ca2 atom array forwarded unchanged to {@code replaceOptAln} (presumably the atoms of
   *     the second aligned structure)
   * @return {@code a} itself if no block had to be split; otherwise the result of {@code
   *     replaceOptAln} called with the new block layout
   * @throws StructureException if an error occurred during superposition
   */
  public static AFPChain splitBlocksByTopology(AFPChain a, Atom[] ca1, Atom[] ca2)
      throws StructureException {
    int[][][] optAln = a.getOptAln();
    int blockNum = a.getBlockNum();
    int[] optLen = a.getOptLen();

    // Determine block lengths
    // Split blocks if residue indices don't increase monotonically
    List<Integer> newBlkLen = new ArrayList<Integer>();
    boolean blockChanged = false;
    for (int blk = 0; blk < blockNum; blk++) {
      int currLen = 1;
      for (int pos = 1; pos < optLen[blk]; pos++) {
        if (optAln[blk][0][pos] <= optAln[blk][0][pos - 1]
            || optAln[blk][1][pos] <= optAln[blk][1][pos - 1]) {
          // start a new block
          newBlkLen.add(currLen);
          currLen = 0;
          blockChanged = true;
        }
        currLen++;
      }
      if (optLen[blk] < 2) {
        // Blocks of length 0 or 1 cannot be split; keep their length (currLen would report 1
        // even for an empty block).
        newBlkLen.add(optLen[blk]);
      } else {
        newBlkLen.add(currLen);
      }
    }

    // Check if anything needs to be split
    if (!blockChanged) {
      return a;
    }

    // Split blocks
    List<int[][]> blocks = new ArrayList<int[][]>(newBlkLen.size());

    int oldBlk = 0; // index of the original block currently being consumed
    int pos = 0; // offset of the next unconsumed position within that block
    for (int blkLen : newBlkLen) {
      if (blkLen == optLen[oldBlk]) {
        assert (pos == 0); // should be the whole block
        // Use the old block
        blocks.add(optAln[oldBlk]);
        // This block is fully consumed: move on, otherwise the pieces of a later split block
        // would be measured against and copied from this one.
        oldBlk++;
      } else {
        int[][] newBlock = new int[2][blkLen];
        assert (pos + blkLen <= optLen[oldBlk]); // don't overrun block
        for (int i = 0; i < blkLen; i++) {
          newBlock[0][i] = optAln[oldBlk][0][pos + i];
          newBlock[1][i] = optAln[oldBlk][1][pos + i];
        }
        pos += blkLen;
        blocks.add(newBlock);

        if (pos == optLen[oldBlk]) {
          // Finished this oldBlk, start the next
          oldBlk++;
          pos = 0;
        }
      }
    }

    // Store new blocks
    int[][][] newOptAln = blocks.toArray(new int[0][][]);
    int[] newBlockLens = new int[newBlkLen.size()];
    for (int i = 0; i < newBlkLen.size(); i++) {
      newBlockLens[i] = newBlkLen.get(i);
    }

    return replaceOptAln(a, ca1, ca2, blocks.size(), newBlockLens, newOptAln);
  }
Example #18
0
  /**
   * Tries to detect symmetry in an alignment.
   *
   * <p>Conceptually, an alignment is a function {@code f:A->B} between two sets of integers. The
   * function may have simple topology (meaning that if two elements of A are close, then their
   * images in B will also be close), or may have more complex topology (such as a circular
   * permutation). This function checks <i>alignment</i> against a reference function
   * <i>identity</i>, which should have simple topology. It then tries to determine the symmetry
   * order of <i>alignment</i> relative to <i>identity</i>, up to a maximum order of
   * <i>maxSymmetry</i>.
   *
   * <p><strong>Details</strong><br>
   * Considers the offset (in number of residues) which a residue moves after undergoing <i>n</i>
   * alternating transforms by alignment and identity. If <i>n</i> corresponds to the intrinsic
   * order of the alignment, this will be small. This algorithm tries increasing values of <i>n</i>
   * and looks for abrupt decreases in the root mean squared offset. The value at the first order
   * with a usable metric only serves as the baseline; the first later order whose metric drops
   * below {@code minimumMetricChange} times that baseline is reported. If none is found at
   * {@code n <= maxSymmetry}, the alignment is reported as non-symmetric.
   *
   * @param alignment The alignment to test for symmetry
   * @param identity An alignment with simple topology which approximates the sequential
   *     relationship between the two proteins. Should map in the reverse direction from alignment.
   * @param maxSymmetry Maximum symmetry to consider. High values increase the calculation time and
   *     can lead to overfitting.
   * @param minimumMetricChange Threshold factor: an order is accepted when its root mean squared
   *     offset is less than this factor times the baseline value, so smaller values demand a
   *     sharper drop. 0.4f seems to work well for CeSymm.
   * @return The order of symmetry of alignment, or 1 if no order {@code <= maxSymmetry} is found.
   * @see IdentityMap For a simple identity function
   */
  public static int getSymmetryOrder(
      Map<Integer, Integer> alignment,
      Map<Integer, Integer> identity,
      final int maxSymmetry,
      final float minimumMetricChange) {
    List<Integer> preimage = new ArrayList<Integer>(alignment.keySet()); // currently unmodified
    List<Integer> image = new ArrayList<Integer>(preimage);

    int bestSymmetry = 1;
    double bestMetric = Double.POSITIVE_INFINITY; // lower is better
    boolean foundSymmetry = false;

    if (debug) {
      logger.trace("Symm\tPos\tDelta");
    }

    for (int n = 1; n <= maxSymmetry; n++) {
      // Accumulate in long: squared offsets summed over many positions exceed the int range.
      long deltasSq = 0;
      int numDeltas = 0;
      // apply alignment
      for (int i = 0; i < image.size(); i++) {
        Integer pre = image.get(i);
        Integer intermediate = (pre == null ? null : alignment.get(pre));
        Integer post = (intermediate == null ? null : identity.get(intermediate));
        // A null marks a position that dropped out of the mapping; it stays null from now on.
        image.set(i, post);

        if (post != null) {
          long delta = (long) post - preimage.get(i);

          deltasSq += delta * delta;
          numDeltas++;

          if (debug) {
            // Formatted eagerly so the row is rendered whether the logger expects printf-style
            // or {}-style patterns.
            logger.debug(String.format("%d\t%d\t%d", n, preimage.get(i), delta));
          }
        }
      }

      // Metrics: RMS compensates for the trend of smaller numDeltas with higher order
      // Not normalizing by numDeltas favors smaller orders

      // root mean squared distance; NaN when no position mapped through (0/0), in which case
      // the comparison below is false and this order is skipped
      double metric = Math.sqrt((double) deltasSq / numDeltas);

      if (!foundSymmetry && metric < bestMetric * minimumMetricChange) {
        // n = 1 is never the best symmetry
        if (bestMetric < Double.POSITIVE_INFINITY) {
          foundSymmetry = true;
        }
        bestSymmetry = n;
        bestMetric = metric;
      }

      // When debugging need to loop over everything. Unneeded in production
      if (!debug && foundSymmetry) {
        break;
      }
    }

    return foundSymmetry ? bestSymmetry : 1;
  }