Ejemplo n.º 1
0
  /**
   * Derives a sequential (order-preserving) residue mapping from an alignment that may not be
   * sequential.
   *
   * <p>The aligned residues of each structure are sorted independently in ascending order, and the
   * i-th smallest residue of one structure is then paired with the i-th smallest residue of the
   * other. The result behaves like an identity function between the two numbering schemes, which
   * is what {@link #getSymmetryOrder(Map, Map, int, float)} expects for its identity argument.
   *
   * <ul>
   *   <li>A perfect self-alignment has the same pre-image and image, so every residue maps to
   *       itself (X->X).
   *   <li>Gaps and alignment errors only disturb the result locally; they do not propagate through
   *       the whole mapping.
   * </ul>
   *
   * <h4>Example:</h4>
   *
   * The non-sequential alignment
   *
   * <pre>
   * 12456789
   * 78912345</pre>
   *
   * yields the map
   *
   * <pre>
   * 12456789
   * 12345789</pre>
   *
   * @param alignment the (possibly non-sequential) input alignment, structure 1 to structure 2
   * @param inverseAlignment if {@code false}, the result maps structure 1 to structure 2; if
   *     {@code true}, the result maps structure 2 to structure 1
   * @return a new map pairing the sorted residues of one structure with those of the other
   * @throws IllegalArgumentException if two residues of the first structure are aligned to the
   *     same residue of the second structure
   */
  public static Map<Integer, Integer> guessSequentialAlignment(
      Map<Integer, Integer> alignment, boolean inverseAlignment) {
    final SortedSet<Integer> firstResidues = new TreeSet<Integer>();
    final SortedSet<Integer> secondResidues = new TreeSet<Integer>();

    // Collect both sides in sorted order, rejecting any repeated target residue.
    for (Entry<Integer, Integer> entry : alignment.entrySet()) {
      firstResidues.add(entry.getKey());
      final Integer partner = entry.getValue();
      final boolean unseen = secondResidues.add(partner);
      if (!unseen) {
        throw new IllegalArgumentException(
            "Alignment is not one-to-one for residue "
                + partner
                + " of the second structure.");
      }
    }

    // Both sets hold one element per alignment entry, so they can be walked in lockstep.
    final Map<Integer, Integer> sequential = new HashMap<Integer, Integer>();
    final Iterator<Integer> secondIterator = secondResidues.iterator();
    for (Integer first : firstResidues) {
      final Integer second = secondIterator.next();
      if (inverseAlignment) {
        sequential.put(second, first); // 2->1
      } else {
        sequential.put(first, second); // 1->2
      }
    }
    return sequential;
  }
Ejemplo n.º 2
0
 /**
  * Parses a concise alignment string into a residue-to-residue map.
  *
  * <p>Every occurrence of {@code from>to} (two runs of decimal digits separated by {@code >}) adds
  * an entry {@code from -> to}. Pairs may be chained: {@code "1>2>3"} yields {@code 1 -> 2} and
  * {@code 2 -> 3}, because the search for the next pair resumes at the target of the previous
  * pair. Any other text between pairs is ignored. If the same {@code from} value occurs more than
  * once, the last occurrence wins.
  *
  * @param string the text to parse; must not be {@code null}
  * @return a new mutable map with one entry per parsed pair; empty if no pair is found
  * @throws NumberFormatException if a digit run does not fit in an {@code int}
  * @throws NullPointerException if {@code string} is {@code null}
  * @see #toConciseAlignmentString(Map, Map)
  */
 public static Map<Integer, Integer> fromConciseAlignmentString(String string) {
   Map<Integer, Integer> map = new HashMap<Integer, Integer>();

   // Compile the pattern once and scan the input in place instead of recompiling the regex and
   // copying the remaining text on every iteration.
   Matcher matcher = Pattern.compile("(\\d+)>(\\d+)").matcher(string);
   int searchFrom = 0;
   while (matcher.find(searchFrom)) {
     Integer from = Integer.valueOf(matcher.group(1));
     Integer to = Integer.valueOf(matcher.group(2));
     map.put(from, to);
     // Resume just past the '>' so the target can also act as the source of a chained pair.
     // This index is the start of group 2 and therefore always inside the input.
     searchFrom = matcher.end(1) + 1;
   }
   return map;
 }
Ejemplo n.º 3
0
  /**
   * Applies an alignment k times. If {@code alignmentMap} defines a function f(x), this returns
   * the function f^k(x) = f(f(...f(x)...)).
   *
   * <p>To allow for functions whose domain and codomain differ, the {@code identity} map converts
   * each result back into the domain before the next application. For instance, if
   * {@code alignmentMap} is an alignment between two proteins with different numbering schemes,
   * the identity function could apply the offset between residue numbers, e.g. I(x) = x - offset.
   *
   * <p>With an identity function, the returned map therefore represents f^k(x) = f(I( f(I( ...
   * f(x) ... )) )).
   *
   * @param <S> type of the domain of {@code alignmentMap}
   * @param <T> type of the codomain of {@code alignmentMap}
   * @param alignmentMap the input function, as a map (see {@link
   *     AlignmentTools#alignmentAsMap(AFPChain)})
   * @param identity an identity-like function from the codomain of {@code alignmentMap} (type
   *     {@code T}) back to its domain (type {@code S}); only consulted when {@code k > 1}
   * @param k the number of times to apply the alignment; must be at least 1
   * @return a new map with the same keys as {@code alignmentMap}. A key maps to {@code null} when
   *     repeated application leaves the domain of {@code alignmentMap} or of {@code identity},
   *     i.e. the function is undefined for that input.
   * @throws IllegalArgumentException if {@code k} is less than 1
   */
  public static <S, T> Map<S, T> applyAlignment(Map<S, T> alignmentMap, Map<T, S> identity, int k) {

    // This implementation simply applies the map k times.
    // If k were large, it would be more efficient to do this by repeated squaring
    // (eg f^4 = (f^2)^2) but k will usually be small.

    // f^0 would be a map S -> S, which cannot be expressed as Map<S, T>; reject it instead of
    // silently returning f^1 as the previous "k < 0" check did.
    if (k < 1) {
      throw new IllegalArgumentException("k must be positive");
    }
    if (k == 1) {
      return new HashMap<S, T>(alignmentMap);
    }

    Map<S, T> imageMap = new HashMap<S, T>(alignmentMap.size());
    for (S pre : alignmentMap.keySet()) {
      // Compute f^(k-1)(pre), expressed in the domain; stop early once it becomes undefined.
      S current = pre;
      for (int n = 1; n < k && current != null; n++) {
        T intermediate = alignmentMap.get(current);
        current = (intermediate == null ? null : identity.get(intermediate));
      }

      // Take the final step without converting back, so the value stays in the codomain.
      T postK = (current == null ? null : alignmentMap.get(current));
      imageMap.put(pre, postK);
    }
    return imageMap;
  }
Ejemplo n.º 4
0
  /**
   * Converts the optimal alignment stored in an {@link AFPChain} into a map from residue indices
   * of protein 1 to the residue indices of protein 2 they are aligned with.
   *
   * <p>For example, the alignment
   *
   * <pre>
   * 1234
   * 5678</pre>
   *
   * is returned as
   *
   * <pre>
   * 1->5
   * 2->6
   * 3->7
   * 4->8</pre>
   *
   * <p>An alignment whose reported length is below 1 produces an empty map.
   *
   * @param afpChain the alignment to convert
   * @return a new map from each aligned residue of protein 1 to its partner in protein 2
   * @throws StructureException if a residue of protein 1 occurs more than once in the alignment
   */
  public static Map<Integer, Integer> alignmentAsMap(AFPChain afpChain) throws StructureException {
    final Map<Integer, Integer> residueMap = new HashMap<Integer, Integer>();

    if (afpChain.getAlnLength() >= 1) {
      final int[][][] alignedBlocks = afpChain.getOptAln();
      final int[] blockLengths = afpChain.getOptLen();

      int blockIndex = 0;
      while (blockIndex < afpChain.getBlockNum()) {
        int column = 0;
        while (column < blockLengths[blockIndex]) {
          final int first = alignedBlocks[blockIndex][0][column];
          final int second = alignedBlocks[blockIndex][1][column];

          // A residue of protein 1 may only be paired once.
          final Integer earlierPartner = residueMap.get(first);
          if (earlierPartner != null) {
            throw new StructureException(
                String.format(
                    "Residue %d aligned to both %d and %d.", first, earlierPartner, second));
          }
          residueMap.put(first, second);
          column++;
        }
        blockIndex++;
      }
    }
    return residueMap;
  }
  /**
   * Identifies which of the given candidate modifications occur in a list of chains.
   *
   * <p>Previous results are discarded via {@code reset()} before anything else happens. Matches
   * are added to {@code identifiedModifiedCompounds}. When the corresponding flags are set,
   * additional attachments and unidentifiable linkages/residues are recorded as well.
   *
   * @param chains query {@link Chain}s.
   * @param potentialModifications query {@link ProteinModification}s; if empty, the method returns
   *     right after the reset.
   * @throws IllegalArgumentException if either argument is {@code null}
   */
  public void identify(
      final List<Chain> chains, final Set<ProteinModification> potentialModifications) {

    if (null == chains) {
      throw new IllegalArgumentException("Null structure.");
    }
    if (null == potentialModifications) {
      throw new IllegalArgumentException("Null potentialModifications.");
    }

    reset();

    if (potentialModifications.isEmpty()) {
      return;
    }

    final Map<String, Chain> chainsById = new HashMap<String, Chain>(chains.size());
    residues = new ArrayList<Group>();
    final List<Group> allLigands = new ArrayList<Group>();
    final Map<Component, Set<Group>> groupsByComponent = new HashMap<Component, Set<Group>>();

    // Gather amino acids and ligands chain by chain, and index groups by candidate component.
    for (final Chain current : chains) {
      chainsById.put(current.getChainID(), current);

      final List<Group> aminoAcids = StructureUtil.getAminoAcids(current);
      final List<Group> chainLigands = StructureTools.filterLigands(current.getAtomGroups());

      residues.addAll(aminoAcids);
      residues.removeAll(chainLigands);
      allLigands.addAll(chainLigands);
      addModificationGroups(potentialModifications, aminoAcids, chainLigands, groupsByComponent);
    }

    if (residues.isEmpty()) {
      // Only warn; processing continues even without amino acids.
      String pdbId = "?";
      if (!chains.isEmpty()) {
        final Structure parent = chains.get(0).getParent();
        if (parent != null) {
          pdbId = parent.getPDBCode();
        }
      }
      logger.warn(
          "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.",
          pdbId);
    }

    final List<ModifiedCompound> found = new ArrayList<ModifiedCompound>();

    for (final ProteinModification candidate : potentialModifications) {
      final ModificationCondition condition = candidate.getCondition();
      final List<Component> required = condition.getComponents();

      // Only consider a modification when every one of its components was seen in the chains.
      if (groupsByComponent.keySet().containsAll(required)) {
        if (required.size() == 1) {
          processCrosslink1(groupsByComponent, found, candidate, required);
        } else {
          processMultiCrosslink(groupsByComponent, found, candidate, condition);
        }
      }
    }

    if (recordAdditionalAttachments) {
      // identify additional groups that are not directly attached to amino acids.
      for (final ModifiedCompound compound : found) {
        identifyAdditionalAttachments(compound, allLigands, chainsById);
      }
    }

    mergeModComps(found);

    identifiedModifiedCompounds.addAll(found);

    // record unidentifiable linkage
    if (recordUnidentifiableModifiedCompounds) {
      recordUnidentifiableAtomLinkages(found, allLigands);
      recordUnidentifiableModifiedResidues(found);
    }
  }