Ejemplo n.º 1
1
  /**
   * Print an alignment map in a concise representation. Edges are given as two numbers separated by
   * '>'. They are chained together where possible, or separated by spaces where disjoint or
   * branched.
   *
   * <p>Note that more concise representations may be possible. Examples:
   * <li>1>2>3>1
   * <li>1>2>3>2 4>3
   *
   * @param alignment The input function, as a map (see {@link
   *     AlignmentTools#alignmentAsMap(AFPChain)})
   * @param identity An identity-like function providing the isomorphism between the codomain of
   *     alignment (of type <T>) and the domain (type <S>).
   * @return
   */
  public static <S, T> String toConciseAlignmentString(Map<S, T> alignment, Map<T, S> identity) {
    // Clone input to prevent changes
    Map<S, T> alig = new HashMap<S, T>(alignment);

    // Generate inverse alignment
    Map<S, List<S>> inverse = new HashMap<S, List<S>>();
    for (Entry<S, T> e : alig.entrySet()) {
      S val = identity.get(e.getValue());
      if (inverse.containsKey(val)) {
        List<S> l = inverse.get(val);
        l.add(e.getKey());
      } else {
        List<S> l = new ArrayList<S>();
        l.add(e.getKey());
        inverse.put(val, l);
      }
    }

    StringBuilder str = new StringBuilder();

    while (!alig.isEmpty()) {
      // Pick an edge and work upstream to a root or cycle
      S seedNode = alig.keySet().iterator().next();
      S node = seedNode;
      if (inverse.containsKey(seedNode)) {
        node = inverse.get(seedNode).iterator().next();
        while (node != seedNode && inverse.containsKey(node)) {
          node = inverse.get(node).iterator().next();
        }
      }

      // Now work downstream, deleting edges as we go
      seedNode = node;
      str.append(node);

      while (alig.containsKey(node)) {
        S lastNode = node;
        node = identity.get(alig.get(lastNode));

        // Output
        str.append('>');
        str.append(node);

        // Remove edge
        alig.remove(lastNode);
        List<S> inv = inverse.get(node);
        if (inv.size() > 1) {
          inv.remove(node);
        } else {
          inverse.remove(node);
        }
      }
      if (!alig.isEmpty()) {
        str.append(' ');
      }
    }

    return str.toString();
  }
  /**
   * Builds an {@link AFPChain} from already-matched arrays of atoms and residues.
   *
   * @param ca1 An array of atoms in the first structure
   * @param ca2 An array of atoms in the second structure
   * @param residues1 An array of {@link ResidueNumber ResidueNumbers} in the first structure that
   *     are aligned. Only null ResidueNumbers are considered to be unaligned
   * @param residues2 An array of {@link ResidueNumber ResidueNumbers} in the second structure that
   *     are aligned. Only null ResidueNumbers are considered to be unaligned
   * @throws StructureException
   */
  private static AFPChain buildAlignment(
      Atom[] ca1, Atom[] ca2, ResidueNumber[] residues1, ResidueNumber[] residues2)
      throws StructureException {

    // remove any gap
    // this includes the ones introduced by the nullifying above
    List<ResidueNumber> alignedResiduesList1 = new ArrayList<ResidueNumber>();
    List<ResidueNumber> alignedResiduesList2 = new ArrayList<ResidueNumber>();
    for (int i = 0; i < residues1.length; i++) {
      if (residues1[i] != null && residues2[i] != null) {
        alignedResiduesList1.add(residues1[i]);
        alignedResiduesList2.add(residues2[i]);
      }
    }

    ResidueNumber[] alignedResidues1 =
        alignedResiduesList1.toArray(new ResidueNumber[alignedResiduesList1.size()]);
    ResidueNumber[] alignedResidues2 =
        alignedResiduesList2.toArray(new ResidueNumber[alignedResiduesList2.size()]);

    AFPChain afpChain = AlignmentTools.createAFPChain(ca1, ca2, alignedResidues1, alignedResidues2);
    afpChain.setAlgorithmName("unknown");

    AlignmentTools.updateSuperposition(afpChain, ca1, ca2);

    afpChain.setBlockSize(new int[] {afpChain.getNrEQR()});
    afpChain.setBlockRmsd(new double[] {afpChain.getTotalRmsdOpt()});
    afpChain.setBlockGap(new int[] {afpChain.getGapLen()});

    return afpChain;
  }
  /**
   * identify additional groups that are not directly attached to amino acids.
   *
   * @param mc {@link ModifiedCompound}.
   * @param chain a {@link Chain}.
   * @return a list of added groups.
   */
  private void identifyAdditionalAttachments(
      ModifiedCompound mc, List<Group> ligands, Map<String, Chain> mapChainIdChain) {
    if (ligands.isEmpty()) {
      return;
    }

    // TODO: should the additional groups only be allowed to the identified
    // ligands or both amino acids and ligands? Currently only on ligands
    // ligands to amino acid bonds for same modification of unknown category
    // will be combined in mergeModComps()
    // TODO: how about chain-chain links?
    List<Group> identifiedGroups = new ArrayList<Group>();
    for (StructureGroup num : mc.getGroups(false)) {
      Group group;
      try {
        // String numIns = "" + num.getResidueNumber();
        // if (num.getInsCode() != null) {
        //	numIns += num.getInsCode();
        // }
        ResidueNumber resNum = new ResidueNumber();
        resNum.setChainId(num.getChainId());
        resNum.setSeqNum(num.getResidueNumber());
        resNum.setInsCode(num.getInsCode());
        // group = chain.getGroupByPDB(numIns);
        group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum);
      } catch (StructureException e) {
        logger.error("Exception: ", e);
        // should not happen
        continue;
      }
      identifiedGroups.add(group);
    }

    int start = 0;

    int n = identifiedGroups.size();
    while (n > start) {
      for (Group group1 : ligands) {
        for (int i = start; i < n; i++) {
          Group group2 = identifiedGroups.get(i);
          if (!identifiedGroups.contains(group1)) {
            List<Atom[]> linkedAtoms =
                StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance);
            if (!linkedAtoms.isEmpty()) {
              for (Atom[] atoms : linkedAtoms) {
                mc.addAtomLinkage(
                    StructureUtil.getStructureAtomLinkage(atoms[0], false, atoms[1], false));
              }
              identifiedGroups.add(group1);
              break;
            }
          }
        }
      }

      start = n;
      n = identifiedGroups.size();
    }
  }
Ejemplo n.º 4
0
  /**
   * Takes an AFPChain and replaces the optimal alignment based on an alignment map
   *
   * <p>Parameters are filled with defaults (often null) or sometimes calculated.
   *
   * <p>For a way to create a new AFPChain, see {@link AlignmentTools#createAFPChain(Atom[], Atom[],
   * ResidueNumber[], ResidueNumber[])}
   *
   * @param afpChain The alignment to be modified
   * @param alignment The new alignment, as a Map
   * @throws StructureException if an error occurred during superposition
   * @see AlignmentTools#createAFPChain(Atom[], Atom[], ResidueNumber[], ResidueNumber[])
   */
  public static AFPChain replaceOptAln(
      AFPChain afpChain, Atom[] ca1, Atom[] ca2, Map<Integer, Integer> alignment)
      throws StructureException {

    // Determine block lengths
    // Sort ca1 indices, then start a new block whenever ca2 indices aren't
    // increasing monotonically.
    Integer[] res1 = alignment.keySet().toArray(new Integer[0]);
    Arrays.sort(res1);
    List<Integer> blockLens = new ArrayList<Integer>(2);
    int optLength = 0;
    Integer lastRes = alignment.get(res1[0]);
    int blkLen = lastRes == null ? 0 : 1;
    for (int i = 1; i < res1.length; i++) {
      Integer currRes = alignment.get(res1[i]); // res2 index
      assert (currRes
          != null); // could be converted to if statement if assertion doesn't hold; just modify
                    // below as well.
      if (lastRes < currRes) {
        blkLen++;
      } else {
        // CP!
        blockLens.add(blkLen);
        optLength += blkLen;
        blkLen = 1;
      }
      lastRes = currRes;
    }
    blockLens.add(blkLen);
    optLength += blkLen;

    // Create array structure for alignment
    int[][][] optAln = new int[blockLens.size()][][];
    int pos1 = 0; // index into res1
    for (int blk = 0; blk < blockLens.size(); blk++) {
      optAln[blk] = new int[2][];
      blkLen = blockLens.get(blk);
      optAln[blk][0] = new int[blkLen];
      optAln[blk][1] = new int[blkLen];
      int pos = 0; // index into optAln
      while (pos < blkLen) {
        optAln[blk][0][pos] = res1[pos1];
        Integer currRes = alignment.get(res1[pos1]);
        optAln[blk][1][pos] = currRes;
        pos++;
        pos1++;
      }
    }
    assert (pos1 == optLength);

    // Create length array
    int[] optLens = new int[blockLens.size()];
    for (int i = 0; i < blockLens.size(); i++) {
      optLens[i] = blockLens.get(i);
    }

    return replaceOptAln(afpChain, ca1, ca2, blockLens.size(), optLens, optAln);
  }
  /**
   * Assembly the matched linkages.
   *
   * @param matchedAtomsOfLinkages
   * @param mod
   * @param condition
   * @param ret ModifiedCompound will be stored here.
   */
  private void assembleLinkages(
      List<List<Atom[]>> matchedAtomsOfLinkages,
      ProteinModification mod,
      List<ModifiedCompound> ret) {
    ModificationCondition condition = mod.getCondition();
    List<ModificationLinkage> modLinks = condition.getLinkages();

    int nLink = matchedAtomsOfLinkages.size();
    int[] indices = new int[nLink];
    Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>();
    while (indices[0] < matchedAtomsOfLinkages.get(0).size()) {
      List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink);
      for (int iLink = 0; iLink < nLink; iLink++) {
        Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]);
        atomLinkages.add(atoms);
      }
      if (matchLinkages(modLinks, atomLinkages)) {
        // matched

        int n = atomLinkages.size();
        List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n);
        for (int i = 0; i < n; i++) {
          Atom[] linkage = atomLinkages.get(i);
          StructureAtomLinkage link =
              StructureUtil.getStructureAtomLinkage(
                  linkage[0], residues.contains(linkage[0].getGroup()),
                  linkage[1], residues.contains(linkage[1].getGroup()));
          linkages.add(link);
        }

        ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages);
        if (!identifiedCompounds.contains(mc)) {
          ret.add(mc);
          identifiedCompounds.add(mc);
        }
      }

      // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0])
      int i = nLink - 1;
      while (i >= 0) {
        if (i == 0 || indices[i] < matchedAtomsOfLinkages.get(i).size() - 1) {
          indices[i]++;
          break;
        } else {
          indices[i] = 0;
          i--;
        }
      }
    }
  }
 /**
  * Takes a protein sequence string with capital and lowercase letters and sets its {@link
  * ProteinSequence#getUserCollection() user collection} to record which letters are uppercase
  * (aligned) and which are lowercase (unaligned).
  *
  * @param sequence Make sure <em>not</em> to use {@link ProteinSequence#getSequenceAsString()} for
  *     this, as it won't preserve upper- and lower-case
  */
 public static List<Object> getAlignedUserCollection(String sequence) {
   List<Object> aligned = new ArrayList<Object>(sequence.length());
   for (char c : sequence.toCharArray()) {
     aligned.add(Character.isUpperCase(c));
   }
   return aligned;
 }
Ejemplo n.º 7
0
  /**
   * Retrieves the optimum alignment from an AFPChain and returns it as a java collection. The
   * result is indexed in the same way as {@link AFPChain#getOptAln()}, but has the correct size().
   *
   * <pre>
   * List<List<List<Integer>>> aln = getOptAlnAsList(AFPChain afpChain);
   * aln.get(blockNum).get(structureNum={0,1}).get(pos)</pre>
   *
   * @param afpChain
   * @return
   */
  public static List<List<List<Integer>>> getOptAlnAsList(AFPChain afpChain) {
    int[][][] optAln = afpChain.getOptAln();
    int[] optLen = afpChain.getOptLen();
    List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum());
    for (int blockNum = 0; blockNum < afpChain.getBlockNum(); blockNum++) {
      // TODO could improve speed an memory by wrapping the arrays with
      // an unmodifiable list, similar to Arrays.asList(...) but with the
      // correct size parameter.
      List<Integer> align1 = new ArrayList<Integer>(optLen[blockNum]);
      List<Integer> align2 = new ArrayList<Integer>(optLen[blockNum]);
      for (int pos = 0; pos < optLen[blockNum]; pos++) {
        align1.add(optAln[blockNum][0][pos]);
        align2.add(optAln[blockNum][1][pos]);
      }
      List<List<Integer>> block = new ArrayList<List<Integer>>(2);
      block.add(align1);
      block.add(align2);
      blocks.add(block);
    }

    return blocks;
  }
  /**
   * Uses two sequences each with a corresponding structure to create an AFPChain corresponding to
   * the alignment. Provided only for convenience since FastaReaders return such maps.
   *
   * @param sequences A Map containing exactly two entries from sequence names as Strings to gapped
   *     ProteinSequences; the name is ignored
   * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
   * @throws StructureException
   */
  public static AFPChain fastaToAfpChain(
      Map<String, ProteinSequence> sequences, Structure structure1, Structure structure2)
      throws StructureException {

    if (sequences.size() != 2) {
      throw new IllegalArgumentException(
          "There must be exactly 2 sequences, but there were " + sequences.size());
    }

    if (structure1 == null || structure2 == null) {
      throw new IllegalArgumentException("A structure is null");
    }

    List<ProteinSequence> seqs = new ArrayList<ProteinSequence>();
    List<String> names = new ArrayList<String>(2);
    for (Map.Entry<String, ProteinSequence> entry : sequences.entrySet()) {
      seqs.add(entry.getValue());
      names.add(entry.getKey());
    }

    return fastaToAfpChain(seqs.get(0), seqs.get(1), structure1, structure2);
  }
 private void processCrosslink1(
     Map<Component, Set<Group>> mapCompGroups,
     List<ModifiedCompound> modComps,
     ProteinModification mod,
     List<Component> components) {
   // modified residue
   // TODO: is this the correct logic for CROSS_LINK_1?
   Set<Group> modifiedResidues = mapCompGroups.get(components.get(0));
   if (modifiedResidues != null) {
     for (Group residue : modifiedResidues) {
       StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true);
       ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup);
       modComps.add(modRes);
     }
   }
 }
Ejemplo n.º 10
0
  /**
   * @param a
   * @param ca1
   * @param ca2
   * @return
   * @throws StructureException if an error occurred during superposition
   */
  public static AFPChain splitBlocksByTopology(AFPChain a, Atom[] ca1, Atom[] ca2)
      throws StructureException {
    int[][][] optAln = a.getOptAln();
    int blockNum = a.getBlockNum();
    int[] optLen = a.getOptLen();

    // Determine block lengths
    // Split blocks if residue indices don't increase monotonically
    List<Integer> newBlkLen = new ArrayList<Integer>();
    boolean blockChanged = false;
    for (int blk = 0; blk < blockNum; blk++) {
      int currLen = 1;
      for (int pos = 1; pos < optLen[blk]; pos++) {
        if (optAln[blk][0][pos] <= optAln[blk][0][pos - 1]
            || optAln[blk][1][pos] <= optAln[blk][1][pos - 1]) {
          // start a new block
          newBlkLen.add(currLen);
          currLen = 0;
          blockChanged = true;
        }
        currLen++;
      }
      if (optLen[blk] < 2) {
        newBlkLen.add(optLen[blk]);
      } else {
        newBlkLen.add(currLen);
      }
    }

    // Check if anything needs to be split
    if (!blockChanged) {
      return a;
    }

    // Split blocks
    List<int[][]> blocks = new ArrayList<int[][]>(newBlkLen.size());

    int oldBlk = 0;
    int pos = 0;
    for (int blkLen : newBlkLen) {
      if (blkLen == optLen[oldBlk]) {
        assert (pos == 0); // should be the whole block
        // Use the old block
        blocks.add(optAln[oldBlk]);
      } else {
        int[][] newBlock = new int[2][blkLen];
        assert (pos + blkLen <= optLen[oldBlk]); // don't overrun block
        for (int i = 0; i < blkLen; i++) {
          newBlock[0][i] = optAln[oldBlk][0][pos + i];
          newBlock[1][i] = optAln[oldBlk][1][pos + i];
        }
        pos += blkLen;
        blocks.add(newBlock);

        if (pos == optLen[oldBlk]) {
          // Finished this oldBlk, start the next
          oldBlk++;
          pos = 0;
        }
      }
    }

    // Store new blocks
    int[][][] newOptAln = blocks.toArray(new int[0][][]);
    int[] newBlockLens = new int[newBlkLen.size()];
    for (int i = 0; i < newBlkLen.size(); i++) {
      newBlockLens[i] = newBlkLen.get(i);
    }

    return replaceOptAln(a, ca1, ca2, blocks.size(), newBlockLens, newOptAln);
  }
  /** Get matched atoms for all linkages. */
  private List<List<Atom[]>> getMatchedAtomsOfLinkages(
      ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) {
    List<ModificationLinkage> linkages = condition.getLinkages();
    int nLink = linkages.size();

    List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink);

    for (int iLink = 0; iLink < nLink; iLink++) {
      ModificationLinkage linkage = linkages.get(iLink);
      Component comp1 = linkage.getComponent1();
      Component comp2 = linkage.getComponent2();

      //			boolean isAA1 = comp1.;
      //			boolean isAA2 = comp2.getType()==true;

      Set<Group> groups1 = mapCompGroups.get(comp1);
      Set<Group> groups2 = mapCompGroups.get(comp2);

      List<Atom[]> list = new ArrayList<Atom[]>();

      List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1();
      for (String name : potentialNamesOfAtomOnGroup1) {
        if (name.equals("*")) {
          // wildcard
          potentialNamesOfAtomOnGroup1 = null; // search all atoms
          break;
        }
      }

      List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2();
      for (String name : potentialNamesOfAtomOnGroup2) {
        if (name.equals("*")) {
          // wildcard
          potentialNamesOfAtomOnGroup2 = null; // search all atoms
          break;
        }
      }

      for (Group g1 : groups1) {
        for (Group g2 : groups2) {
          if (g1.equals(g2)) {
            continue;
          }

          // only for wildcard match of two residues
          boolean ignoreNCLinkage =
              potentialNamesOfAtomOnGroup1 == null
                  && potentialNamesOfAtomOnGroup2 == null
                  && residues.contains(g1)
                  && residues.contains(g2);

          Atom[] atoms =
              StructureUtil.findNearestAtomLinkage(
                  g1,
                  g2,
                  potentialNamesOfAtomOnGroup1,
                  potentialNamesOfAtomOnGroup2,
                  ignoreNCLinkage,
                  bondLengthTolerance);
          if (atoms != null) {
            list.add(atoms);
          }
        }
      }

      if (list.isEmpty()) {
        // broken linkage
        break;
      }

      matchedAtomsOfLinkages.add(list);
    }

    return matchedAtomsOfLinkages;
  }