Пример #1
0
  /**
   * Checks that the alignment given by afpChain is sequential. This means that the residue indices
   * of both proteins never decrease as a function of the alignment position (ie both proteins are
   * sorted). Equal consecutive indices are accepted; only a decrease makes the alignment
   * non-sequential.
   *
   * <p>This will return false for circularly permuted alignments or other non-topological
   * alignments. It will also return false for cases where the alignment itself is sequential but it
   * is not stored in the afpChain in a sorted manner.
   *
   * <p>Since algorithms which create non-sequential alignments split the alignment into multiple
   * blocks, some computational time can be saved by only checking block boundaries for
   * sequentiality. Setting {@code checkWithinBlocks} to {@code true} makes this function slower,
   * but detects AFPChains with non-sequential blocks.
   *
   * <p>Empty blocks are skipped wherever they occur, including at the start of the alignment: the
   * boundary comparison is always made against the last residue pair of the most recent non-empty
   * block.
   *
   * <p>Note that this method should give the same results as {@link
   * AFPChain#isSequentialAlignment()}. However, the AFPChain version relies on the
   * StructureAlignment algorithm correctly setting this parameter, which is sadly not always the
   * case.
   *
   * @param afpChain An alignment
   * @param checkWithinBlocks Indicates whether individual blocks should be checked for
   *     sequentiality
   * @return True if the alignment is sequential (or has no aligned positions at all).
   */
  public static boolean isSequentialAlignment(AFPChain afpChain, boolean checkWithinBlocks) {
    int[][][] optAln = afpChain.getOptAln();
    int[] alnLen = afpChain.getOptLen();
    int blocks = afpChain.getBlockNum();

    if (blocks < 1) {
      return true; // trivial case
    }

    // Last aligned residue pair of the most recent non-empty block. Only meaningful once
    // seenNonEmptyBlock is true.
    boolean seenNonEmptyBlock = false;
    int prevRes1 = 0;
    int prevRes2 = 0;

    for (int block = 0; block < blocks; block++) {
      int len = alnLen[block];
      if (len < 1) {
        continue; // skip empty blocks, wherever they are
      }

      int[] res1 = optAln[block][0];
      int[] res2 = optAln[block][1];

      // Check that this block starts no earlier than the previous non-empty block ended
      if (seenNonEmptyBlock && (res1[0] < prevRes1 || res2[0] < prevRes2)) {
        return false;
      }

      // Optionally check that the positions within the block are sequential
      if (checkWithinBlocks) {
        for (int pos = 1; pos < len; pos++) {
          if (res1[pos] < res1[pos - 1] || res2[pos] < res2[pos - 1]) {
            return false;
          }
        }
      }

      prevRes1 = res1[len - 1];
      prevRes2 = res2[len - 1];
      seenNonEmptyBlock = true;
    }

    return true;
  }
Пример #2
0
  /**
   * Converts an alignment into a lookup table from residue indices of protein 1 to the residue
   * indices of protein 2 they are aligned with.
   *
   * <p>For example, the alignment
   *
   * <pre>{@code
   * 1234
   * 5678
   * }</pre>
   *
   * is returned as
   *
   * <pre>{@code
   * 1->5
   * 2->6
   * 3->7
   * 4->8
   * }</pre>
   *
   * <p>An alignment whose reported length is less than one yields an empty map.
   *
   * @param afpChain An alignment
   * @return A new map from each aligned residue of protein 1 to its partner in protein 2.
   * @throws StructureException If a residue of protein 1 occurs more than once in the alignment.
   *     Repeated residues of protein 2 are not detected.
   */
  public static Map<Integer, Integer> alignmentAsMap(AFPChain afpChain) throws StructureException {
    Map<Integer, Integer> pairs = new HashMap<Integer, Integer>();

    if (afpChain.getAlnLength() >= 1) {
      int[][][] aligned = afpChain.getOptAln();
      int[] lengths = afpChain.getOptLen();

      for (int b = 0; b < afpChain.getBlockNum(); b++) {
        for (int i = 0; i < lengths[b]; i++) {
          int from = aligned[b][0][i];
          int to = aligned[b][1][i];

          // Stored values are never null, so a non-null lookup means the key is already mapped.
          Integer existing = pairs.get(from);
          if (existing != null) {
            throw new StructureException(
                String.format("Residue %d aligned to both %d and %d.", from, existing, to));
          }
          pairs.put(from, to);
        }
      }
    }

    return pairs;
  }
Пример #3
0
  /**
   * Copies the optimal alignment of an AFPChain into nested java collections. The result is
   * indexed in the same way as {@link AFPChain#getOptAln()}, but every innermost list has exactly
   * as many elements as the corresponding block length from {@link AFPChain#getOptLen()}.
   *
   * <pre>{@code
   * List<List<List<Integer>>> aln = getOptAlnAsList(afpChain);
   * aln.get(blockNum).get(structureNum).get(pos)   // structureNum is 0 or 1
   * }</pre>
   *
   * @param afpChain The alignment to read
   * @return A newly allocated list with one entry per block; each entry holds two lists of residue
   *     indices, the first for structure 0 and the second for structure 1
   */
  public static List<List<List<Integer>>> getOptAlnAsList(AFPChain afpChain) {
    int[][][] optAln = afpChain.getOptAln();
    int[] optLen = afpChain.getOptLen();
    int blockCount = afpChain.getBlockNum();

    List<List<List<Integer>>> result = new ArrayList<List<List<Integer>>>(blockCount);
    for (int b = 0; b < blockCount; b++) {
      int len = optLen[b];

      // TODO could improve speed and memory by wrapping the arrays with
      // an unmodifiable list, similar to Arrays.asList(...) but with the
      // correct size parameter.
      List<List<Integer>> pair = new ArrayList<List<Integer>>(2);
      for (int structure = 0; structure < 2; structure++) {
        List<Integer> residues = new ArrayList<Integer>(len);
        for (int i = 0; i < len; i++) {
          residues.add(optAln[b][structure][i]);
        }
        pair.add(residues);
      }
      result.add(pair);
    }

    return result;
  }
Пример #4
0
  /**
   * After the alignment changes (optAln, optLen, blockNum, at a minimum), many other properties
   * which depend on the superposition will be invalid.
   *
   * <p>This method re-runs a rigid superposition over the whole alignment and repopulates the
   * required properties, including RMSD (TotalRMSD) and TM-Score. The single rotation matrix and
   * shift vector obtained for the whole alignment are stored for every block. In addition, each
   * block is superimposed on its own to obtain a per-block RMSD and TM-score.
   *
   * <p>The ca lengths are always updated, and the focusRes arrays are created if they are missing.
   * If the alignment reports zero equivalent residues (nrEQR), the method returns after that
   * without computing any superposition.
   *
   * <p>{@link CECalculator#calc_rmsd(Atom[], Atom[], int, boolean)} contains much of the same
   * code, but stores results in a CECalculator instance rather than an AFPChain.
   *
   * @param afpChain The alignment to update; its optAln, optLen and blockNum must already describe
   *     the new alignment
   * @param ca1 First set of ca atoms
   * @param ca2 Second set of ca atoms. The aligned atoms are cloned before being rotated and
   *     shifted, so the transformation is applied to the clones rather than to the elements of
   *     this array
   * @throws StructureException declared for the superposition calls made by this method
   * @see CECalculator#calc_rmsd(Atom[], Atom[], int, boolean)
   */
  public static void updateSuperposition(AFPChain afpChain, Atom[] ca1, Atom[] ca2)
      throws StructureException {

    // Update ca information, because the atom array might also be changed
    afpChain.setCa1Length(ca1.length);
    afpChain.setCa2Length(ca2.length);

    // We need this to get the correct superposition
    int[] focusRes1 = afpChain.getFocusRes1();
    int[] focusRes2 = afpChain.getFocusRes2();
    if (focusRes1 == null) {
      focusRes1 = new int[afpChain.getCa1Length()];
      afpChain.setFocusRes1(focusRes1);
    }
    if (focusRes2 == null) {
      focusRes2 = new int[afpChain.getCa2Length()];
      afpChain.setFocusRes2(focusRes2);
    }

    // Nothing is aligned, so there is nothing to superimpose
    if (afpChain.getNrEQR() == 0) return;

    // create new arrays for the subset of atoms in the alignment.
    Atom[] ca1aligned = new Atom[afpChain.getOptLength()];
    Atom[] ca2aligned = new Atom[afpChain.getOptLength()];
    int pos = 0;
    int[] blockLens = afpChain.getOptLen();
    int[][][] optAln = afpChain.getOptAln();
    assert (afpChain.getBlockNum() <= optAln.length);

    for (int block = 0; block < afpChain.getBlockNum(); block++) {
      for (int i = 0; i < blockLens[block]; i++) {
        int pos1 = optAln[block][0][i];
        int pos2 = optAln[block][1][i];
        Atom a1 = ca1[pos1];
        // Clone the second atom so that the rotation/shift below acts on the copy
        Atom a2 = (Atom) ca2[pos2].clone();
        ca1aligned[pos] = a1;
        ca2aligned[pos] = a2;
        pos++;
      }
    }

    // this can happen when we load an old XML serialization which did not support modern ChemComp
    // representation of modified residues.
    if (pos != afpChain.getOptLength()) {
      // NOTE(review): the message prefix names AFPChainScorer.getTMScore although it is emitted
      // from updateSuperposition; left unchanged in case log consumers match on it.
      logger.warn(
          "AFPChainScorer getTMScore: Problems reconstructing alignment! nr of loaded atoms is "
              + pos
              + " but should be "
              + afpChain.getOptLength());
      // we need to resize the array, because we allocated too many atoms earlier on.
      ca1aligned = (Atom[]) resizeArray(ca1aligned, pos);
      ca2aligned = (Atom[]) resizeArray(ca2aligned, pos);
    }

    // Superimpose the two structures in correspondence to the new alignment
    SVDSuperimposer svd = new SVDSuperimposer(ca1aligned, ca2aligned);
    Matrix matrix = svd.getRotation();
    Atom shift = svd.getTranslation();
    // Every block receives the same whole-alignment rotation and shift
    Matrix[] blockMxs = new Matrix[afpChain.getBlockNum()];
    Arrays.fill(blockMxs, matrix);
    afpChain.setBlockRotationMatrix(blockMxs);
    Atom[] blockShifts = new Atom[afpChain.getBlockNum()];
    Arrays.fill(blockShifts, shift);
    afpChain.setBlockShiftVector(blockShifts);

    for (Atom a : ca2aligned) {
      Calc.rotate(a, matrix);
      Calc.shift(a, shift);
    }

    // Calculate the RMSD and TM score for the new alignment
    double rmsd = SVDSuperimposer.getRMS(ca1aligned, ca2aligned);
    double tmScore = SVDSuperimposer.getTMScore(ca1aligned, ca2aligned, ca1.length, ca2.length);
    afpChain.setTotalRmsdOpt(rmsd);
    afpChain.setTMScore(tmScore);

    // Calculate the RMSD and TM score for every block of the new alignment
    double[] blockRMSD = new double[afpChain.getBlockNum()];
    double[] blockScore = new double[afpChain.getBlockNum()];
    for (int k = 0; k < afpChain.getBlockNum(); k++) {
      // Create the atom arrays corresponding to the aligned residues in the block
      Atom[] ca1block = new Atom[afpChain.getOptLen()[k]];
      Atom[] ca2block = new Atom[afpChain.getOptLen()[k]];
      int position = 0;
      for (int i = 0; i < blockLens[k]; i++) {
        int pos1 = optAln[k][0][i];
        int pos2 = optAln[k][1][i];
        Atom a1 = ca1[pos1];
        Atom a2 = (Atom) ca2[pos2].clone();
        ca1block[position] = a1;
        ca2block[position] = a2;
        position++;
      }
      if (position != afpChain.getOptLen()[k]) {
        // Report the per-block counter here, not the whole-alignment counter `pos`
        logger.warn(
            "AFPChainScorer getTMScore: Problems reconstructing block alignment! nr of loaded atoms is "
                + position
                + " but should be "
                + afpChain.getOptLen()[k]);
        // we need to resize the array, because we allocated too many atoms earlier on.
        ca1block = (Atom[]) resizeArray(ca1block, position);
        ca2block = (Atom[]) resizeArray(ca2block, position);
      }
      // Superimpose the two block structures
      SVDSuperimposer svdb = new SVDSuperimposer(ca1block, ca2block);
      Matrix matrixb = svdb.getRotation();
      Atom shiftb = svdb.getTranslation();
      for (Atom a : ca2block) {
        Calc.rotate(a, matrixb);
        Calc.shift(a, shiftb);
      }
      // Calculate the RMSD and TM score for the block
      double rmsdb = SVDSuperimposer.getRMS(ca1block, ca2block);
      double tmScoreb = SVDSuperimposer.getTMScore(ca1block, ca2block, ca1.length, ca2.length);
      blockRMSD[k] = rmsdb;
      blockScore[k] = tmScoreb;
    }
    // The same array instance is handed to both the optRmsd and blockRmsd setters
    afpChain.setOptRmsd(blockRMSD);
    afpChain.setBlockRmsd(blockRMSD);
    afpChain.setBlockScore(blockScore);
  }
Пример #5
0
  /**
   * Splits the blocks of an alignment wherever the residue indices stop increasing, so that every
   * resulting block is strictly increasing in both proteins.
   *
   * <p>Within each block, a new block is started at every position whose residue index in either
   * protein is less than or equal to the index at the preceding position. Blocks that need no
   * split are reused as-is (the same array instance is placed in the new alignment); split blocks
   * are copied into new, exactly-sized arrays. Blocks are never merged and their order is kept.
   *
   * <p>If no block needs splitting, the input alignment {@code a} itself is returned. Otherwise
   * the result is whatever {@code replaceOptAln} returns for the new block layout.
   *
   * @param a The alignment whose blocks should be split
   * @param ca1 Atoms of the first protein, passed on to {@code replaceOptAln}
   * @param ca2 Atoms of the second protein, passed on to {@code replaceOptAln}
   * @return {@code a} if nothing was split, otherwise the alignment built from the split blocks
   * @throws StructureException if an error occurred during superposition
   */
  public static AFPChain splitBlocksByTopology(AFPChain a, Atom[] ca1, Atom[] ca2)
      throws StructureException {
    int[][][] optAln = a.getOptAln();
    int blockNum = a.getBlockNum();
    int[] optLen = a.getOptLen();

    // New blocks and their lengths are collected together in a single pass, so a segment can
    // never be attributed to the wrong source block.
    List<int[][]> blocks = new ArrayList<int[][]>(blockNum);
    List<Integer> newBlkLen = new ArrayList<Integer>(blockNum);
    boolean blockChanged = false;

    for (int blk = 0; blk < blockNum; blk++) {
      int len = optLen[blk];
      int start = 0; // first position of the segment currently being collected

      for (int pos = 1; pos < len; pos++) {
        // Split blocks if residue indices don't increase monotonically
        if (optAln[blk][0][pos] <= optAln[blk][0][pos - 1]
            || optAln[blk][1][pos] <= optAln[blk][1][pos - 1]) {
          blocks.add(copyBlockSegment(optAln[blk], start, pos));
          newBlkLen.add(pos - start);
          start = pos;
          blockChanged = true;
        }
      }

      if (start == 0) {
        // No split happened in this block (this includes empty and single-position blocks):
        // use the old block
        blocks.add(optAln[blk]);
        newBlkLen.add(len);
      } else {
        // Trailing segment after the last split point
        blocks.add(copyBlockSegment(optAln[blk], start, len));
        newBlkLen.add(len - start);
      }
    }

    // Check if anything needs to be split
    if (!blockChanged) {
      return a;
    }

    // Store new blocks
    int[][][] newOptAln = blocks.toArray(new int[0][][]);
    int[] newBlockLens = new int[newBlkLen.size()];
    for (int i = 0; i < newBlockLens.length; i++) {
      newBlockLens[i] = newBlkLen.get(i);
    }

    return replaceOptAln(a, ca1, ca2, blocks.size(), newBlockLens, newOptAln);
  }

  /** Copies positions {@code [start, end)} of both rows of a block into a new 2-row array. */
  private static int[][] copyBlockSegment(int[][] block, int start, int end) {
    int segLen = end - start;
    int[][] segment = new int[2][segLen];
    System.arraycopy(block[0], start, segment[0], 0, segLen);
    System.arraycopy(block[1], start, segment[1], 0, segLen);
    return segment;
  }
Пример #6
0
  /**
   * Creates a simple interaction format (SIF) file for an alignment.
   *
   * <p>The SIF file can be read by network software (eg Cytoscape) to analyze alignments as graphs.
   *
   * <p>This function creates a graph with residues as nodes and two types of edges:
   *
   * <ol>
   *   <li>backbone edges, which connect adjacent residues in the aligned protein
   *   <li>alignment edges, which connect aligned residues
   * </ol>
   *
   * <p>Each edge is written as one line of the form {@code node1<TAB>interaction<TAB>node2}. No
   * header line is written. Node names are built from the protein name (followed by a colon, or
   * nothing if the name is null), the chain id and the residue number.
   *
   * <p>Alignment edges are written first, then the backbone of the first protein. The backbone of
   * the second protein is written only if the two proteins appear to differ (different names,
   * different lengths, or a different first residue number). An empty atom array contributes no
   * backbone edges.
   *
   * @param out Stream to write to; it is neither flushed nor closed by this method
   * @param afpChain alignment to write
   * @param ca1 First protein, used to generate node names
   * @param ca2 Second protein, used to generate node names
   * @param backboneInteraction String (e.g. a two-letter code) used to identify backbone edges
   * @param alignmentInteraction String (e.g. a two-letter code) used to identify alignment edges
   * @throws IOException if writing to {@code out} fails
   */
  public static void alignmentToSIF(
      Writer out,
      AFPChain afpChain,
      Atom[] ca1,
      Atom[] ca2,
      String backboneInteraction,
      String alignmentInteraction)
      throws IOException {

    // Node name prefixes: "<name>:" or empty when the protein has no name
    String name1 = afpChain.getName1();
    String name2 = afpChain.getName2();
    name1 = (name1 == null) ? "" : name1 + ":";
    name2 = (name2 == null) ? "" : name2 + ":";

    // Print alignment edges
    int nblocks = afpChain.getBlockNum();
    int[] blockLen = afpChain.getOptLen();
    int[][][] optAlign = afpChain.getOptAln();
    for (int b = 0; b < nblocks; b++) {
      for (int r = 0; r < blockLen[b]; r++) {
        String node1 = sifNodeName(name1, ca1[optAlign[b][0][r]]);
        String node2 = sifNodeName(name2, ca2[optAlign[b][1][r]]);
        out.write(String.format("%s\t%s\t%s\n", node1, alignmentInteraction, node2));
      }
    }

    // Print first backbone edges
    writeSifBackboneEdges(out, ca1, name1, backboneInteraction);

    // Print second backbone edges, if the proteins differ
    // Do some quick checks for whether the proteins differ
    // (Not perfect, but should detect major differences and CPs.)
    boolean proteinsDiffer =
        !name1.equals(name2)
            || ca1.length != ca2.length
            || (ca1.length > 0
                && ca1[0].getGroup() != null
                && ca2[0].getGroup() != null
                && !ca1[0]
                    .getGroup()
                    .getResidueNumber()
                    .equals(ca2[0].getGroup().getResidueNumber()));
    if (proteinsDiffer) {
      writeSifBackboneEdges(out, ca2, name2, backboneInteraction);
    }
  }

  /** Writes one edge per pair of consecutive atoms in {@code ca}; does nothing if it is empty. */
  private static void writeSifBackboneEdges(
      Writer out, Atom[] ca, String namePrefix, String backboneInteraction) throws IOException {
    if (ca.length == 0) {
      return; // no residues, hence no first node to start the chain from
    }
    String last = sifNodeName(namePrefix, ca[0]);
    for (int i = 1; i < ca.length; i++) {
      String curr = sifNodeName(namePrefix, ca[i]);
      out.write(String.format("%s\t%s\t%s\n", last, backboneInteraction, curr));
      last = curr;
    }
  }

  /** Builds the SIF node name of an atom's residue: prefix, then chain id, then residue number. */
  private static String sifNodeName(String namePrefix, Atom atom) {
    ResidueNumber rn = atom.getGroup().getResidueNumber();
    return namePrefix + rn.getChainId() + rn.toString();
  }