コード例 #1
0
  /**
   * Builds an AFPChain describing the alignment between {@code structure1} and {@code structure2}
   * that is encoded by the gapped protein sequences {@code sequence1} and {@code sequence2}.
   *
   * <p>Each sequence is matched against its own structure; the sequences need not cover the
   * entire structures, since local alignment is used for that matching. A residue is treated as
   * aligned if and only if it is written as an uppercase letter: for a sequence whose user
   * collection is non-null, the residue numbers at lowercase positions are set to {@code null}
   * before the alignment is built. A sequence whose user collection is {@code null} is used as
   * matched, without any case-based filtering.
   *
   * @param sequence1 gapped sequence aligned to {@code structure1}; its {@link
   *     ProteinSequence#getUserCollection()} must be set for upper- and lower-case to be
   *     interpreted as aligned and unaligned; see {@link #getAlignedUserCollection(String)}
   * @param sequence2 gapped sequence aligned to {@code structure2}; same user-collection
   *     convention as {@code sequence1}
   * @param structure1 structure that {@code sequence1} is matched against
   * @param structure2 structure that {@code sequence2} is matched against
   * @return the result of {@code buildAlignment} for the representative atoms of both structures
   *     and the matched (and case-filtered) residue numbers
   * @throws IllegalArgumentException if any structure or sequence is {@code null}
   * @throws StructureException propagated from the structure and sequence-matching helpers
   */
  public static AFPChain fastaToAfpChain(
      ProteinSequence sequence1,
      ProteinSequence sequence2,
      Structure structure1,
      Structure structure2)
      throws StructureException {

    // Validate structures first, then sequences, so the reported message matches the first
    // missing kind of argument.
    final boolean bothStructuresPresent = structure1 != null && structure2 != null;
    if (!bothStructuresPresent) {
      throw new IllegalArgumentException("A structure is null");
    }
    final boolean bothSequencesPresent = sequence1 != null && sequence2 != null;
    if (!bothSequencesPresent) {
      throw new IllegalArgumentException("A sequence is null");
    }

    final Atom[] atoms1 = StructureTools.getRepresentativeAtomArray(structure1);
    final Atom[] atoms2 = StructureTools.getRepresentativeAtomArray(structure2);

    final ResidueNumber[] matched1 =
        StructureSequenceMatcher.matchSequenceToStructure(sequence1, structure1);
    final ResidueNumber[] matched2 =
        StructureSequenceMatcher.matchSequenceToStructure(sequence2, structure2);

    // Lowercase sequence characters mark unaligned residues: drop their ResidueNumbers, but only
    // when case information was recorded in the user collection.
    final boolean caseKnown1 = sequence1.getUserCollection() != null;
    if (caseKnown1) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence1, matched1);
    }
    final boolean caseKnown2 = sequence2.getUserCollection() != null;
    if (caseKnown2) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence2, matched2);
    }

    return buildAlignment(atoms1, atoms2, matched1, matched2);
  }
コード例 #2
0
  /**
   * Builds an AFPChain for an alignment of a structure against a circularly permuted copy of
   * itself (or of a structure against a sequence), described by two gapped sequences.
   *
   * <p>{@code cpSite} is expressed in ungapped residues. It is first converted into a shift in
   * gapped alignment columns by counting the non-gap (not {@code '-'}) characters of {@code
   * second}: from the start of the sequence when {@code cpSite} is negative, and from the end
   * when it is positive. {@code second} is then cyclically permuted by that gapped shift and
   * matched to {@code structure}; the resulting residue numbers are permuted back by the opposite
   * shift before the alignment is built. As for {@code first}, residue numbers at lowercase
   * positions are set to {@code null} when the sequence's user collection is non-null.
   *
   * @param first The unpermuted sequence
   * @param second The sequence permuted by {@code cpSite}
   * @param structure The structure that both sequences are matched against
   * @param cpSite The number of residues from the beginning of the sequence at which the circular
   *     permutation site occurs; can be positive or negative. Its absolute value must not exceed
   *     the number of non-gap characters in {@code second}
   * @return the result of {@code buildAlignment} for the representative atoms of {@code
   *     structure} (fetched twice) and the residue numbers of {@code first} and {@code second}
   * @throws IllegalArgumentException if {@code structure}, {@code first} or {@code second} is
   *     {@code null}, or if {@code second} has fewer than {@code |cpSite|} non-gap characters
   * @throws IllegalStateException if the permuted copy of {@code second} cannot be turned back
   *     into a {@code ProteinSequence}; this is not expected to happen
   * @throws StructureException propagated from the structure and sequence-matching helpers
   */
  public static AFPChain cpFastaToAfpChain(
      ProteinSequence first, ProteinSequence second, Structure structure, int cpSite)
      throws StructureException {

    if (structure == null) {
      throw new IllegalArgumentException("The structure is null");
    }

    // second is dereferenced below, so it is validated together with first.
    if (first == null || second == null) {
      throw new IllegalArgumentException("The sequence is null");
    }

    final String secondString = second.getSequenceAsString();
    final int gappedLength = secondString.length();
    final int residuesToSkip = Math.abs(cpSite);

    // we need to find the ungapped CP site: walk over the columns of second, counting only
    // non-gap characters, until |cpSite| residues have been passed
    // NOTE(review): gappedCpShift ends up non-negative for either sign of cpSite, and is used
    // with the same sign in both cases below — confirm this is intended.
    int gappedCpShift = 0;
    int ungappedCpShift = 0;
    while (ungappedCpShift < residuesToSkip) {
      if (gappedCpShift >= gappedLength) {
        // ran out of columns before reaching the requested number of residues
        throw new IllegalArgumentException("CP site of " + cpSite + " is wrong");
      }
      // negative sites are counted from the start of second, positive ones from its end
      final int column = cpSite <= 0 ? gappedCpShift : gappedLength - 1 - gappedCpShift;
      if (secondString.charAt(column) != '-') {
        ungappedCpShift++;
      }
      gappedCpShift++;
    }

    Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure);
    Atom[] ca2 =
        StructureTools.getRepresentativeAtomArray(
            structure); // can't use cloneCAArray because it doesn't set parent
                        // group.chain.structure

    final ProteinSequence antipermuted;
    try {
      antipermuted =
          new ProteinSequence(SequenceTools.permuteCyclic(secondString, gappedCpShift));
    } catch (CompoundNotFoundException e) {
      // this can't happen, the original sequence comes from a ProteinSequence; fail loudly
      // instead of continuing with a null sequence
      throw new IllegalStateException(
          "Unexpected error while creating protein sequence. This is most likely a bug.", e);
    }

    ResidueNumber[] residues = StructureSequenceMatcher.matchSequenceToStructure(first, structure);
    ResidueNumber[] antipermutedResidues =
        StructureSequenceMatcher.matchSequenceToStructure(antipermuted, structure);

    // undo the cyclic shift on the matched residue numbers
    ResidueNumber[] nonpermutedResidues = new ResidueNumber[antipermutedResidues.length];
    SequenceTools.permuteCyclic(antipermutedResidues, nonpermutedResidues, -gappedCpShift);

    // nullify ResidueNumbers that have a lowercase sequence character
    if (first.getUserCollection() != null) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(first, residues);
    }
    if (second.getUserCollection() != null) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(second, nonpermutedResidues);
    }

    return buildAlignment(ca1, ca2, residues, nonpermutedResidues);
  }