/**
   * Builds the {@link AFPChain} for the alignment between {@code structure1} and {@code
   * structure2} that is encoded by the gapped protein sequences {@code sequence1} and {@code
   * sequence2}. The sequences do not have to span the whole structures, because each sequence is
   * matched onto its structure by local alignment. A residue counts as aligned if and only if it is
   * written as an uppercase letter.
   *
   * @param sequence1 gapped sequence for {@code structure1}; <em>must</em> have {@link
   *     ProteinSequence#getUserCollection()} set to document upper- and lower-case as aligned and
   *     unaligned; see {@link #getAlignedUserCollection(String)}. When the user collection is
   *     {@code null} the lowercase filtering step is skipped for this sequence.
   * @param sequence2 gapped sequence for {@code structure2}; handled the same way as {@code
   *     sequence1}
   * @param structure1 structure that {@code sequence1} is matched against
   * @param structure2 structure that {@code sequence2} is matched against
   * @return the alignment produced by {@code buildAlignment} from the representative atoms and the
   *     matched residue numbers
   * @throws IllegalArgumentException if either structure or either sequence is {@code null}
   * @throws StructureException propagated from the underlying structure/alignment calls
   */
  public static AFPChain fastaToAfpChain(
      ProteinSequence sequence1,
      ProteinSequence sequence2,
      Structure structure1,
      Structure structure2)
      throws StructureException {

    // Structures are validated before sequences, so a missing structure is reported first.
    if (structure1 == null || structure2 == null) {
      throw new IllegalArgumentException("A structure is null");
    }
    if (sequence1 == null || sequence2 == null) {
      throw new IllegalArgumentException("A sequence is null");
    }

    Atom[] atoms1 = StructureTools.getRepresentativeAtomArray(structure1);
    Atom[] atoms2 = StructureTools.getRepresentativeAtomArray(structure2);

    ResidueNumber[] matched1 =
        StructureSequenceMatcher.matchSequenceToStructure(sequence1, structure1);
    ResidueNumber[] matched2 =
        StructureSequenceMatcher.matchSequenceToStructure(sequence2, structure2);

    // Positions written in lowercase are unaligned: drop their residue numbers. This is only
    // possible when the sequence carries a user collection.
    boolean hasCaseInfo1 = sequence1.getUserCollection() != null;
    if (hasCaseInfo1) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence1, matched1);
    }
    boolean hasCaseInfo2 = sequence2.getUserCollection() != null;
    if (hasCaseInfo2) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence2, matched2);
    }

    return buildAlignment(atoms1, atoms2, matched1, matched2);
  }
 /**
  * Command-line entry point: converts a file holding exactly two FASTA sequences into an AFPChain
  * and writes its XML representation to standard output.
  *
  * <p>If the argument count is not three, a usage line is written to standard error and the
  * method returns without doing anything else.
  *
  * @param args three values, in order: fasta-file, structure-1-name, structure-2-name
  * @throws IllegalArgumentException if no structure is found for one of the two names
  * @throws StructureException
  * @throws IOException
  */
 public static void main(String[] args) throws StructureException, IOException {
   if (args.length != 3) {
     System.err.println(
         "Usage: FastaAFPChainConverter fasta-file structure-1-name structure-2-name");
     return;
   }

   String firstName = args[1];
   String secondName = args[2];

   File fastaFile = new File(args[0]);

   // Both structures are fetched before either result is checked.
   Structure firstStructure = StructureTools.getStructure(firstName);
   Structure secondStructure = StructureTools.getStructure(secondName);
   if (firstStructure == null) {
     throw new IllegalArgumentException("No structure for " + firstName + " was found");
   }
   if (secondStructure == null) {
     throw new IllegalArgumentException("No structure for " + secondName + " was found");
   }

   AFPChain alignment = fastaFileToAfpChain(fastaFile, firstStructure, secondStructure);
   System.out.println(AFPChainXMLConverter.toXML(alignment));
 }
Example #3
0
  /**
   * Replaces the optimal alignment of an AFPChain with {@code newAlgn} and recomputes the
   * dependent alignment scores and variables. All changes are applied to a copy obtained from
   * {@code afpChain.clone()}, and that copy is returned.
   *
   * @param newAlgn the new optimal alignment; its first dimension is the number of groups
   *     (blocks), and the length of {@code newAlgn[g][0]} is taken as the length of group {@code g}
   * @param afpChain the alignment to start from
   * @param ca1 atoms of the first structure
   * @param ca2 atoms of the second structure; a cloned array is used for the superposition so
   *     that the positions in {@code ca2} are not modified
   * @return the updated copy of {@code afpChain}
   * @throws StructureException propagated from the superposition or display update
   */
  public static AFPChain replaceOptAln(int[][][] newAlgn, AFPChain afpChain, Atom[] ca1, Atom[] ca2)
      throws StructureException {

    // One group of the new alignment corresponds to one block.
    final int groupCount = newAlgn.length;

    // Collect the per-group lengths and their total in a single pass.
    int[] groupLengths = new int[groupCount];
    int totalLength = 0;
    for (int g = 0; g < groupCount; g++) {
      int length = newAlgn[g][0].length;
      groupLengths[g] = length;
      totalLength += length;
    }

    // Work on a copy so the setters below are not invoked on the caller's instance.
    AFPChain updated = (AFPChain) afpChain.clone();

    // Optimal-alignment parameters.
    updated.setOptLength(totalLength);
    updated.setOptLen(groupLengths);
    updated.setOptAln(newAlgn);

    // Block information; the same length array is handed to every length setter.
    updated.setBlockNum(groupCount);
    updated.setBlockSize(groupLengths);
    updated.setBlockResList(newAlgn);
    updated.setBlockResSize(groupLengths);
    updated.setBlockGap(calculateBlockGap(newAlgn));

    // Recompute superposition-derived properties on a clone of ca2 (keeps ca2 positions intact).
    Atom[] secondAtoms = StructureTools.cloneAtomArray(ca2);
    AlignmentTools.updateSuperposition(updated, ca1, secondAtoms);

    // Clear the alignment symbols, then rebuild the sequence alignment strings from the
    // optimal alignment.
    updated.setAlnsymb(null);
    AFPAlignmentDisplay.getAlign(updated, ca1, secondAtoms);

    return updated;
  }
Example #4
0
  /**
   * Creates a new AFPChain from {@code afpChain} whose optimal alignment is replaced by {@code
   * optAln}, then updates the superposition and the alignment display strings.
   *
   * @param afpChain Input afpchain. UNMODIFIED: every setter is invoked on its clone
   * @param ca1 atoms of the first structure
   * @param ca2 atoms of the second structure; a cloned array is used for the superposition so
   *     that the positions in {@code ca2} are not modified
   * @param blockNum number of blocks; the first {@code blockNum} entries of {@code optLens} are
   *     summed to obtain the total optimal length
   * @param optLens length of each block; the same array is stored as both the block sizes and the
   *     optimal lengths
   * @param optAln the new optimal alignment
   * @return A NEW AfpChain based off the input but with the optAln modified
   * @throws StructureException if an error occurred during superposition
   */
  public static AFPChain replaceOptAln(
      AFPChain afpChain, Atom[] ca1, Atom[] ca2, int blockNum, int[] optLens, int[][][] optAln)
      throws StructureException {

    // Total aligned length over the first blockNum blocks.
    int totalLength = 0;
    int block = 0;
    while (block < blockNum) {
      totalLength += optLens[block];
      block++;
    }

    // Apply the new alignment to a copy of the input.
    AFPChain result = (AFPChain) afpChain.clone();
    result.setOptLength(totalLength);
    result.setBlockSize(optLens);
    result.setOptLen(optLens);
    result.setOptAln(optAln);
    result.setBlockNum(blockNum);

    // TODO recalculate properties: superposition, tm-score, etc
    // The superposition update below runs against a clone of ca2 to keep ca2 positions intact.
    Atom[] secondAtoms = StructureTools.cloneAtomArray(ca2);
    AlignmentTools.updateSuperposition(result, ca1, secondAtoms);

    AFPAlignmentDisplay.getAlign(result, ca1, secondAtoms);
    return result;
  }
  /**
   * Takes a structure and sequence corresponding to an alignment between a structure or sequence
   * and itself (or even a structure with a sequence), where the result has a circular permutation
   * site {@code cpSite} residues to the right.
   *
   * <p>The permutation site is given in ungapped residues. It is first converted into a shift in
   * gapped alignment columns by walking over {@code second} and counting non-gap characters: from
   * the left end when {@code cpSite} is zero or negative, from the right end when it is positive.
   *
   * @param first The unpermuted sequence
   * @param second The sequence permuted by cpSite
   * @param structure The structure both sequences are matched against
   * @param cpSite The number of residues from the beginning of the sequence at which the circular
   *     permutation site occurs; can be positive or negative. Its absolute value must not exceed
   *     the number of non-gap characters in {@code second}
   * @return the alignment produced by {@code buildAlignment} for the structure against itself
   * @throws IllegalArgumentException if {@code structure}, {@code first} or {@code second} is
   *     {@code null}, or if {@code second} runs out of columns before {@code cpSite} residues were
   *     counted
   * @throws IllegalStateException if the cyclically permuted sequence cannot be turned back into a
   *     {@link ProteinSequence}; not expected, since its characters come from a ProteinSequence
   * @throws StructureException
   */
  public static AFPChain cpFastaToAfpChain(
      ProteinSequence first, ProteinSequence second, Structure structure, int cpSite)
      throws StructureException {

    if (structure == null) {
      throw new IllegalArgumentException("The structure is null");
    }

    // Both sequences are dereferenced unconditionally below, so reject null for either one.
    if (first == null || second == null) {
      throw new IllegalArgumentException("The sequence is null");
    }

    // Evaluated once: the loop below and the cyclic permutation both read this string.
    final String permutedString = second.getSequenceAsString();
    final int permutedLength = permutedString.length();

    // we need to find the ungapped CP site
    final int residuesToSkip = Math.abs(cpSite);
    int gappedCpShift = 0;
    int ungappedCpShift = 0;
    while (ungappedCpShift < residuesToSkip) {
      // The column index is derived from the length of the string being indexed, so the scan
      // from the right always starts at the last column of the permuted sequence.
      int column = (cpSite <= 0) ? gappedCpShift : permutedLength - 1 - gappedCpShift;
      if (column < 0 || column >= permutedLength) {
        throw new IllegalArgumentException("CP site of " + cpSite + " is wrong");
      }
      if (permutedString.charAt(column) != '-') {
        ungappedCpShift++;
      }
      gappedCpShift++;
    }

    Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure);
    Atom[] ca2 =
        StructureTools.getRepresentativeAtomArray(
            structure); // can't use cloneCAArray because it doesn't set parent
                        // group.chain.structure

    ProteinSequence antipermuted;
    try {
      antipermuted =
          new ProteinSequence(SequenceTools.permuteCyclic(permutedString, gappedCpShift));
    } catch (CompoundNotFoundException e) {
      // this can't happen, the original sequence comes from a ProteinSequence. Fail here with the
      // cause attached instead of continuing with a null sequence.
      throw new IllegalStateException(
          "Unexpected error while creating protein sequence. This is most likely a bug.", e);
    }

    ResidueNumber[] residues = StructureSequenceMatcher.matchSequenceToStructure(first, structure);
    ResidueNumber[] antipermutedResidues =
        StructureSequenceMatcher.matchSequenceToStructure(antipermuted, structure);

    // Shift the matched residues back by the same number of gapped columns.
    ResidueNumber[] nonpermutedResidues = new ResidueNumber[antipermutedResidues.length];
    SequenceTools.permuteCyclic(antipermutedResidues, nonpermutedResidues, -gappedCpShift);

    // nullify ResidueNumbers that have a lowercase sequence character
    if (first.getUserCollection() != null) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(first, residues);
    }
    if (second.getUserCollection() != null) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(second, nonpermutedResidues);
    }

    return buildAlignment(ca1, ca2, residues, nonpermutedResidues);
  }
  /**
   * Identifies which of a set of candidate modifications occur in a list of chains.
   *
   * <p>The instance is {@code reset()} first. If the candidate set is empty nothing else happens.
   * Otherwise the amino acids and ligands of every chain are collected, each candidate whose
   * components are all present is processed, and the resulting modified compounds are merged and
   * added to the identified compounds. Additional attachments and unidentifiable linkages are
   * recorded only when the corresponding flags are set.
   *
   * @param chains query {@link Chain}s.
   * @param potentialModifications query {@link ProteinModification}s.
   * @throws IllegalArgumentException if either argument is {@code null}
   */
  public void identify(
      final List<Chain> chains, final Set<ProteinModification> potentialModifications) {

    if (chains == null) {
      throw new IllegalArgumentException("Null structure.");
    }
    if (potentialModifications == null) {
      throw new IllegalArgumentException("Null potentialModifications.");
    }

    reset();

    // Nothing to look for.
    if (potentialModifications.isEmpty()) {
      return;
    }

    Map<String, Chain> chainsById = new HashMap<String, Chain>(chains.size());
    residues = new ArrayList<Group>();
    List<Group> allLigands = new ArrayList<Group>();
    Map<Component, Set<Group>> groupsByComponent = new HashMap<Component, Set<Group>>();

    // Gather residues and ligands chain by chain.
    for (Chain current : chains) {
      chainsById.put(current.getChainID(), current);

      List<Group> aminoAcids = StructureUtil.getAminoAcids(current);
      List<Group> chainLigands = StructureTools.filterLigands(current.getAtomGroups());

      // Groups classified as ligands are not kept in the residue list.
      residues.addAll(aminoAcids);
      residues.removeAll(chainLigands);
      allLigands.addAll(chainLigands);
      addModificationGroups(potentialModifications, aminoAcids, chainLigands, groupsByComponent);
    }

    if (residues.isEmpty()) {
      // Best-effort lookup of an identifier for the log message.
      Structure parent = chains.isEmpty() ? null : chains.get(0).getParent();
      String pdbId = (parent == null) ? "?" : parent.getPDBCode();
      logger.warn(
          "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.",
          pdbId);
    }

    List<ModifiedCompound> found = new ArrayList<ModifiedCompound>();

    for (ProteinModification candidate : potentialModifications) {
      ModificationCondition condition = candidate.getCondition();
      List<Component> required = condition.getComponents();

      // Only candidates whose components were all seen in the chains are processed.
      if (groupsByComponent.keySet().containsAll(required)) {
        if (required.size() == 1) {
          processCrosslink1(groupsByComponent, found, candidate, required);
        } else {
          processMultiCrosslink(groupsByComponent, found, candidate, condition);
        }
      }
    }

    if (recordAdditionalAttachments) {
      // identify additional groups that are not directly attached to amino acids.
      for (ModifiedCompound compound : found) {
        identifyAdditionalAttachments(compound, allLigands, chainsById);
      }
    }

    mergeModComps(found);

    identifiedModifiedCompounds.addAll(found);

    // record unidentifiable linkage
    if (recordUnidentifiableModifiedCompounds) {
      recordUnidentifiableAtomLinkages(found, allLigands);
      recordUnidentifiableModifiedResidues(found);
    }
  }