/** * Returns an AFPChain corresponding to the alignment between {@code structure1} and {@code * structure2}, which is given by the gapped protein sequences {@code sequence1} and {@code * sequence2}. The sequences need not correspond to the entire structures, since local alignment * is performed to match the sequences to structures. Assumes that a residue is aligned if and * only if it is given by an uppercase letter. * * @param sequence1 <em>Must</em> have {@link ProteinSequence#getUserCollection()} set to document * upper- and lower-case as aligned and unaligned; see {@link * #getAlignedUserCollection(String)} * @throws StructureException */ public static AFPChain fastaToAfpChain( ProteinSequence sequence1, ProteinSequence sequence2, Structure structure1, Structure structure2) throws StructureException { if (structure1 == null || structure2 == null) { throw new IllegalArgumentException("A structure is null"); } if (sequence1 == null || sequence2 == null) { throw new IllegalArgumentException("A sequence is null"); } Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure1); Atom[] ca2 = StructureTools.getRepresentativeAtomArray(structure2); ResidueNumber[] residues1 = StructureSequenceMatcher.matchSequenceToStructure(sequence1, structure1); ResidueNumber[] residues2 = StructureSequenceMatcher.matchSequenceToStructure(sequence2, structure2); // nullify ResidueNumbers that have a lowercase sequence character if (sequence1.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence1, residues1); } if (sequence2.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence2, residues2); } return buildAlignment(ca1, ca2, residues1, residues2); }
/** * Takes a structure and sequence corresponding to an alignment between a structure or sequence * and itself (or even a structure with a sequence), where the result has a circular permutation * site {@link cpSite} residues to the right. * * @param first The unpermuted sequence * @param second The sequence permuted by cpSite * @param cpSite The number of residues from the beginning of the sequence at which the circular * permutation site occurs; can be positive or negative; values greater than the length of the * sequence are acceptable * @throws StructureException */ public static AFPChain cpFastaToAfpChain( ProteinSequence first, ProteinSequence second, Structure structure, int cpSite) throws StructureException { if (structure == null) { throw new IllegalArgumentException("The structure is null"); } if (first == null) { throw new IllegalArgumentException("The sequence is null"); } // we need to find the ungapped CP site int gappedCpShift = 0; int ungappedCpShift = 0; while (ungappedCpShift < Math.abs(cpSite)) { char c; try { if (cpSite <= 0) { c = second.getSequenceAsString().charAt(gappedCpShift); } else { c = second.getSequenceAsString().charAt(first.getLength() - 1 - gappedCpShift); } } catch (StringIndexOutOfBoundsException e) { throw new IllegalArgumentException("CP site of " + cpSite + " is wrong"); } if (c != '-') { ungappedCpShift++; } gappedCpShift++; } Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure); Atom[] ca2 = StructureTools.getRepresentativeAtomArray( structure); // can't use cloneCAArray because it doesn't set parent // group.chain.structure ProteinSequence antipermuted = null; try { antipermuted = new ProteinSequence( SequenceTools.permuteCyclic(second.getSequenceAsString(), gappedCpShift)); } catch (CompoundNotFoundException e) { // this can't happen, the original sequence comes from a ProteinSequence logger.error( "Unexpected error while creating protein sequence: {}. This is most likely a bug.", e.getMessage()); } ResidueNumber[] residues = StructureSequenceMatcher.matchSequenceToStructure(first, structure); ResidueNumber[] antipermutedResidues = StructureSequenceMatcher.matchSequenceToStructure(antipermuted, structure); ResidueNumber[] nonpermutedResidues = new ResidueNumber[antipermutedResidues.length]; SequenceTools.permuteCyclic(antipermutedResidues, nonpermutedResidues, -gappedCpShift); // nullify ResidueNumbers that have a lowercase sequence character if (first.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(first, residues); } if (second.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(second, nonpermutedResidues); } // for (int i = 0; i < residues.length; i++) { // if (residues[i] == null) { // System.out.print("="); // } else { // System.out.print(sequence.getSequenceAsString().charAt(i)); // } // } // System.out.println(); // for (int i = 0; i < residues.length; i++) { // if (nonpermutedResidues[i] == null) { // System.out.print("="); // } else { // System.out.print(second.getSequenceAsString().charAt(i)); // } // } // System.out.println(); return buildAlignment(ca1, ca2, residues, nonpermutedResidues); }