/**
 * Takes a structure and sequence corresponding to an alignment between a structure or sequence
 * and itself (or even a structure with a sequence), where the result has a circular permutation
 * site {@code cpSite} residues to the right.
 *
 * <p>The first two sequences read from {@code fastaFile} are handed, together with {@code
 * structure} and {@code cpSite}, to the sequence-based overload of this method.
 *
 * @param fastaFile A FASTA file containing exactly 2 sequences, the first unpermuted and the
 *     second permuted
 * @param structure The structure passed unchanged to the sequence-based overload
 * @param cpSite The number of residues from the beginning of the sequence at which the circular
 *     permutation site occurs; can be positive or negative; values greater than the length of the
 *     sequence are acceptable
 * @return The alignment produced by the sequence-based overload for the two sequences
 * @throws IOException If the FASTA file cannot be opened, read, or closed
 * @throws StructureException Propagated from the sequence-based overload
 */
public static AFPChain cpFastaToAfpChain(File fastaFile, Structure structure, int cpSite)
    throws IOException, StructureException {
  LinkedHashMap<String, ProteinSequence> sequences;
  InputStream inStream = new FileInputStream(fastaFile);
  try {
    SequenceCreatorInterface<AminoAcidCompound> creator =
        new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
    SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser =
        new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
    FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
        new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
    sequences = fastaReader.process();
  } finally {
    // Release the file handle even when parsing fails part-way through.
    inStream.close();
  }

  // The map preserves file order, so the first two values are the unpermuted and the
  // permuted sequence respectively. The file is assumed to hold at least two sequences.
  Iterator<ProteinSequence> iter = sequences.values().iterator();
  ProteinSequence first = iter.next();
  ProteinSequence second = iter.next();
  return cpFastaToAfpChain(first, second, structure, cpSite);
}
/** * Takes a potentially non-sequential alignment and guesses a sequential version of it. Residues * from each structure are sorted sequentially and then compared directly. * * <p>The results of this method are consistent with what one might expect from an identity * function, and are therefore useful with {@link #getSymmetryOrder(Map, Map identity, int, * float)}. * * <ul> * <li>Perfect self-alignments will have the same pre-image and image, so will map X->X * <li>Gaps and alignment errors will cause errors in the resulting map, but only locally. * Errors do not propagate through the whole alignment. * </ul> * * <h4>Example:</h4> * * A non sequential alignment, represented schematically as * * <pre> * 12456789 * 78912345</pre> * * would result in a map * * <pre> * 12456789 * 12345789</pre> * * @param alignment The non-sequential input alignment * @param inverseAlignment If false, map from structure1 to structure2. If true, generate the * inverse of that map. * @return A mapping from sequential residues of one protein to those of the other * @throws IllegalArgumentException if the input alignment is not one-to-one. */ public static Map<Integer, Integer> guessSequentialAlignment( Map<Integer, Integer> alignment, boolean inverseAlignment) { Map<Integer, Integer> identity = new HashMap<Integer, Integer>(); SortedSet<Integer> aligned1 = new TreeSet<Integer>(); SortedSet<Integer> aligned2 = new TreeSet<Integer>(); for (Entry<Integer, Integer> pair : alignment.entrySet()) { aligned1.add(pair.getKey()); if (!aligned2.add(pair.getValue())) throw new IllegalArgumentException( "Alignment is not one-to-one for residue " + pair.getValue() + " of the second structure."); } Iterator<Integer> it1 = aligned1.iterator(); Iterator<Integer> it2 = aligned2.iterator(); while (it1.hasNext()) { if (inverseAlignment) { // 2->1 identity.put(it2.next(), it1.next()); } else { // 1->2 identity.put(it1.next(), it2.next()); } } return identity; }
/** Merge identified modified compounds if linked. */ private void mergeModComps(List<ModifiedCompound> modComps) { TreeSet<Integer> remove = new TreeSet<Integer>(); int n = modComps.size(); for (int icurr = 1; icurr < n; icurr++) { ModifiedCompound curr = modComps.get(icurr); String id = curr.getModification().getId(); if (ProteinModificationRegistry.getById(id).getCategory() != ModificationCategory.UNDEFINED) continue; // find linked compounds that before curr // List<Integer> merging = new ArrayList<Integer>(); int ipre = 0; for (; ipre < icurr; ipre++) { if (remove.contains(ipre)) continue; ModifiedCompound pre = modComps.get(ipre); if (!Collections.disjoint(pre.getGroups(false), curr.getGroups(false))) { break; } } if (ipre < icurr) { ModifiedCompound mcKeep = modComps.get(ipre); // merge modifications of the same type if (mcKeep.getModification().getId().equals(id)) { // merging the current one to the previous one mcKeep.addAtomLinkages(curr.getAtomLinkages()); remove.add(icurr); } } } Iterator<Integer> it = remove.descendingIterator(); while (it.hasNext()) { modComps.remove(it.next().intValue()); } }