/** * Returns an AFPChain corresponding to the alignment between {@code structure1} and {@code * structure2}, which is given by the gapped protein sequences {@code sequence1} and {@code * sequence2}. The sequences need not correspond to the entire structures, since local alignment * is performed to match the sequences to structures. Assumes that a residue is aligned if and * only if it is given by an uppercase letter. * * @param sequence1 <em>Must</em> have {@link ProteinSequence#getUserCollection()} set to document * upper- and lower-case as aligned and unaligned; see {@link * #getAlignedUserCollection(String)} * @throws StructureException */ public static AFPChain fastaToAfpChain( ProteinSequence sequence1, ProteinSequence sequence2, Structure structure1, Structure structure2) throws StructureException { if (structure1 == null || structure2 == null) { throw new IllegalArgumentException("A structure is null"); } if (sequence1 == null || sequence2 == null) { throw new IllegalArgumentException("A sequence is null"); } Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure1); Atom[] ca2 = StructureTools.getRepresentativeAtomArray(structure2); ResidueNumber[] residues1 = StructureSequenceMatcher.matchSequenceToStructure(sequence1, structure1); ResidueNumber[] residues2 = StructureSequenceMatcher.matchSequenceToStructure(sequence2, structure2); // nullify ResidueNumbers that have a lowercase sequence character if (sequence1.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence1, residues1); } if (sequence2.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence2, residues2); } return buildAlignment(ca1, ca2, residues1, residues2); }
/**
 * Runs {@code CeSymm.analyze} on the representative atoms of several structures and checks that
 * each result is reported as significant and has the expected symmetry order.
 *
 * @throws IOException
 * @throws StructureException
 */
@Test
public void testIndependence() throws IOException, StructureException {

  // names[i] is expected to be detected with symmetry order orders[i]
  String[] names = new String[] {"1hiv.A", "4i4q", "1n0r.A"};
  int[] orders = new int[] {2, 3, 4};

  for (int i = 0; i < names.length; i++) {
    Structure s = StructureTools.getStructure(names[i]);
    Atom[] atoms = StructureTools.getRepresentativeAtomArray(s);

    CeSymmResult result = CeSymm.analyze(atoms);

    assertTrue(result.isSignificant());
    // Expected value goes first, actual second, so that a failure message
    // reports "expected <order> but was <detected>" the right way round.
    assertEquals(orders[i], result.getSymmOrder());
  }
}
/**
 * Command-line entry point: reads a file containing exactly two FASTA sequences, converts the
 * alignment they describe into an AFPChain and prints its XML representation to standard output.
 *
 * <p>If the number of arguments is not three, a usage message is written to standard error and
 * nothing else is done.
 *
 * @param args a String array of fasta-file structure-1-name structure-2-name
 * @throws IllegalArgumentException if one of the named structures cannot be found
 * @throws StructureException
 * @throws IOException
 */
public static void main(String[] args) throws StructureException, IOException {

  if (args.length != 3) {
    System.err.println(
        "Usage: FastaAFPChainConverter fasta-file structure-1-name structure-2-name");
    return;
  }

  final String fastaPath = args[0];
  final String firstName = args[1];
  final String secondName = args[2];

  final File fastaFile = new File(fastaPath);

  // Both structures are loaded before either one is validated.
  final Structure firstStructure = StructureTools.getStructure(firstName);
  final Structure secondStructure = StructureTools.getStructure(secondName);

  if (firstStructure == null) {
    throw new IllegalArgumentException("No structure for " + firstName + " was found");
  }
  if (secondStructure == null) {
    throw new IllegalArgumentException("No structure for " + secondName + " was found");
  }

  final AFPChain alignment = fastaFileToAfpChain(fastaFile, firstStructure, secondStructure);
  System.out.println(AFPChainXMLConverter.toXML(alignment));
}
/**
 * Loads the structure identified by {@code name} and returns its representative atoms, as
 * computed by {@code StructureTools.getRepresentativeAtomArray}.
 *
 * <p>The returned array is not cached by this method.
 *
 * @param name identifier of the structure to load
 * @return the representative atoms of the loaded structure
 * @throws IOException
 * @throws StructureException
 */
public Atom[] getRepresentativeAtoms(StructureIdentifier name)
    throws IOException, StructureException {
  final Structure structure = getStructure(name);
  return StructureTools.getRepresentativeAtomArray(structure);
}
public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException { Atom[] atoms = null; // System.out.println("loading " + name); Structure s = getStructure(name); atoms = StructureTools.getAtomCAArray(s); /* * synchronized (cache){ cache.put(name, atoms); } */ return atoms; }
/** * It replaces an optimal alignment of an AFPChain and calculates all the new alignment scores and * variables. */ public static AFPChain replaceOptAln(int[][][] newAlgn, AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws StructureException { // The order is the number of groups in the newAlgn int order = newAlgn.length; // Calculate the alignment length from all the subunits lengths int[] optLens = new int[order]; for (int s = 0; s < order; s++) { optLens[s] = newAlgn[s][0].length; } int optLength = 0; for (int s = 0; s < order; s++) { optLength += optLens[s]; } // Create a copy of the original AFPChain and set everything needed for the structure update AFPChain copyAFP = (AFPChain) afpChain.clone(); // Set the new parameters of the optimal alignment copyAFP.setOptLength(optLength); copyAFP.setOptLen(optLens); copyAFP.setOptAln(newAlgn); // Set the block information of the new alignment copyAFP.setBlockNum(order); copyAFP.setBlockSize(optLens); copyAFP.setBlockResList(newAlgn); copyAFP.setBlockResSize(optLens); copyAFP.setBlockGap(calculateBlockGap(newAlgn)); // Recalculate properties: superposition, tm-score, etc Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); // don't modify ca2 positions AlignmentTools.updateSuperposition(copyAFP, ca1, ca2clone); // It re-does the sequence alignment strings from the OptAlgn information only copyAFP.setAlnsymb(null); AFPAlignmentDisplay.getAlign(copyAFP, ca1, ca2clone); return copyAFP; }
/**
 * Dispatches a menu action, identified by its action command, to the matching symmetry display
 * option.
 *
 * <p>"New Symmetry Analysis" only opens the symmetry GUI and needs no current result. Every other
 * command operates on the currently displayed symmetry result; if there is none, an error is
 * logged and the action is ignored. Failures while building a display are logged, not
 * propagated.
 *
 * @param ae the event whose action command selects the display option
 */
@Override
public void actionPerformed(ActionEvent ae) {

  String cmd = ae.getActionCommand();

  if (cmd.equals("New Symmetry Analysis")) {
    SymmetryGui.getInstance();
    // Nothing else to do: this command does not depend on a displayed symmetry,
    // so it must not fall through to the "not displaying" error below.
    return;
  }

  if (symm == null) {
    logger.error("Currently not displaying a symmetry!");
    // Every remaining option dereferences symm; stop here instead of failing later.
    return;
  }

  try {
    if (cmd.equals("Repeats Superposition")) {
      MultipleAlignmentJmol j = SymmetryDisplay.displayRepeats(symm);
      String s = SymmetryDisplay.printSymmetryAxes(symm);
      j.evalString(s);

    } else if (cmd.equals("Multiple Structure Alignment")) {
      MultipleAlignmentJmol j = SymmetryDisplay.displayFull(symm);
      String s = SymmetryDisplay.printSymmetryAxes(symm);
      j.evalString(s);

    } else if (cmd.equals("Optimal Self Alignment")) {
      Atom[] cloned = StructureTools.cloneAtomArray(symm.getAtoms());
      // Named differently from the jmol field so the new window does not shadow it.
      AbstractAlignmentJmol selfAlignmentJmol =
          StructureAlignmentDisplay.display(symm.getSelfAlignment(), symm.getAtoms(), cloned);
      RotationAxis axis = new RotationAxis(symm.getSelfAlignment());
      selfAlignmentJmol.evalString(axis.getJmolScript(symm.getAtoms()));
      selfAlignmentJmol.setTitle(SymmetryDisplay.getSymmTitle(symm));

    } else if (cmd.equals("Show Symmetry Group")) {
      String script = SymmetryDisplay.printSymmetryGroup(symm);
      jmol.evalString(script);

    } else if (cmd.equals("Show Symmetry Axes")) {
      String s = SymmetryDisplay.printSymmetryAxes(symm);
      jmol.evalString(s);
    }
  } catch (Exception e) {
    // UI boundary: report the failure and keep the listener alive.
    logger.error("Could not complete display option", e);
  }
}
/** * @param afpChain Input afpchain. UNMODIFIED * @param ca1 * @param ca2 * @param optLens * @param optAln * @return A NEW AfpChain based off the input but with the optAln modified * @throws StructureException if an error occured during superposition */ public static AFPChain replaceOptAln( AFPChain afpChain, Atom[] ca1, Atom[] ca2, int blockNum, int[] optLens, int[][][] optAln) throws StructureException { int optLength = 0; for (int blk = 0; blk < blockNum; blk++) { optLength += optLens[blk]; } // set everything AFPChain refinedAFP = (AFPChain) afpChain.clone(); refinedAFP.setOptLength(optLength); refinedAFP.setBlockSize(optLens); refinedAFP.setOptLen(optLens); refinedAFP.setOptAln(optAln); refinedAFP.setBlockNum(blockNum); // TODO recalculate properties: superposition, tm-score, etc Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); // don't modify ca2 positions AlignmentTools.updateSuperposition(refinedAFP, ca1, ca2clone); AFPAlignmentDisplay.getAlign(refinedAFP, ca1, ca2clone); return refinedAFP; }
/** * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. * * @param domain a SCOP domain * @param scopDatabase A {@link ScopDatabase} to use * @param strictLigandHandling If set to false, hetero-atoms are included if and only if they * belong to a chain to which the SCOP domain belongs; if set to true, hetero-atoms are * included if and only if they are strictly within the definition (residue numbers) of the * SCOP domain * @return a Structure object * @throws IOException * @throws StructureException */ public Structure getStructureForDomain( ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling) throws IOException, StructureException { String pdbId = domain.getPdbId(); Structure fullStructure = getStructureForPdbId(pdbId); Structure structure = domain.reduce(fullStructure); // TODO It would be better to move all of this into the reduce method, // but that would require ligand handling properties in StructureIdentifiers // because ligands sometimes occur after TER records in PDB files, we may need to add some // ligands back in // specifically, we add a ligand if and only if it occurs within the domain AtomPositionMap map = null; List<ResidueRangeAndLength> rrs = null; if (strictLigandHandling) { map = new AtomPositionMap( StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER); rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map); } for (Chain chain : fullStructure.getChains()) { if (!structure.hasChain(chain.getChainID())) { continue; // we can't do anything with a chain our domain } // doesn't contain Chain newChain = structure.getChainByPDB(chain.getChainID()); List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups()); for (Group group : ligands) { boolean shouldContain = true; if (strictLigandHandling) { shouldContain = false; // whether the ligand occurs within the domain for (ResidueRange rr : rrs) { if (rr.contains(group.getResidueNumber(), 
map)) { shouldContain = true; } } } boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate // ligands if (shouldContain && !alreadyContains) { newChain.addGroup(group); } } } // build a more meaningful description for the new structure StringBuilder header = new StringBuilder(); header.append(domain.getClassificationId()); if (scopDatabase != null) { int sf = domain.getSuperfamilyId(); ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf); if (description != null) { header.append(" | "); header.append(description.getDescription()); } } structure.getPDBHeader().setDescription(header.toString()); return structure; }
/** * Takes a structure and sequence corresponding to an alignment between a structure or sequence * and itself (or even a structure with a sequence), where the result has a circular permutation * site {@link cpSite} residues to the right. * * @param first The unpermuted sequence * @param second The sequence permuted by cpSite * @param cpSite The number of residues from the beginning of the sequence at which the circular * permutation site occurs; can be positive or negative; values greater than the length of the * sequence are acceptable * @throws StructureException */ public static AFPChain cpFastaToAfpChain( ProteinSequence first, ProteinSequence second, Structure structure, int cpSite) throws StructureException { if (structure == null) { throw new IllegalArgumentException("The structure is null"); } if (first == null) { throw new IllegalArgumentException("The sequence is null"); } // we need to find the ungapped CP site int gappedCpShift = 0; int ungappedCpShift = 0; while (ungappedCpShift < Math.abs(cpSite)) { char c; try { if (cpSite <= 0) { c = second.getSequenceAsString().charAt(gappedCpShift); } else { c = second.getSequenceAsString().charAt(first.getLength() - 1 - gappedCpShift); } } catch (StringIndexOutOfBoundsException e) { throw new IllegalArgumentException("CP site of " + cpSite + " is wrong"); } if (c != '-') { ungappedCpShift++; } gappedCpShift++; } Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure); Atom[] ca2 = StructureTools.getRepresentativeAtomArray( structure); // can't use cloneCAArray because it doesn't set parent // group.chain.structure ProteinSequence antipermuted = null; try { antipermuted = new ProteinSequence( SequenceTools.permuteCyclic(second.getSequenceAsString(), gappedCpShift)); } catch (CompoundNotFoundException e) { // this can't happen, the original sequence comes from a ProteinSequence logger.error( "Unexpected error while creating protein sequence: {}. 
This is most likely a bug.", e.getMessage()); } ResidueNumber[] residues = StructureSequenceMatcher.matchSequenceToStructure(first, structure); ResidueNumber[] antipermutedResidues = StructureSequenceMatcher.matchSequenceToStructure(antipermuted, structure); ResidueNumber[] nonpermutedResidues = new ResidueNumber[antipermutedResidues.length]; SequenceTools.permuteCyclic(antipermutedResidues, nonpermutedResidues, -gappedCpShift); // nullify ResidueNumbers that have a lowercase sequence character if (first.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(first, residues); } if (second.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(second, nonpermutedResidues); } // for (int i = 0; i < residues.length; i++) { // if (residues[i] == null) { // System.out.print("="); // } else { // System.out.print(sequence.getSequenceAsString().charAt(i)); // } // } // System.out.println(); // for (int i = 0; i < residues.length; i++) { // if (nonpermutedResidues[i] == null) { // System.out.print("="); // } else { // System.out.print(second.getSequenceAsString().charAt(i)); // } // } // System.out.println(); return buildAlignment(ca1, ca2, residues, nonpermutedResidues); }
/** * Identify a set of modifications in a a list of chains. * * @param chains query {@link Chain}s. * @param potentialModifications query {@link ProteinModification}s. */ public void identify( final List<Chain> chains, final Set<ProteinModification> potentialModifications) { if (chains == null) { throw new IllegalArgumentException("Null structure."); } if (potentialModifications == null) { throw new IllegalArgumentException("Null potentialModifications."); } reset(); if (potentialModifications.isEmpty()) { return; } Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size()); residues = new ArrayList<Group>(); List<Group> ligands = new ArrayList<Group>(); Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>(); for (Chain chain : chains) { mapChainIdChain.put(chain.getChainID(), chain); List<Group> ress = StructureUtil.getAminoAcids(chain); // List<Group> ligs = chain.getAtomLigands(); List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); residues.addAll(ress); residues.removeAll(ligs); ligands.addAll(ligs); addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); } if (residues.isEmpty()) { String pdbId = "?"; if (chains.size() > 0) { Structure struc = chains.get(0).getParent(); if (struc != null) pdbId = struc.getPDBCode(); } logger.warn( "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); } List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>(); for (ProteinModification mod : potentialModifications) { ModificationCondition condition = mod.getCondition(); List<Component> components = condition.getComponents(); if (!mapCompGroups.keySet().containsAll(components)) { // not all components exist for this mod. 
continue; } int sizeComps = components.size(); if (sizeComps == 1) { processCrosslink1(mapCompGroups, modComps, mod, components); } else { processMultiCrosslink(mapCompGroups, modComps, mod, condition); } } if (recordAdditionalAttachments) { // identify additional groups that are not directly attached to amino acids. for (ModifiedCompound mc : modComps) { identifyAdditionalAttachments(mc, ligands, mapChainIdChain); } } mergeModComps(modComps); identifiedModifiedCompounds.addAll(modComps); // record unidentifiable linkage if (recordUnidentifiableModifiedCompounds) { recordUnidentifiableAtomLinkages(modComps, ligands); recordUnidentifiableModifiedResidues(modComps); } }