/**
   * Builds the AFPChain for the alignment between {@code structure1} and {@code structure2} that
   * is described by the gapped protein sequences {@code sequence1} and {@code sequence2}. Each
   * sequence is matched against its own structure, so a sequence does not have to cover the whole
   * structure. A residue counts as aligned if and only if it is written as an uppercase letter.
   *
   * @param sequence1 gapped sequence for {@code structure1}; its {@link
   *     ProteinSequence#getUserCollection()} should record upper- and lower-case as aligned and
   *     unaligned (see {@link #getAlignedUserCollection(String)}). When the user collection is
   *     {@code null} no residue of this sequence is masked out.
   * @param sequence2 gapped sequence for {@code structure2}; handled like {@code sequence1}
   * @param structure1 structure the first sequence is matched against
   * @param structure2 structure the second sequence is matched against
   * @return the alignment built from the matched residue numbers of both sequences
   * @throws IllegalArgumentException if any structure or sequence is {@code null}
   * @throws StructureException
   */
  public static AFPChain fastaToAfpChain(
      ProteinSequence sequence1,
      ProteinSequence sequence2,
      Structure structure1,
      Structure structure2)
      throws StructureException {

    final boolean structureMissing = structure1 == null || structure2 == null;
    if (structureMissing) {
      throw new IllegalArgumentException("A structure is null");
    }

    final boolean sequenceMissing = sequence1 == null || sequence2 == null;
    if (sequenceMissing) {
      throw new IllegalArgumentException("A sequence is null");
    }

    final Atom[] atomsOfFirst = StructureTools.getRepresentativeAtomArray(structure1);
    final Atom[] atomsOfSecond = StructureTools.getRepresentativeAtomArray(structure2);

    final ResidueNumber[] matchedFirst =
        StructureSequenceMatcher.matchSequenceToStructure(sequence1, structure1);
    final ResidueNumber[] matchedSecond =
        StructureSequenceMatcher.matchSequenceToStructure(sequence2, structure2);

    // Lowercase positions are unaligned: blank out their residue numbers, but only when the
    // sequence actually carries case information.
    if (null != sequence1.getUserCollection()) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence1, matchedFirst);
    }
    if (null != sequence2.getUserCollection()) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(sequence2, matchedSecond);
    }

    return buildAlignment(atomsOfFirst, atomsOfSecond, matchedFirst, matchedSecond);
  }
Exemple #2
0
  /**
   * Runs {@code CeSymm.analyze} on the representative atoms of each named structure and checks
   * that the result is significant and reports the expected symmetry order.
   */
  @Test
  public void testIndependence() throws IOException, StructureException {

    String[] names = new String[] {"1hiv.A", "4i4q", "1n0r.A"};
    int[] expectedOrders = new int[] {2, 3, 4};

    for (int i = 0; i < names.length; i++) {

      Structure s = StructureTools.getStructure(names[i]);
      Atom[] atoms = StructureTools.getRepresentativeAtomArray(s);

      CeSymmResult result = CeSymm.analyze(atoms);

      assertTrue(result.isSignificant());
      // Expected value goes first so that a failure reports expected/actual the right way round
      // (assumes JUnit's assertEquals(expected, actual) ordering).
      assertEquals(expectedOrders[i], result.getSymmOrder());
    }
  }
 /**
  * Command-line entry point: converts a file holding exactly two FASTA sequences into an AFPChain
  * and writes its XML representation to standard output. When the argument count is not three a
  * usage line is written to standard error and nothing else is done.
  *
  * @param args three values, in order: fasta-file, structure-1-name, structure-2-name
  * @throws IllegalArgumentException if no structure is found for one of the two names
  * @throws StructureException
  * @throws IOException
  */
 public static void main(String[] args) throws StructureException, IOException {
   final int expectedArgCount = 3;
   if (args.length != expectedArgCount) {
     System.err.println(
         "Usage: FastaAFPChainConverter fasta-file structure-1-name structure-2-name");
     return;
   }

   final String fastaPath = args[0];
   final String firstName = args[1];
   final String secondName = args[2];

   final File fastaFile = new File(fastaPath);

   // Both structures are loaded before either result is inspected.
   final Structure firstStructure = StructureTools.getStructure(firstName);
   final Structure secondStructure = StructureTools.getStructure(secondName);
   if (firstStructure == null) {
     throw new IllegalArgumentException("No structure for " + firstName + " was found");
   }
   if (secondStructure == null) {
     throw new IllegalArgumentException("No structure for " + secondName + " was found");
   }

   final AFPChain alignment = fastaFileToAfpChain(fastaFile, firstStructure, secondStructure);
   System.out.println(AFPChainXMLConverter.toXML(alignment));
 }
  /**
   * Loads the structure identified by {@code name} and returns its representative atoms, as
   * produced by {@code StructureTools.getRepresentativeAtomArray}. The resulting array is not
   * cached by this method.
   *
   * @param name identifier of the structure to load
   * @return the representative atom array of the loaded structure
   * @throws IOException
   * @throws StructureException
   */
  public Atom[] getRepresentativeAtoms(StructureIdentifier name)
      throws IOException, StructureException {
    final Structure loaded = getStructure(name);
    return StructureTools.getRepresentativeAtomArray(loaded);
  }
  /**
   * Loads the structure identified by {@code name} and returns the atom array produced by {@code
   * StructureTools.getAtomCAArray}. The resulting array is not cached by this method.
   *
   * @param name identifier of the structure to load
   * @return the atom array of the loaded structure
   * @throws IOException
   * @throws StructureException
   */
  public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException {
    final Structure loaded = getStructure(name);
    return StructureTools.getAtomCAArray(loaded);
  }
  /**
   * Produces a copy of {@code afpChain} whose optimal alignment is replaced by {@code newAlgn},
   * then recomputes the superposition-derived values and the alignment strings for that copy.
   *
   * @param newAlgn the new optimal alignment; its first dimension is the number of blocks and the
   *     length of {@code newAlgn[b][0]} is taken as the length of block {@code b}
   * @param afpChain the alignment to start from; it is cloned before anything is set
   * @param ca1 atoms of the first structure
   * @param ca2 atoms of the second structure; a clone is used for the superposition so that the
   *     positions of the given atoms are not modified
   * @return the updated copy
   * @throws StructureException
   */
  public static AFPChain replaceOptAln(int[][][] newAlgn, AFPChain afpChain, Atom[] ca1, Atom[] ca2)
      throws StructureException {

    // One block per group of the new alignment; collect each block length and the total at once.
    final int blockCount = newAlgn.length;
    final int[] blockLengths = new int[blockCount];
    int totalLength = 0;
    for (int block = 0; block < blockCount; block++) {
      final int length = newAlgn[block][0].length;
      blockLengths[block] = length;
      totalLength += length;
    }

    // Work on a copy so the caller's AFPChain stays as it was.
    final AFPChain updated = (AFPChain) afpChain.clone();

    // Optimal alignment parameters.
    updated.setOptLength(totalLength);
    updated.setOptLen(blockLengths);
    updated.setOptAln(newAlgn);

    // Block information.
    updated.setBlockNum(blockCount);
    updated.setBlockSize(blockLengths);
    updated.setBlockResList(newAlgn);
    updated.setBlockResSize(blockLengths);
    updated.setBlockGap(calculateBlockGap(newAlgn));

    // Superposition, tm-score, etc. are recalculated against a clone of ca2.
    final Atom[] secondAtoms = StructureTools.cloneAtomArray(ca2);
    AlignmentTools.updateSuperposition(updated, ca1, secondAtoms);

    // Clear the alignment symbols so the sequence alignment strings are rebuilt from the optimal
    // alignment information only.
    updated.setAlnsymb(null);
    AFPAlignmentDisplay.getAlign(updated, ca1, secondAtoms);

    return updated;
  }
Exemple #7
0
  /**
   * Dispatches a menu action by its action command.
   *
   * <p>"New Symmetry Analysis" only opens the symmetry GUI. Every other command needs a symmetry
   * result to display; when none is set an error is logged and the action is dropped instead of
   * running a display routine without a result. Any exception raised by a display routine is
   * logged and not propagated.
   *
   * @param ae the event whose action command selects the display option
   */
  @Override
  public void actionPerformed(ActionEvent ae) {
    // Literal-first comparisons keep a null action command from raising a NullPointerException.
    String cmd = ae.getActionCommand();

    if ("New Symmetry Analysis".equals(cmd)) {
      // Needs no current result, so it must not reach the "not displaying" check below.
      SymmetryGui.getInstance();
      return;
    }

    if (symm == null) {
      logger.error("Currently not displaying a symmetry!");
      return;
    }

    try {
      if ("Repeats Superposition".equals(cmd)) {
        MultipleAlignmentJmol j = SymmetryDisplay.displayRepeats(symm);
        String s = SymmetryDisplay.printSymmetryAxes(symm);
        j.evalString(s);

      } else if ("Multiple Structure Alignment".equals(cmd)) {
        MultipleAlignmentJmol j = SymmetryDisplay.displayFull(symm);
        String s = SymmetryDisplay.printSymmetryAxes(symm);
        j.evalString(s);

      } else if ("Optimal Self Alignment".equals(cmd)) {
        Atom[] cloned = StructureTools.cloneAtomArray(symm.getAtoms());
        // Named differently from the jmol used by the branches below to avoid shadowing it.
        AbstractAlignmentJmol selfAlignmentJmol =
            StructureAlignmentDisplay.display(symm.getSelfAlignment(), symm.getAtoms(), cloned);
        RotationAxis axis = new RotationAxis(symm.getSelfAlignment());
        selfAlignmentJmol.evalString(axis.getJmolScript(symm.getAtoms()));
        selfAlignmentJmol.setTitle(SymmetryDisplay.getSymmTitle(symm));

      } else if ("Show Symmetry Group".equals(cmd)) {
        String script = SymmetryDisplay.printSymmetryGroup(symm);
        jmol.evalString(script);

      } else if ("Show Symmetry Axes".equals(cmd)) {
        String s = SymmetryDisplay.printSymmetryAxes(symm);
        jmol.evalString(s);
      }

    } catch (Exception e) {
      logger.error("Could not complete display option", e);
    }
  }
  /**
   * Creates a new AFPChain from {@code afpChain} with its optimal alignment replaced, then updates
   * the superposition and the alignment strings of the new chain.
   *
   * @param afpChain Input afpchain. UNMODIFIED; a clone of it is changed and returned
   * @param ca1 atoms of the first structure
   * @param ca2 atoms of the second structure; a clone is used so their positions are not modified
   * @param blockNum number of blocks; the first {@code blockNum} entries of {@code optLens} are
   *     summed to obtain the total optimal length
   * @param optLens length of each block
   * @param optAln the new optimal alignment
   * @return A NEW AfpChain based off the input but with the optAln modified
   * @throws StructureException if an error occured during superposition
   */
  public static AFPChain replaceOptAln(
      AFPChain afpChain, Atom[] ca1, Atom[] ca2, int blockNum, int[] optLens, int[][][] optAln)
      throws StructureException {

    // Only the first blockNum lengths count towards the total.
    int totalLength = 0;
    int block = 0;
    while (block < blockNum) {
      totalLength += optLens[block];
      block++;
    }

    // Apply the new alignment to a copy of the input.
    final AFPChain result = (AFPChain) afpChain.clone();
    result.setOptLength(totalLength);
    result.setBlockSize(optLens);
    result.setOptLen(optLens);
    result.setOptAln(optAln);
    result.setBlockNum(blockNum);

    // TODO recalculate properties: superposition, tm-score, etc
    final Atom[] secondAtoms = StructureTools.cloneAtomArray(ca2);
    AlignmentTools.updateSuperposition(result, ca1, secondAtoms);

    AFPAlignmentDisplay.getAlign(result, ca1, secondAtoms);
    return result;
  }
  /**
   * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
   *
   * <p>The full structure for the domain's PDB id is loaded and reduced to the domain. Ligands of
   * the full structure are then added back to the matching chains of the reduced structure, and
   * the PDB header description is set to the domain's classification id, followed by the
   * superfamily description when one can be looked up.
   *
   * @param domain a SCOP domain
   * @param scopDatabase A {@link ScopDatabase} to use for the superfamily description; may be
   *     {@code null}, in which case the description holds only the classification id
   * @param strictLigandHandling If set to false, hetero-atoms are included if and only if they
   *     belong to a chain to which the SCOP domain belongs; if set to true, hetero-atoms are
   *     included if and only if they are strictly within the definition (residue numbers) of the
   *     SCOP domain
   * @return a Structure object
   * @throws IOException
   * @throws StructureException
   */
  public Structure getStructureForDomain(
      ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
      throws IOException, StructureException {

    final String pdbId = domain.getPdbId();
    final Structure complete = getStructureForPdbId(pdbId);
    final Structure reduced = domain.reduce(complete);

    // TODO It would be better to move all of this into the reduce method,
    // but that would require ligand handling properties in StructureIdentifiers

    // Ligands sometimes occur after TER records in PDB files, so some of them may have to be added
    // back. The position map and the parsed domain ranges are only needed for strict handling.
    AtomPositionMap positions = null;
    List<ResidueRangeAndLength> domainRanges = null;
    if (strictLigandHandling) {
      positions =
          new AtomPositionMap(
              StructureTools.getAllAtomArray(complete), AtomPositionMap.ANYTHING_MATCHER);
      domainRanges = ResidueRangeAndLength.parseMultiple(domain.getRanges(), positions);
    }

    for (Chain sourceChain : complete.getChains()) {
      // Nothing can be done with a chain that the domain does not contain.
      if (structureHasChain(reduced, sourceChain)) {
        final Chain targetChain = reduced.getChainByPDB(sourceChain.getChainID());
        for (Group ligand : StructureTools.filterLigands(sourceChain.getAtomGroups())) {
          // Without strict handling every ligand of a shared chain qualifies; with it, the
          // ligand has to fall inside one of the domain's residue ranges.
          boolean withinDomain = !strictLigandHandling;
          if (strictLigandHandling) {
            for (ResidueRange range : domainRanges) {
              if (range.contains(ligand.getResidueNumber(), positions)) {
                withinDomain = true;
              }
            }
          }
          // Duplicate ligands are not wanted.
          final boolean alreadyPresent = targetChain.getAtomGroups().contains(ligand);
          if (withinDomain && !alreadyPresent) {
            targetChain.addGroup(ligand);
          }
        }
      }
    }

    // Build a more meaningful description for the new structure.
    final StringBuilder description = new StringBuilder();
    description.append(domain.getClassificationId());
    if (scopDatabase != null) {
      final ScopDescription superfamily =
          scopDatabase.getScopDescriptionBySunid(domain.getSuperfamilyId());
      if (superfamily != null) {
        description.append(" | ").append(superfamily.getDescription());
      }
    }
    reduced.getPDBHeader().setDescription(description.toString());

    return reduced;
  }

  /** Tells whether {@code structure} has a chain with the same chain id as {@code chain}. */
  private static boolean structureHasChain(Structure structure, Chain chain) {
    return structure.hasChain(chain.getChainID());
  }
  /**
   * Takes a structure and sequence corresponding to an alignment between a structure or sequence
   * and itself (or even a structure with a sequence), where the result has a circular permutation
   * site {@code cpSite} residues to the right.
   *
   * <p>The ungapped {@code cpSite} is first translated into a shift over the gapped {@code second}
   * sequence by counting non-gap characters, from the start of the sequence when {@code cpSite} is
   * zero or negative and from its end otherwise.
   *
   * @param first The unpermuted sequence
   * @param second The sequence permuted by cpSite
   * @param structure The structure both sequences are matched against
   * @param cpSite The number of residues from the beginning of the sequence at which the circular
   *     permutation site occurs; can be positive or negative
   * @return the alignment built from the residues matched to {@code first} and to {@code second}
   * @throws IllegalArgumentException if {@code structure}, {@code first} or {@code second} is
   *     {@code null}, or if the gapped sequence runs out before {@code cpSite} non-gap characters
   *     have been counted
   * @throws IllegalStateException if the permuted sequence cannot be turned back into a protein
   *     sequence, which is not expected to happen
   * @throws StructureException
   */
  public static AFPChain cpFastaToAfpChain(
      ProteinSequence first, ProteinSequence second, Structure structure, int cpSite)
      throws StructureException {

    if (structure == null) {
      throw new IllegalArgumentException("The structure is null");
    }

    if (first == null) {
      throw new IllegalArgumentException("The sequence is null");
    }

    // second is dereferenced below just like first, so reject null up front as well.
    if (second == null) {
      throw new IllegalArgumentException("The permuted sequence is null");
    }

    final String permutedString = second.getSequenceAsString();

    // we need to find the ungapped CP site
    int gappedCpShift = 0;
    int ungappedCpShift = 0;
    while (ungappedCpShift < Math.abs(cpSite)) {
      // NOTE(review): the index into the second sequence is derived from first.getLength(); this
      // presumes both gapped sequences have the same length -- confirm.
      final int index = (cpSite <= 0) ? gappedCpShift : first.getLength() - 1 - gappedCpShift;
      if (index < 0 || index >= permutedString.length()) {
        throw new IllegalArgumentException("CP site of " + cpSite + " is wrong");
      }
      if (permutedString.charAt(index) != '-') {
        ungappedCpShift++;
      }
      gappedCpShift++;
    }

    Atom[] ca1 = StructureTools.getRepresentativeAtomArray(structure);
    // can't use cloneCAArray because it doesn't set parent group.chain.structure
    Atom[] ca2 = StructureTools.getRepresentativeAtomArray(structure);

    final ProteinSequence antipermuted;
    try {
      antipermuted =
          new ProteinSequence(SequenceTools.permuteCyclic(permutedString, gappedCpShift));
    } catch (CompoundNotFoundException e) {
      // This can't happen, the original sequence comes from a ProteinSequence. Fail here with the
      // cause attached rather than passing a null sequence on to the matcher.
      throw new IllegalStateException(
          "Unexpected error while creating protein sequence. This is most likely a bug.", e);
    }

    ResidueNumber[] residues = StructureSequenceMatcher.matchSequenceToStructure(first, structure);
    ResidueNumber[] antipermutedResidues =
        StructureSequenceMatcher.matchSequenceToStructure(antipermuted, structure);

    // Undo the cyclic shift so the residue numbers line up with the positions of second again.
    ResidueNumber[] nonpermutedResidues = new ResidueNumber[antipermutedResidues.length];
    SequenceTools.permuteCyclic(antipermutedResidues, nonpermutedResidues, -gappedCpShift);

    // nullify ResidueNumbers that have a lowercase sequence character
    if (first.getUserCollection() != null) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(first, residues);
    }
    if (second.getUserCollection() != null) {
      CasePreservingProteinSequenceCreator.setLowercaseToNull(second, nonpermutedResidues);
    }

    return buildAlignment(ca1, ca2, residues, nonpermutedResidues);
  }
  /**
   * Identify a set of modifications in a list of chains.
   *
   * <p>Previous results are cleared with {@code reset()} once both arguments have been validated.
   * When {@code potentialModifications} is empty nothing further is done.
   *
   * @param chains query {@link Chain}s.
   * @param potentialModifications query {@link ProteinModification}s.
   * @throws IllegalArgumentException if either argument is {@code null}
   */
  public void identify(
      final List<Chain> chains, final Set<ProteinModification> potentialModifications) {

    if (chains == null) {
      throw new IllegalArgumentException("Null structure.");
    }
    if (potentialModifications == null) {
      throw new IllegalArgumentException("Null potentialModifications.");
    }

    reset();

    if (potentialModifications.isEmpty()) {
      return;
    }

    final Map<String, Chain> chainsById = new HashMap<String, Chain>(chains.size());
    residues = new ArrayList<Group>();
    final List<Group> allLigands = new ArrayList<Group>();
    final Map<Component, Set<Group>> groupsByComponent = new HashMap<Component, Set<Group>>();

    // Collect, chain by chain, the amino acids and ligands and the groups that match a component
    // of any of the candidate modifications.
    for (Chain chain : chains) {
      chainsById.put(chain.getChainID(), chain);

      final List<Group> aminoAcids = StructureUtil.getAminoAcids(chain);
      final List<Group> chainLigands = StructureTools.filterLigands(chain.getAtomGroups());

      residues.addAll(aminoAcids);
      residues.removeAll(chainLigands);
      allLigands.addAll(chainLigands);
      addModificationGroups(potentialModifications, aminoAcids, chainLigands, groupsByComponent);
    }

    if (residues.isEmpty()) {
      // Name the structure in the warning when the first chain has a parent, "?" otherwise.
      final Structure parent = chains.isEmpty() ? null : chains.get(0).getParent();
      final String pdbId = (parent == null) ? "?" : parent.getPDBCode();
      logger.warn(
          "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.",
          pdbId);
    }

    final List<ModifiedCompound> found = new ArrayList<ModifiedCompound>();

    for (ProteinModification modification : potentialModifications) {
      final ModificationCondition condition = modification.getCondition();
      final List<Component> required = condition.getComponents();

      // A modification is only processed when every one of its components was seen.
      if (groupsByComponent.keySet().containsAll(required)) {
        if (required.size() == 1) {
          processCrosslink1(groupsByComponent, found, modification, required);
        } else {
          processMultiCrosslink(groupsByComponent, found, modification, condition);
        }
      }
    }

    if (recordAdditionalAttachments) {
      // identify additional groups that are not directly attached to amino acids.
      for (ModifiedCompound compound : found) {
        identifyAdditionalAttachments(compound, allLigands, chainsById);
      }
    }

    mergeModComps(found);

    identifiedModifiedCompounds.addAll(found);

    // record unidentifiable linkage
    if (recordUnidentifiableModifiedCompounds) {
      recordUnidentifiableAtomLinkages(found, allLigands);
      recordUnidentifiableModifiedResidues(found);
    }
  }