示例#1
0
  /**
   * Checks that a single {@code FastaReader.process()} call over {@code /test.fasta} returns a
   * non-null map holding ten entries (guards the behaviour referenced as #282).
   */
  @Test
  public void testSmallFasta2() {

    try {
      InputStream inStream = this.getClass().getResourceAsStream("/test.fasta");
      // A missing classpath resource should be reported as such, not as an error inside the reader.
      assertNotNull(inStream);

      try {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
            new FastaReader<ProteinSequence, AminoAcidCompound>(
                inStream,
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

        LinkedHashMap<String, ProteinSequence> b = fastaReader.process();

        assertNotNull(b);

        // #282 make sure that process() still works
        assertTrue("Expected 10 sequences but got " + b.size(), b.size() == 10);
      } finally {
        // Release the resource stream whether or not the assertions above pass.
        inStream.close();
      }

    } catch (Exception ex) {
      // The logger call records the stack trace, so a separate printStackTrace() is not needed.
      java.util.logging.Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, null, ex);

      fail(ex.getMessage());
    }
  }
 /**
  * Checks that a {@code SimpleSubstitutionMatrix} built from the amino acid compound set and the
  * two short values 5 and 1 reports the name {@code IDENTITY_5_1}.
  */
 @Test
 public void testSimpleSubstitutionMatrixCompoundSetOfCShortShort() {
   SubstitutionMatrix<AminoAcidCompound> matrix =
       new SimpleSubstitutionMatrix<AminoAcidCompound>(
           AminoAcidCompoundSet.getAminoAcidCompoundSet(), (short) 5, (short) 1);
   // JUnit expects (expected, actual); this order keeps the failure message truthful.
   assertEquals("IDENTITY_5_1", matrix.getName());
 }
示例#3
0
  /**
   * Reads {@code /test.fasta} in batches of up to ten sequences via {@code process(10)} until the
   * reader returns {@code null}, and fails if the running total reaches 15, which would indicate
   * that batch processing does not terminate (see #282).
   */
  @Test
  public void testSmallFasta() {

    try {
      InputStream inStream = this.getClass().getResourceAsStream("/test.fasta");

      try {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
            new FastaReader<ProteinSequence, AminoAcidCompound>(
                inStream,
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

        LinkedHashMap<String, ProteinSequence> b;

        int nrSeq = 0;

        while ((b = fastaReader.process(10)) != null) {
          // Only the number of entries matters here, so count the batch as a whole.
          nrSeq += b.size();

          // #282 would result in an endless loop
          // this makes sure it has been fixed.
          assertTrue(
              "Looks like there is a problem with termination of processing of the FASTA file!",
              nrSeq < 15);
        }
      } finally {
        // Release the resource stream even when the termination assertion fails.
        if (inStream != null) {
          inStream.close();
        }
      }
    } catch (Exception ex) {
      // The logger call records the stack trace, so a separate printStackTrace() is not needed.
      java.util.logging.Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, null, ex);

      fail(ex.getMessage());
    }
  }
 /**
  * Checks the compound set, name, maximum value and minimum value reported by the matrix returned
  * from {@code SimpleSubstitutionMatrix.getBlosum62()}.
  */
 @Test
 public void testSimpleSubstitutionMatrix() {
   SubstitutionMatrix<AminoAcidCompound> matrix = SimpleSubstitutionMatrix.getBlosum62();
   // JUnit expects (expected, actual); this order keeps failure messages truthful.
   assertEquals(AminoAcidCompoundSet.getAminoAcidCompoundSet(), matrix.getCompoundSet());
   assertEquals("blosum62", matrix.getName());
   assertEquals(11, matrix.getMaxValue());
   assertEquals(-4, matrix.getMinValue());
 }
 /**
  * Reads the file {@code fastaFile}, expecting exactly two sequences which give a pairwise
  * alignment. Uses this and two structures to create an AFPChain corresponding to the alignment.
  * Uses a {@link CasePreservingProteinSequenceCreator} and assumes that a residue is aligned if
  * and only if it is given by an uppercase letter.
  *
  * @param fastaFile the FASTA file to read
  * @param structure1 first structure, passed on unchanged to {@code fastaToAfpChain}
  * @param structure2 second structure, passed on unchanged to {@code fastaToAfpChain}
  * @return the AFPChain returned by {@code fastaToAfpChain} for the parsed sequences
  * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
  * @throws IOException if the file cannot be opened, or if reading or closing it fails
  * @throws StructureException propagated from {@code fastaToAfpChain}
  */
 public static AFPChain fastaFileToAfpChain(
     File fastaFile, Structure structure1, Structure structure2)
     throws IOException, StructureException {
   InputStream inStream = new FileInputStream(fastaFile);
   LinkedHashMap<String, ProteinSequence> sequences;
   try {
     SequenceCreatorInterface<AminoAcidCompound> creator =
         new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
     SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser =
         new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
     FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
         new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
     sequences = fastaReader.process();
   } finally {
     // Release the file handle even when parsing fails part-way through.
     inStream.close();
   }
   return fastaToAfpChain(sequences, structure1, structure2);
 }
 /**
  * Takes a structure and sequence corresponding to an alignment between a structure or sequence
  * and itself (or even a structure with a sequence), where the result has a circular permutation
  * site {@code cpSite} residues to the right.
  *
  * <p>The first two sequences in the file are read and handed to the sequence-based {@code
  * cpFastaToAfpChain} overload together with {@code structure} and {@code cpSite}.
  *
  * @param fastaFile A FASTA file containing exactly 2 sequences, the first unpermuted and the
  *     second permuted
  * @param structure The structure passed on unchanged to the sequence-based overload
  * @param cpSite The number of residues from the beginning of the sequence at which the circular
  *     permutation site occurs; can be positive or negative; values greater than the length of the
  *     sequence are acceptable
  * @return the AFPChain returned by the sequence-based overload
  * @throws IOException if the file cannot be opened, or if reading or closing it fails
  * @throws StructureException propagated from the sequence-based overload
  * @throws java.util.NoSuchElementException if the file yields fewer than two sequences
  */
 public static AFPChain cpFastaToAfpChain(File fastaFile, Structure structure, int cpSite)
     throws IOException, StructureException {
   InputStream inStream = new FileInputStream(fastaFile);
   LinkedHashMap<String, ProteinSequence> sequences;
   try {
     SequenceCreatorInterface<AminoAcidCompound> creator =
         new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
     SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser =
         new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
     FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
         new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
     sequences = fastaReader.process();
   } finally {
     // Release the file handle even when parsing fails part-way through.
     inStream.close();
   }
   // The map keeps insertion order, so the first two values are the first two file entries.
   Iterator<ProteinSequence> iter = sequences.values().iterator();
   ProteinSequence first = iter.next();
   ProteinSequence second = iter.next();
   return cpFastaToAfpChain(first, second, structure, cpSite);
 }
示例#7
0
  /** Test of process method, of class FastaReader. */
  @Test
  public void testProcess() throws Exception {
    logger.info("process");
    InputStream inStream = this.getClass().getResourceAsStream("/PF00104_small.fasta");
    assertNotNull(inStream);

    FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
        new FastaReader<ProteinSequence, AminoAcidCompound>(
            inStream,
            new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
    LinkedHashMap<String, ProteinSequence> proteinSequences = fastaReader.process();
    inStream.close();

    // Should have 282 sequences
    // logger.debug("Expecting 283 got " + proteinSequences.size());
    assertEquals(proteinSequences.size(), 283);

    int seqNum = 0;
    for (String id : proteinSequences.keySet()) {
      ProteinSequence proteinSequence = proteinSequences.get(id);
      switch (seqNum) {
        case 0:
          assertEquals(proteinSequence.getAccession().getID(), "A2D504_ATEGE/1-46");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "-----------------FK-N----LP-LED----------------Q----ITL--IQY-----------SWM----------------------CL-SSFA------LSWRSYK---HTNSQFLYFAPDLVF-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------");
          break;
        case 281:
          // logger.debug("Get Accession: {}", proteinSequence.getAccession());
          // logger.debug("Get Protein Sequence: {}", proteinSequence.getSequenceAsString());
          assertEquals(proteinSequence.getAccession().getID(), "Q9PU76_CRONI/141-323");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "VETVTELTEFAKSI-PGFS-N----LD-LND----------------Q----VTL--LKY-----------GVY----------------------EA-IFAM------LASVMNK---DGMPVAYGNGFITRE------------------------------------------------------------------------------------------------------------------------------------------------------------FLKSLRKPFCDIMEPKFDFA-MKF-NSL-E-LDDSDI--------------------SLFVA-AIIC-CGDRPG-------------------------------------------LVNV--GHIEKMQESIVHVLKL-H-----LQN---------NH---PD----------------------------DI------F--------LFP-KLLQKMAD-LRQLV-----------------TEH-AQLV--QIIKK---TESDAHLHPLL-------QEI---");
          break;
        case 282:
          assertEquals(proteinSequence.getAccession().getID(), "Q98SJ1_CHICK/15-61");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------Q-----------------NW------Q--------RFY-QLTKLLDS-MHDVV-----------------ENL-LSFC--FQTFLDKSM--SIEFPEML-------AEI---");
          break;
      }
      seqNum++;
    }
    assertEquals(seqNum, 283);
  }
  /**
   * Test of process method, of class GenbankReader.
   *
   * <p>Exercises three ways of obtaining sequences and logs what each one returns: the {@code
   * GenbankProxySequenceReader}, the {@code GenbankReaderHelper}, and a directly constructed
   * {@code GenbankReader}.
   */
  @Test
  public void testProcess() throws Throwable {
    final String tmpDir = System.getProperty("java.io.tmpdir");

    // ---- Method 1: GenbankProxySequenceReader ----
    GenbankProxySequenceReader<AminoAcidCompound> proteinProxyReader =
        new GenbankProxySequenceReader<AminoAcidCompound>(
            tmpDir, "NP_000257", AminoAcidCompoundSet.getAminoAcidCompoundSet());
    ProteinSequence proxyProtein = new ProteinSequence(proteinProxyReader);
    proteinProxyReader.getHeaderParser().parseHeader(proteinProxyReader.getHeader(), proxyProtein);
    logger.info(
        "Sequence({},{}) = {}...",
        proxyProtein.getAccession(),
        proxyProtein.getLength(),
        proxyProtein.getSequenceAsString().substring(0, 10));

    GenbankProxySequenceReader<NucleotideCompound> dnaProxyReader =
        new GenbankProxySequenceReader<NucleotideCompound>(
            tmpDir, "NM_001126", DNACompoundSet.getDNACompoundSet());
    DNASequence proxyDna = new DNASequence(dnaProxyReader);
    dnaProxyReader.getHeaderParser().parseHeader(dnaProxyReader.getHeader(), proxyDna);
    logger.info(
        "Sequence({},{}) = {}...",
        proxyDna.getAccession(),
        proxyDna.getLength(),
        proxyDna.getSequenceAsString().substring(0, 10));

    // ---- Method 2: GenbankReaderHelper ----
    ClasspathResource dnaGenbankResource = new ClasspathResource("NM_000266.gb", true);
    ClasspathResource proteinGenbankResource = new ClasspathResource("BondFeature.gb");

    LinkedHashMap<String, DNASequence> helperDna =
        GenbankReaderHelper.readGenbankDNASequence(dnaGenbankResource.getInputStream());
    for (DNASequence dna : helperDna.values()) {
      logger.info("DNA Sequence: {}", dna.getSequenceAsString());
    }

    LinkedHashMap<String, ProteinSequence> helperProteins =
        GenbankReaderHelper.readGenbankProteinSequence(proteinGenbankResource.getInputStream());
    for (ProteinSequence protein : helperProteins.values()) {
      logger.info("Protein Sequence: {}", protein.getSequenceAsString());
    }

    // ---- Method 3: GenbankReader object ----
    GenbankReader<DNASequence, NucleotideCompound> dnaGenbankReader =
        new GenbankReader<DNASequence, NucleotideCompound>(
            dnaGenbankResource.getInputStream(),
            new GenericGenbankHeaderParser<DNASequence, NucleotideCompound>(),
            new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
    LinkedHashMap<String, DNASequence> readerDna = dnaGenbankReader.process();

    logger.info("DNA Sequence: {}", readerDna);

    GenbankReader<ProteinSequence, AminoAcidCompound> proteinGenbankReader =
        new GenbankReader<ProteinSequence, AminoAcidCompound>(
            proteinGenbankResource.getInputStream(),
            new GenericGenbankHeaderParser<ProteinSequence, AminoAcidCompound>(),
            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
    LinkedHashMap<String, ProteinSequence> readerProteins = proteinGenbankReader.process();

    logger.info("Protein Sequence: {}", readerProteins);
  }
示例#9
0
  @Test
  public void processIntTest() throws Exception {
    logger.info("process(int)");
    InputStream inStream = this.getClass().getResourceAsStream("/PF00104_small.fasta");
    assertNotNull(inStream);
    FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
        new FastaReader<ProteinSequence, AminoAcidCompound>(
            inStream,
            new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
    LinkedHashMap<String, ProteinSequence> proteinSequences = fastaReader.process(200);

    // Should have 200 sequences
    // logger.debug("Expecting 200 got " + proteinSequences.size());
    assertEquals(proteinSequences.size(), 200);

    int seqNum = 0;
    for (String id : proteinSequences.keySet()) {
      ProteinSequence proteinSequence = proteinSequences.get(id);
      switch (seqNum) {
        case 0:
          assertEquals(proteinSequence.getAccession().getID(), "A2D504_ATEGE/1-46");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "-----------------FK-N----LP-LED----------------Q----ITL--IQY-----------SWM----------------------CL-SSFA------LSWRSYK---HTNSQFLYFAPDLVF-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------");
          break;
        case 199:
          assertEquals(proteinSequence.getAccession().getID(), "Q5F0P7_HUMAN/248-428");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "DRELVVIIGWAKHI-PGFS-S----LS-LGD----------------Q----MSL--LQS-----------AWM----------------------EI-LILG------IVYRSLP---YDDKLVYAEDYIMD-------------------------------------------------------------------------------------------------------------------------------------------------------------EEHSRLAGLLELYRAILQLV-RRY-KKL-K-VEKEEF--------------------VTLKA-LALA-NSDSMY-------------------------------------------IEDL--EAVQKLQDLLHEALQD-Y-----ELS---------QR---HE----------------------------EP------W--------RTG-KLLLTLPL-LRQTA-----------------AKA-VQHF--YSVKLQGKV--PMH--KLF-------LEM---");
          break;
      }
      seqNum++;
    }
    assertEquals(seqNum, 200);

    // Should have 83 sequences
    proteinSequences = fastaReader.process(200);
    assertEquals(proteinSequences.size(), 83);
    seqNum = 0;
    for (String id : proteinSequences.keySet()) {
      ProteinSequence proteinSequence = proteinSequences.get(id);
      switch (seqNum) {
        case 0:
          assertEquals(proteinSequence.getAccession().getID(), "RARA_CANFA/233-413");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "TKCIIKTVEFAKQL-PGFT-T----LT-IAD----------------Q----ITL--LKA-----------ACL----------------------DI-LILR------ICTRYTP---EQDTMTFSEGLTLN-------------------------------------------------------------------------------------------------------------------------------------------------------------RTQMHKAGFGPLTDLVFAFA-NQL-LPL-E-MDDAET--------------------GLLSA-ICLI-CGDRQD-------------------------------------------LEQP--DRVDMLQEPLLEALKV-Y-----VRK---------RR---PS----------------------------RP------H--------MFP-KMLMKITD-LRSIS-----------------AKG-AERV--ITLKMEIPG--SMP--PLI-------QEM---");
          break;
        case 81:
          // logger.debug(proteinSequence.getAccession());
          // logger.debug(proteinSequence.getSequenceAsString());
          assertEquals(proteinSequence.getAccession().getID(), "Q9PU76_CRONI/141-323");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "VETVTELTEFAKSI-PGFS-N----LD-LND----------------Q----VTL--LKY-----------GVY----------------------EA-IFAM------LASVMNK---DGMPVAYGNGFITRE------------------------------------------------------------------------------------------------------------------------------------------------------------FLKSLRKPFCDIMEPKFDFA-MKF-NSL-E-LDDSDI--------------------SLFVA-AIIC-CGDRPG-------------------------------------------LVNV--GHIEKMQESIVHVLKL-H-----LQN---------NH---PD----------------------------DI------F--------LFP-KLLQKMAD-LRQLV-----------------TEH-AQLV--QIIKK---TESDAHLHPLL-------QEI---");
          break;
        case 82:
          assertEquals(proteinSequence.getAccession().getID(), "Q98SJ1_CHICK/15-61");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------Q-----------------NW------Q--------RFY-QLTKLLDS-MHDVV-----------------ENL-LSFC--FQTFLDKSM--SIEFPEML-------AEI---");
          break;
      }
      seqNum++;
    }
    assertEquals(seqNum, 83);
    fastaReader.close();
    inStream.close();
  }
 /**
  * Expects a {@link FileNotFoundException} when a {@code SimpleSubstitutionMatrix} is constructed
  * from the file {@code blosum63.txt}.
  */
 @Test(expected = FileNotFoundException.class)
 public void testSimpleSubstitutionMatrixNotFound() throws FileNotFoundException {
   File matrixFile = new File("blosum63.txt");
   new SimpleSubstitutionMatrix<AminoAcidCompound>(
       AminoAcidCompoundSet.getAminoAcidCompoundSet(), matrixFile);
 }