Example #1
0
  /** Test of process method, of class FastaReader. */
  @Test
  public void testProcess() throws Exception {
    logger.info("process");
    InputStream inStream = this.getClass().getResourceAsStream("/PF00104_small.fasta");
    assertNotNull(inStream);

    FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
        new FastaReader<ProteinSequence, AminoAcidCompound>(
            inStream,
            new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
    LinkedHashMap<String, ProteinSequence> proteinSequences = fastaReader.process();
    inStream.close();

    // Should have 282 sequences
    // logger.debug("Expecting 283 got " + proteinSequences.size());
    assertEquals(proteinSequences.size(), 283);

    int seqNum = 0;
    for (String id : proteinSequences.keySet()) {
      ProteinSequence proteinSequence = proteinSequences.get(id);
      switch (seqNum) {
        case 0:
          assertEquals(proteinSequence.getAccession().getID(), "A2D504_ATEGE/1-46");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "-----------------FK-N----LP-LED----------------Q----ITL--IQY-----------SWM----------------------CL-SSFA------LSWRSYK---HTNSQFLYFAPDLVF-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------");
          break;
        case 281:
          // logger.debug("Get Accession: {}", proteinSequence.getAccession());
          // logger.debug("Get Protein Sequence: {}", proteinSequence.getSequenceAsString());
          assertEquals(proteinSequence.getAccession().getID(), "Q9PU76_CRONI/141-323");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "VETVTELTEFAKSI-PGFS-N----LD-LND----------------Q----VTL--LKY-----------GVY----------------------EA-IFAM------LASVMNK---DGMPVAYGNGFITRE------------------------------------------------------------------------------------------------------------------------------------------------------------FLKSLRKPFCDIMEPKFDFA-MKF-NSL-E-LDDSDI--------------------SLFVA-AIIC-CGDRPG-------------------------------------------LVNV--GHIEKMQESIVHVLKL-H-----LQN---------NH---PD----------------------------DI------F--------LFP-KLLQKMAD-LRQLV-----------------TEH-AQLV--QIIKK---TESDAHLHPLL-------QEI---");
          break;
        case 282:
          assertEquals(proteinSequence.getAccession().getID(), "Q98SJ1_CHICK/15-61");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------Q-----------------NW------Q--------RFY-QLTKLLDS-MHDVV-----------------ENL-LSFC--FQTFLDKSM--SIEFPEML-------AEI---");
          break;
      }
      seqNum++;
    }
    assertEquals(seqNum, 283);
  }
  /**
   * Test of process method, of class GenbankReader.
   *
   * <p>Exercises three ways of obtaining sequences and logs what each one returns. The test makes
   * no assertions, so it only fails when one of the calls throws.
   *
   * <ol>
   *   <li>{@code GenbankProxySequenceReader} for the accessions {@code NP_000257} (protein) and
   *       {@code NM_001126} (DNA), passing the {@code java.io.tmpdir} system property as the first
   *       constructor argument;
   *   <li>{@code GenbankReaderHelper} on the {@code NM_000266.gb} and {@code BondFeature.gb}
   *       classpath resources;
   *   <li>{@code GenbankReader} constructed directly on streams from the same two resources.
   * </ol>
   *
   * <p>Every stream obtained from a {@code ClasspathResource} is closed in a {@code finally} block
   * once the corresponding read has finished.
   */
  @Test
  public void testProcess() throws Throwable {
    /*
     * Method 1: With the GenbankProxySequenceReader
     */
    // Try with the GenbankProxySequenceReader
    GenbankProxySequenceReader<AminoAcidCompound> genbankProteinReader =
        new GenbankProxySequenceReader<AminoAcidCompound>(
            System.getProperty("java.io.tmpdir"),
            "NP_000257",
            AminoAcidCompoundSet.getAminoAcidCompoundSet());
    ProteinSequence proteinSequence = new ProteinSequence(genbankProteinReader);
    genbankProteinReader
        .getHeaderParser()
        .parseHeader(genbankProteinReader.getHeader(), proteinSequence);
    logger.info(
        "Sequence({},{}) = {}...",
        proteinSequence.getAccession(),
        proteinSequence.getLength(),
        proteinSequence.getSequenceAsString().substring(0, 10));

    GenbankProxySequenceReader<NucleotideCompound> genbankDNAReader =
        new GenbankProxySequenceReader<NucleotideCompound>(
            System.getProperty("java.io.tmpdir"), "NM_001126", DNACompoundSet.getDNACompoundSet());
    DNASequence dnaSequence = new DNASequence(genbankDNAReader);
    genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence);
    logger.info(
        "Sequence({},{}) = {}...",
        dnaSequence.getAccession(),
        dnaSequence.getLength(),
        dnaSequence.getSequenceAsString().substring(0, 10));

    /*
     * Method 2: With the GenbankReaderHelper
     */
    // Try with the GenbankReaderHelper
    ClasspathResource dnaResource = new ClasspathResource("NM_000266.gb", true);
    ClasspathResource protResource = new ClasspathResource("BondFeature.gb");

    LinkedHashMap<String, DNASequence> dnaSequences;
    java.io.InputStream dnaHelperStream = dnaResource.getInputStream();
    try {
      dnaSequences = GenbankReaderHelper.readGenbankDNASequence(dnaHelperStream);
    } finally {
      // Close the stream whether or not the helper succeeded.
      dnaHelperStream.close();
    }
    for (DNASequence sequence : dnaSequences.values()) {
      logger.info("DNA Sequence: {}", sequence.getSequenceAsString());
    }

    LinkedHashMap<String, ProteinSequence> protSequences;
    java.io.InputStream protHelperStream = protResource.getInputStream();
    try {
      protSequences = GenbankReaderHelper.readGenbankProteinSequence(protHelperStream);
    } finally {
      protHelperStream.close();
    }
    for (ProteinSequence sequence : protSequences.values()) {
      logger.info("Protein Sequence: {}", sequence.getSequenceAsString());
    }

    /*
     * Method 3: With the GenbankReader Object
     */
    // Try reading with the GenbankReader
    java.io.InputStream dnaReaderStream = dnaResource.getInputStream();
    try {
      GenbankReader<DNASequence, NucleotideCompound> dnaReader =
          new GenbankReader<DNASequence, NucleotideCompound>(
              dnaReaderStream,
              new GenericGenbankHeaderParser<DNASequence, NucleotideCompound>(),
              new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
      dnaSequences = dnaReader.process();
    } finally {
      dnaReaderStream.close();
    }

    logger.info("DNA Sequence: {}", dnaSequences);

    java.io.InputStream protReaderStream = protResource.getInputStream();
    try {
      GenbankReader<ProteinSequence, AminoAcidCompound> protReader =
          new GenbankReader<ProteinSequence, AminoAcidCompound>(
              protReaderStream,
              new GenericGenbankHeaderParser<ProteinSequence, AminoAcidCompound>(),
              new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
      protSequences = protReader.process();
    } finally {
      protReaderStream.close();
    }

    logger.info("Protein Sequence: {}", protSequences);
  }
Example #3
0
  @Test
  public void processIntTest() throws Exception {
    logger.info("process(int)");
    InputStream inStream = this.getClass().getResourceAsStream("/PF00104_small.fasta");
    assertNotNull(inStream);
    FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
        new FastaReader<ProteinSequence, AminoAcidCompound>(
            inStream,
            new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
    LinkedHashMap<String, ProteinSequence> proteinSequences = fastaReader.process(200);

    // Should have 200 sequences
    // logger.debug("Expecting 200 got " + proteinSequences.size());
    assertEquals(proteinSequences.size(), 200);

    int seqNum = 0;
    for (String id : proteinSequences.keySet()) {
      ProteinSequence proteinSequence = proteinSequences.get(id);
      switch (seqNum) {
        case 0:
          assertEquals(proteinSequence.getAccession().getID(), "A2D504_ATEGE/1-46");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "-----------------FK-N----LP-LED----------------Q----ITL--IQY-----------SWM----------------------CL-SSFA------LSWRSYK---HTNSQFLYFAPDLVF-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------");
          break;
        case 199:
          assertEquals(proteinSequence.getAccession().getID(), "Q5F0P7_HUMAN/248-428");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "DRELVVIIGWAKHI-PGFS-S----LS-LGD----------------Q----MSL--LQS-----------AWM----------------------EI-LILG------IVYRSLP---YDDKLVYAEDYIMD-------------------------------------------------------------------------------------------------------------------------------------------------------------EEHSRLAGLLELYRAILQLV-RRY-KKL-K-VEKEEF--------------------VTLKA-LALA-NSDSMY-------------------------------------------IEDL--EAVQKLQDLLHEALQD-Y-----ELS---------QR---HE----------------------------EP------W--------RTG-KLLLTLPL-LRQTA-----------------AKA-VQHF--YSVKLQGKV--PMH--KLF-------LEM---");
          break;
      }
      seqNum++;
    }
    assertEquals(seqNum, 200);

    // Should have 83 sequences
    proteinSequences = fastaReader.process(200);
    assertEquals(proteinSequences.size(), 83);
    seqNum = 0;
    for (String id : proteinSequences.keySet()) {
      ProteinSequence proteinSequence = proteinSequences.get(id);
      switch (seqNum) {
        case 0:
          assertEquals(proteinSequence.getAccession().getID(), "RARA_CANFA/233-413");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "TKCIIKTVEFAKQL-PGFT-T----LT-IAD----------------Q----ITL--LKA-----------ACL----------------------DI-LILR------ICTRYTP---EQDTMTFSEGLTLN-------------------------------------------------------------------------------------------------------------------------------------------------------------RTQMHKAGFGPLTDLVFAFA-NQL-LPL-E-MDDAET--------------------GLLSA-ICLI-CGDRQD-------------------------------------------LEQP--DRVDMLQEPLLEALKV-Y-----VRK---------RR---PS----------------------------RP------H--------MFP-KMLMKITD-LRSIS-----------------AKG-AERV--ITLKMEIPG--SMP--PLI-------QEM---");
          break;
        case 81:
          // logger.debug(proteinSequence.getAccession());
          // logger.debug(proteinSequence.getSequenceAsString());
          assertEquals(proteinSequence.getAccession().getID(), "Q9PU76_CRONI/141-323");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "VETVTELTEFAKSI-PGFS-N----LD-LND----------------Q----VTL--LKY-----------GVY----------------------EA-IFAM------LASVMNK---DGMPVAYGNGFITRE------------------------------------------------------------------------------------------------------------------------------------------------------------FLKSLRKPFCDIMEPKFDFA-MKF-NSL-E-LDDSDI--------------------SLFVA-AIIC-CGDRPG-------------------------------------------LVNV--GHIEKMQESIVHVLKL-H-----LQN---------NH---PD----------------------------DI------F--------LFP-KLLQKMAD-LRQLV-----------------TEH-AQLV--QIIKK---TESDAHLHPLL-------QEI---");
          break;
        case 82:
          assertEquals(proteinSequence.getAccession().getID(), "Q98SJ1_CHICK/15-61");
          assertEquals(
              proteinSequence.getSequenceAsString(),
              "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------Q-----------------NW------Q--------RFY-QLTKLLDS-MHDVV-----------------ENL-LSFC--FQTFLDKSM--SIEFPEML-------AEI---");
          break;
      }
      seqNum++;
    }
    assertEquals(seqNum, 83);
    fastaReader.close();
    inStream.close();
  }