Beispiel #1
0
  /**
   * Reports whether another character is available, scanning forward through the current protein
   * (and on to further proteins obtained via {@code getProtein()}) until one is found.
   *
   * <p>A residue whose upper-cased base is not found in {@code alphabet} is reported on standard
   * output and skipped. When an accepted residue is found, its first character is stored in
   * {@code nextch} and {@code nvalid} is set, so a repeated call returns {@code true} immediately
   * without advancing.
   *
   * @return {@code true} if a character is already buffered or was just found; {@code false} once
   *     {@code getProtein()} returns {@code false}
   */
  @Override
  public boolean hasNextChar() {
    // A character found by an earlier call is still pending.
    if (nvalid) {
      return true;
    }

    for (;;) {
      // No current protein: ask for one, and give up if none can be supplied.
      if (protein == null && !getProtein()) {
        return false;
      }

      // Positions are 1-based (BioJava), so position == length is still a valid residue.
      if (position > protein.getLength()) {
        protein = null;
        continue;
      }

      String residue = protein.getCompoundAt(position++).getUpperedBase();
      if (alphabet.indexOf(residue) >= 0) {
        nextch = residue.charAt(0);
        nvalid = true;
        return true;
      }

      // Residue is outside the accepted alphabet: report it and move on.
      System.out.println("Bad aa " + residue);
    }
  }
Beispiel #2
0
  /**
   * Prints two protein sequences: one constructed directly from a residue string, and one
   * constructed through a {@code StringProxySequenceReader} over the amino-acid compound set.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    ProteinSequence direct = new ProteinSequence("ARNDCEQGHILKMFPSTWYVBZJX");
    String directText = direct.toString();
    System.out.println(directText);

    AminoAcidCompoundSet compounds = AminoAcidCompoundSet.getAminoAcidCompoundSet();
    StringProxySequenceReader<AminoAcidCompound> reader =
        new StringProxySequenceReader<AminoAcidCompound>("XRNDCEQGHILKMFPSTWYVBZJA", compounds);
    ProteinSequence proxied = new ProteinSequence(reader);
    String proxiedText = proxied.toString();
    System.out.println(proxiedText);
  }
  /**
   * Builds a protein sequence through a {@code UniprotProxySequenceReader} for the identifier
   * {@code YA745_GIBZE} and logs its accession ID and residue string.
   *
   * <p>Any exception raised along the way is logged rather than propagated.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    try {
      AminoAcidCompoundSet compounds = AminoAcidCompoundSet.getAminoAcidCompoundSet();
      UniprotProxySequenceReader<AminoAcidCompound> reader =
          new UniprotProxySequenceReader<AminoAcidCompound>("YA745_GIBZE", compounds);
      ProteinSequence protein = new ProteinSequence(reader);

      String accessionId = protein.getAccession().getID();
      logger.info("Accession: {}", accessionId);

      String residues = protein.getSequenceAsString();
      logger.info("Sequence: {}", residues);
    } catch (Exception e) {
      logger.error("Exception: ", e);
    }
  }
Beispiel #4
0
 /**
  * Aligns {@code sequences} and stores the result in {@code multipleSequenceAlignment}.
  *
  * <p>Building the tree requires the protein sequences as a list in which every sequence has the
  * same length. This method runs the multiple sequence alignment, which is expected to insert the
  * necessary shifts, and copies each aligned row into the alignment structure as a fresh
  * {@code ProteinSequence} carrying the row's accession.
  */
 private void alignSequences() {
   multipleSequenceAlignment = new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>();
   Profile<ProteinSequence, AminoAcidCompound> alignedProfile =
       Alignments.getMultipleSequenceAlignment(sequences);
   List<AlignedSequence<ProteinSequence, AminoAcidCompound>> alignedRows =
       alignedProfile.getAlignedSequences();
   for (Sequence<AminoAcidCompound> row : alignedRows) {
     // Re-wrap the aligned row (gaps included) as a standalone protein sequence.
     ProteinSequence copy = new ProteinSequence(row.getSequenceAsString(), row.getCompoundSet());
     copy.setAccession(row.getAccession());
     multipleSequenceAlignment.addAlignedSequence(copy);
   }
 }
Beispiel #5
0
  /**
   * Writes a protein sequence to {@code out} in a packed form.
   *
   * <p>The residue count is written first as an {@code int}. The residues are then written in
   * groups of three, one {@code short} per group: each residue's code from
   * {@code getSerializedAminoAcid} occupies a 5-bit slot, first residue in the highest slot. A
   * final group with fewer than three residues is padded with zero in the unused low slots.
   *
   * @param sequence the protein sequence to serialize
   * @param out the destination to write to
   * @throws IOException if writing to {@code out} fails
   */
  public static void writeProteinSequence(ProteinSequence sequence, DataOutput out)
      throws IOException {

    final String residues = sequence.getSequenceAsString();
    final int length = residues.length();

    out.writeInt(length);

    for (int groupStart = 0; groupStart < length; groupStart += 3) {
      int packed = 0;
      for (int slot = 0; slot < 3; slot++) {
        // Make room for the next 5-bit slot, then fill it if a residue exists there.
        packed <<= 5;
        int index = groupStart + slot;
        if (index < length) {
          packed += getSerializedAminoAcid(residues.charAt(index));
        }
      }
      out.writeShort(packed);
    }
  }
  /**
   * Runs {@code GenericFastaHeaderParser.parseHeader} over a series of sample FASTA headers, all
   * against the same {@code ProteinSequence}, and prints for each one the expected identifier next
   * to the parsed accession and the parsed data source next to the expected one.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {

    System.out.println("parseHeader");
    ProteinSequence sequence = new ProteinSequence("");
    GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> instance =
        new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();

    printParsedHeader(
        instance, sequence, "gi|gi-number|gb|accession|locus", "accession", DataSource.GENBANK);
    printParsedHeader(
        instance, sequence, "gi|gi-number|emb|accession|locus", "accession", DataSource.ENA);
    printParsedHeader(
        instance, sequence, "gi|gi-number|dbj|accession|locus", "accession", DataSource.DDBJ);
    printParsedHeader(instance, sequence, "pir||entry", "entry", DataSource.NBRF);
    printParsedHeader(instance, sequence, "prf||name", "name", DataSource.PRF);
    printParsedHeader(instance, sequence, "sp|accession|name", "accession", DataSource.UNIPROT);
    printParsedHeader(instance, sequence, "pdb|entry|chain", "entry:chain", DataSource.PDB1);
    printParsedHeader(
        instance, sequence, "entry:chain|PDBID|CHAIN|SEQUENCE", "entry:chain", DataSource.PDB2);
    printParsedHeader(
        instance,
        sequence,
        "PDB:1ECY_A mol:protein length:142  ECOTIN",
        "1ECY_A",
        DataSource.PDBe);
    printParsedHeader(instance, sequence, "pat|country|number", "number", DataSource.PATENTS);
    printParsedHeader(instance, sequence, "bbs|number", "number", DataSource.GENINFO);
    printParsedHeader(
        instance, sequence, "gnl|database|identifier", "identifier", DataSource.GENERAL);
    printParsedHeader(instance, sequence, "ref|accession|locus", "accession", DataSource.NCBI);
    printParsedHeader(instance, sequence, "lcl|identifier", "identifier", DataSource.LOCAL);
  }

  /**
   * Parses one header into {@code sequence} and prints two comparison lines: {@code
   * expectedId=<parsed accession>} and {@code <parsed data source>=<expected source>}.
   */
  private static void printParsedHeader(
      GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> parser,
      ProteinSequence sequence,
      String header,
      String expectedId,
      DataSource expectedSource) {
    parser.parseHeader(header, sequence);
    System.out.println(expectedId + "=" + sequence.getAccession());
    System.out.println(sequence.getAccession().getDataSource() + "=" + expectedSource);
  }