Ejemplo n.º 1
0
  /**
   * Parses a header line and stores the extracted values on the sequence.
   *
   * <p>The raw header is always recorded through {@code setOriginalHeader}. The header is then
   * tokenized by {@code getHeaderValues}, and the first token selects how the accession is built:
   *
   * <ul>
   *   <li>a single token is used directly as the accession;
   *   <li>{@code sp} / {@code tr} headers additionally set the annotation type (curated for
   *       {@code sp}, predicted for {@code tr}) and, when a third token exists, the description;
   *   <li>{@code gi} headers map the third token ({@code gb}, {@code emb}, {@code dbj}) to a data
   *       source and take the fourth token as the accession, falling back to the whole header
   *       when there are fewer than four tokens;
   *   <li>the remaining recognised prefixes pick a fixed token and data source;
   *   <li>anything else uses the first token as the accession.
   * </ul>
   *
   * @param header the raw header line to parse
   * @param sequence the sequence that receives the original header, the accession and, where
   *     available, the annotation type and description
   */
  public void parseHeader(String header, S sequence) {
    // UniProt example:
    // tr|Q0TET7|Q0TET7_ECOL5 Putative uncharacterized protein OS=Escherichia coli O6:K15:H31
    // (strain 536 / UPEC) GN=ECP_2553 PE=4 SV=1
    sequence.setOriginalHeader(header);
    String[] data = getHeaderValues(header);

    if (data.length == 1) {
      sequence.setAccession(new AccessionID(data[0]));
    } else if (data[0].equalsIgnoreCase("sp") || data[0].equalsIgnoreCase("tr")) {
      if (data[0].equalsIgnoreCase("sp")) {
        sequence.setAnnotationType(AnnotationType.CURATED);
      } else {
        sequence.setAnnotationType(AnnotationType.PREDICTED);
      }

      sequence.setAccession(new AccessionID(data[1], DataSource.UNIPROT));
      // The description lives in the third token, so it is only read when that token exists.
      if (data.length > 2) {
        sequence.setDescription(data[2]);
      }

    } else if (data[0].equalsIgnoreCase("gi")) {
      DataSource giSource = DataSource.UNKNOWN;
      if (data.length >= 3) {
        if (data[2].equalsIgnoreCase("gb")) {
          giSource = DataSource.GENBANK;
        } else if (data[2].equalsIgnoreCase("emb")) {
          giSource = DataSource.ENA;
        } else if (data[2].equalsIgnoreCase("dbj")) {
          giSource = DataSource.DDBJ;
        }
      }
      // The accession is the fourth token; with fewer tokens the whole header is used instead.
      if (data.length >= 4) {
        sequence.setAccession(new AccessionID(data[3], giSource));
      } else {
        sequence.setAccession(new AccessionID(header, giSource));
      }
    } else if (data[0].equalsIgnoreCase("pir")) {
      sequence.setAccession(new AccessionID(data[2], DataSource.NBRF));
    } else if (data[0].equalsIgnoreCase("prf")) {
      sequence.setAccession(new AccessionID(data[2], DataSource.PRF));
    } else if (data[0].equalsIgnoreCase("pdb")) {
      sequence.setAccession(new AccessionID(data[1] + ":" + data[2], DataSource.PDB1));
    } else if (data[0].startsWith("PDB")) {
      // First token looks like "PDB:<id> ..."; keep only the part after the colon.
      String[] pdbe = data[0].split(" ");
      String[] pdbaccession = pdbe[0].split(":");
      sequence.setAccession(new AccessionID(pdbaccession[1], DataSource.PDBe));
    } else if (data[0].indexOf(":") != -1 && data.length > 1 && data[1].equals("PDBID")) {
      sequence.setAccession(new AccessionID(data[0], DataSource.PDB2));
    } else if (data[0].equalsIgnoreCase("pat")) {
      sequence.setAccession(new AccessionID(data[2], DataSource.PATENTS));
    } else if (data[0].equalsIgnoreCase("bbs")) {
      sequence.setAccession(new AccessionID(data[1], DataSource.GENINFO));
    } else if (data[0].equalsIgnoreCase("gnl")) {
      sequence.setAccession(new AccessionID(data[2], DataSource.GENERAL));
    } else if (data[0].equalsIgnoreCase("ref")) {
      sequence.setAccession(new AccessionID(data[1], DataSource.NCBI));
    } else if (data[0].equalsIgnoreCase("lcl")) {
      sequence.setAccession(new AccessionID(data[1], DataSource.LOCAL));
    } else {
      // Use only the first token as the accession, so the trailing comments that are still
      // available through the original header do not end up in the accession.
      sequence.setAccession(new AccessionID(data[0]));
    }
  }
 /**
  * Records the raw header on the sequence and assigns it an accession and a description.
  *
  * <p>Only the original header is taken from {@code header}; the accession and description come
  * from the {@code accession} and {@code description} values defined outside this method.
  *
  * @param header the raw header text, stored unchanged as the original header
  * @param sequence the sequence to populate
  */
 public void parseHeader(String header, S sequence) {
   // Keep the untouched header available on the sequence.
   sequence.setOriginalHeader(header);

   // Wrap the externally supplied accession before handing it to the sequence.
   AccessionID accessionId = new AccessionID(accession);
   sequence.setAccession(accessionId);

   sequence.setDescription(description);
 }