/** * Parse the header and set the values in the sequence * * @param header * @param sequence */ public void parseHeader(String header, S sequence) { // uniptrot // tr|Q0TET7|Q0TET7_ECOL5 Putative uncharacterized protein OS=Escherichia coli O6:K15:H31 // (strain 536 / UPEC) GN=ECP_2553 PE=4 SV=1 sequence.setOriginalHeader(header); String[] data = getHeaderValues(header); if (data.length == 1) { sequence.setAccession(new AccessionID(data[0])); } else if (data[0].equalsIgnoreCase("sp") || data[0].equalsIgnoreCase("tr")) { if (data[0].equalsIgnoreCase("sp")) { sequence.setAnnotationType(AnnotationType.CURATED); } else { sequence.setAnnotationType(AnnotationType.PREDICTED); } sequence.setAccession(new AccessionID(data[1], DataSource.UNIPROT)); if (data.length > 1) { sequence.setDescription(data[2]); } } else if (data[0].equalsIgnoreCase("gi")) { DataSource giSource = DataSource.UNKNOWN; if (data.length >= 3) { if (data[2].equalsIgnoreCase("gb")) { giSource = DataSource.GENBANK; } else if (data[2].equalsIgnoreCase("emb")) { giSource = DataSource.ENA; } else if (data[2].equalsIgnoreCase("dbj")) { giSource = DataSource.DDBJ; } sequence.setAccession(new AccessionID(data[3], giSource)); } else { sequence.setAccession(new AccessionID(header, giSource)); } } else if (data[0].equalsIgnoreCase("pir")) { sequence.setAccession(new AccessionID(data[2], DataSource.NBRF)); } else if (data[0].equalsIgnoreCase("prf")) { sequence.setAccession(new AccessionID(data[2], DataSource.PRF)); } else if (data[0].equalsIgnoreCase("pdb")) { sequence.setAccession(new AccessionID(data[1] + ":" + data[2], DataSource.PDB1)); } else if (data[0].startsWith("PDB")) { String[] pdbe = data[0].split(" "); String[] pdbaccession = pdbe[0].split(":"); sequence.setAccession(new AccessionID(pdbaccession[1], DataSource.PDBe)); } else if (data[0].indexOf(":") != -1 && data.length > 1 && data[1].equals("PDBID")) { sequence.setAccession(new AccessionID(data[0], DataSource.PDB2)); } else if 
(data[0].equalsIgnoreCase("pat")) { sequence.setAccession(new AccessionID(data[2], DataSource.PATENTS)); } else if (data[0].equalsIgnoreCase("bbs")) { sequence.setAccession(new AccessionID(data[1], DataSource.GENINFO)); } else if (data[0].equalsIgnoreCase("gnl")) { sequence.setAccession(new AccessionID(data[2], DataSource.GENERAL)); } else if (data[0].equalsIgnoreCase("ref")) { sequence.setAccession(new AccessionID(data[1], DataSource.NCBI)); } else if (data[0].equalsIgnoreCase("lcl")) { sequence.setAccession(new AccessionID(data[1], DataSource.LOCAL)); } else { sequence.setAccession( new AccessionID( data[0])); // avoid the common problem of picking up all the comments original header // in getOriginalHeader } }
/**
 * Parse the header and set the values in the sequence.
 *
 * <p>Stores {@code header} as the original header, then applies the {@code accession}
 * and {@code description} values that are in scope for this method (declared outside
 * this block; presumably fields of the enclosing parser - confirm against the class).
 *
 * @param header the raw header text, recorded unchanged on the sequence
 * @param sequence the sequence to populate
 */
public void parseHeader(String header, S sequence) {
    // Record the untouched header first, exactly as received.
    sequence.setOriginalHeader(header);

    // Wrap the accession text without specifying a data source.
    final AccessionID accessionId = new AccessionID(accession);
    sequence.setAccession(accessionId);

    sequence.setDescription(description);
}