public String translate(Sequence s, GeneticCode gncode) { String AA = ""; Sequence n = new Sequence(); n.setSequence(s.getSequence().toUpperCase()); int pos = 0; boolean found = false; // --Find start codon // while(!found&&pos<n.getLen()-2) { // String start=n.getSequence().substring(pos, pos+3); // if (gncode.getStart_code().get(start)!=null) { // AA+=""+gncode.getStart_code().get(start); // pos+=3; // found=true; // } // pos++; // } // --return empty string if not found // if (!found) { // Config.log("No Start"); // return AA; // } pos = start; // --Continue n.Remove(0, pos); // remove till start String codon = n.Remove(0, 3); // Config.log(n.getSequence()); while (!codon.isEmpty() && n.getLen() > 0) { // Config.log(codon+" "+gncode.getCode().get(codon)); String aa = gncode.getCode().get(codon); // Config.log(aa); if (aa != null) { AA += aa; } else { if (codon.length() == 3) AA += "-"; } codon = n.Remove(0, 3); } // --Remove ending "*" if found if (AA.endsWith("*")) { AA = AA.substring(0, AA.length() - 1); } return AA; }
/** * We read file as fasta but we group sequences by iteration * * @param filename * @return */ @Override public boolean loadSequences(String filename) { this.name = filename; try { seq.clear(); // Clear the sequence vector and open the file BufferedReader br = new BufferedReader(new FileReader(new File(filename))); // VARIABLES boolean sequenceMode = false; Sequence tmp = new Sequence(); // Temp sequence StringBuilder tmpsequence = new StringBuilder(); // Temp sequence string String stri = ""; // Temp read line // Read the file in a buffer an parse at the same time // Process :: We read like a fasta file Config.log("Reading fasta:" + filename + ":"); int count = 0; String buffer_iteration = ""; // Counter for iterations; Pattern iter = Pattern.compile("iterations = ([0-9]*)"); while (br.ready()) { stri = br.readLine(); Matcher m = iter.matcher(stri); if (m.find()) { // --Save current Alignment and add to group if (!buffer_iteration.isEmpty()) { // --Replace name if (getNbSequence() > 0) { for (Sequence s : getSequences()) { try { String sname = (s.getName().startsWith("AZ") ? s.getName().substring(2) : s.getName()); int original_sequence_id = Integer.valueOf(sname); InfoSequence info = df.getInfoSequence(original_sequence_id); // System.out.println(info); s.setName(info.getName()); s.setGi(info.getGi()); s.setAccession(info.getAccession()); s.setAccession_referee(info.getAccession_referee()); s.setAbbreviate(info.getAbbreviate()); s.setSequence_type(info.getSequence_type()); s.setOriginal_id(original_sequence_id); // System.out.println(s.getAbbreviate()); } catch (Exception e) { } } this.setName("Iterations_" + m.group(1) + "_" + name); this.setNote("Created on " + Util.returnCurrentDateAndTime()); this.saveToDatabase(); this.setId(0); this.getSequences().removeAllElements(); } buffer_iteration = stri; } if (sequenceMode && (stri.equals("") || stri.startsWith(">"))) { tmp.setSequence(tmpsequence.toString()); tmp.loadInfoFromName(); // Add sequence if not empty if (tmp.getSequence().length() > 0) seq.add(tmp); tmp = new Sequence(); tmpsequence = new StringBuilder(); sequenceMode = false; } if (sequenceMode) { tmpsequence.append(stri); count++; if (count % 10000 == 0) Config.log("*"); } if (stri.startsWith(">")) { // We have a fasta definition tmp.setName(stri.substring(1)); // remove > sequenceMode = true; } } // end while // Add last read if (sequenceMode) { tmp.setSequence(tmpsequence.toString()); tmp.loadInfoFromName(); if (tmp.getSequence().length() > 0) seq.add(tmp); tmp = new Sequence(); } br.close(); if (seq.size() == 0) { if (isDebug()) Config.log("not fasta..."); return false; } } } catch (Exception e) { e.printStackTrace(); Config.log("Error with " + filename); return false; } if (isDebug()) Config.log("done"); return true; }
public String aa_to_dna(Sequence original_dna, Sequence protein, GeneticCode gncode) { LinkedList<String> codons = new LinkedList<String>(); String AA = ""; String dna = ""; Sequence n = new Sequence(); n.setSequence(original_dna.getSequence().toUpperCase()); int pos = 0; boolean found = false; // --Find start codon // while(!found&&pos<n.getLen()-2) { // String start=n.getSequence().substring(pos, pos+3); // if (gncode.getStart_code().get(start)!=null) { // AA+=""+gncode.getStart_code().get(start); // codons.add(start); // pos+=3; // found=true; // } // pos++; // } // --return empty string if not found // if (!found) { // Config.log("No Start"); // return AA; // } pos = start; // --MAKE THE ORIGINAL TRANSLATION n.Remove(0, pos); // remove till start String codon = n.Remove(0, 3); // --Debug // Config.log(n.getSequence()); while (!codon.isEmpty() && n.getLen() > 0) { // Config.log(codon+" "+gncode.getCode().get(codon)); String aa = gncode.getCode().get(codon); // --Debug // Config.log(aa); if (aa != null) { AA += aa; codons.add(codon); } else { if (codon.length() == 3) AA += "-"; } codon = n.Remove(0, 3); } // --MAKE THE REAL cDNA for (int i = 0; i < protein.getLen(); i++) { char c = protein.getSequence().charAt(i); // Config.log(c); if (c == '-' || c == '.') { dna += "---"; } else { // --remove stop codon String codon_to_add = codons.poll(); if (codon_to_add != null) { if (gncode.getCode().get(codon_to_add).equals("*")) codon_to_add = "---"; dna += codon_to_add; } } } return dna; }