/**
 * Creates a copy of this alignment, constructed with room for additional taxa.
 *
 * <p>The copy is built with this alignment's {@code nTaxon} and {@code seqLen}
 * and a third constructor argument of {@code nTaxon + ntxinc} (presumably the
 * allocated row capacity -- confirm against the three-argument constructor).
 * All entries of {@code names} and {@code data} are then copied into the new
 * instance and its name index is rebuilt.
 *
 * @param ntxinc value added to {@code nTaxon} to form the third constructor
 *               argument of the copy
 * @return the newly created alignment holding the same names and data
 */
public MultipleAlignment deepClone(int ntxinc) {
    final int enlargedSize = nTaxon + ntxinc;
    final MultipleAlignment copy = new MultipleAlignment(nTaxon, seqLen, enlargedSize);

    // Bulk-copy both parallel arrays, starting at index 0 on either side.
    System.arraycopy(this.names, 0, copy.names, 0, this.names.length);
    System.arraycopy(this.data, 0, copy.data, 0, this.data.length);

    // The lookup structure is derived from the names, so rebuild it for the copy.
    copy.buildNameIndex();
    return copy;
}
/**
 * Reads an alignment from a reader, one taxon per line.
 *
 * <p>The first line must contain two whitespace-separated integers: the number
 * of taxa and the sequence length. Each of the following lines must contain a
 * name token followed by a sequence token of exactly that length; any further
 * tokens on the line are ignored. Sequences are stored in upper case.
 *
 * <p>The reader is always closed before this method returns or throws.
 *
 * @param rarg source of the alignment text; closed by this method
 * @return the parsed alignment with its name index built
 * @throws RuntimeException if the input is empty, ends before all taxa were
 *         read, a sequence has the wrong length, or an I/O error occurs
 *         (the {@link IOException} is attached as the cause)
 */
public static MultipleAlignment loadPhylip(Reader rarg) {
    try {
        BufferedReader r = new BufferedReader(rarg);
        try {
            String header = r.readLine();
            if (header == null) {
                // Empty input: fail with a clear message instead of an NPE in StringTokenizer.
                throw new RuntimeException("cannot read header line in input file");
            }

            StringTokenizer st = new StringTokenizer(header);
            int nTaxon = Integer.parseInt(st.nextToken());
            int seqLen = Integer.parseInt(st.nextToken());

            MultipleAlignment ma = new MultipleAlignment(nTaxon, seqLen);

            for (int i = 0; i < nTaxon; i++) {
                String line = r.readLine();
                if (line == null) {
                    throw new RuntimeException("cannot read next line in " + "input file");
                }

                st = new StringTokenizer(line);
                String name = st.nextToken();
                String data = st.nextToken();

                if (data.length() != seqLen) {
                    throw new RuntimeException("wrong sequence length: " + data.length() + " vs " + seqLen);
                }

                ma.names[i] = name;
                // Locale-independent upper-casing: the default-locale variant maps
                // 'i' to a dotted capital I under e.g. a Turkish locale.
                ma.data[i] = data.toUpperCase(java.util.Locale.ROOT);
            }

            ma.buildNameIndex();
            return ma;
        } finally {
            // Release the reader on every exit path, not only on success.
            r.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(MultipleAlignment.class.getName()).log(Level.SEVERE, null, ex);
        // Keep the original failure attached so callers can diagnose it.
        throw new RuntimeException("bailing out", ex);
    }
}
public static void main(String args[]) { if (false) { // MultipleAlignment ma = MultipleAlignment.loadPhylipShitty(new File( // "/space/raxml/VINCENT/DATA/500")); long time1 = System.currentTimeMillis(); MultipleAlignment mar = MultipleAlignment.loadPhylip(new File("/space/raxml/VINCENT/DATA/150")); MultipleAlignment ma = MultipleAlignment.loadPhylipShitty(new File("/space/raxml/VINCENT/DATA/150")); System.out.printf( "parse done: %d %s\n", System.currentTimeMillis() - time1, compare(ma, mar)); // ma.print(); } else { long time1 = System.currentTimeMillis(); MultipleAlignment ma = MultipleAlignment.loadPhylipShitty(new File("/space/raxml/VINCENT/DATA/500")); System.out.printf("parse done: %d\n", System.currentTimeMillis() - time1); } }
public static MultipleAlignment loadPhylipShitty(File file) { try { BufferedReader r = new BufferedReader(new FileReader(file)); String header = r.readLine(); StringTokenizer st = new StringTokenizer(header); final int nTaxon = Integer.parseInt(st.nextToken()); final int seqLen = Integer.parseInt(st.nextToken()); MultipleAlignment ma = new MultipleAlignment(nTaxon, seqLen); ma.nameMap = new HashMap<String, Integer>(); String line = null; boolean keepline = false; mainloop: while (true) { for (int i = 0; i < nTaxon; i++) { if (!keepline) { line = r.readLine(); } keepline = false; if (line == null) { break mainloop; } String nd[] = readLine(line); String name = nd[0]; String data = nd[1]; if (name != null) { assert (!ma.nameMap.containsKey(name)); ma.names[i] = name; ma.data[i] = data; ma.nameMap.put(name, i); } else { ma.data[i] += data; } } // // look if there are non-empty lines following the last block. // keepline is used to implement a crude form of 'unreadline'. // keepline = false; outer: while ((line = r.readLine()) != null) { // System.out.printf( "look: '%s'\n", line ); for (int i = 0; i < line.length(); i++) { if (!Character.isWhitespace(line.charAt(i))) { keepline = true; break outer; } } } if (!keepline) { break; } } // sanity check on sequence length for (int i = 0; i < nTaxon; i++) { if (ma.nameMap.get(ma.names[i]) != i) { throw new RuntimeException("quirk in name index"); } if (ma.data[i].length() != seqLen) { throw new RuntimeException("wrong sequence length "); } } r.close(); return ma; } catch (IOException ex) { Logger.getLogger(MultipleAlignment.class.getName()).log(Level.SEVERE, null, ex); throw new RuntimeException("bailing out"); } }