/**
 * Reads one line of sequence {@code s} from an interleaved block, storing the
 * characters in {@code data[s]} starting at column {@code pos}.
 *
 * <p>When {@code pos == 0} a label is read first and stored upper-cased in
 * {@code identifiers[s]}. For the first sequence ({@code s == 0}) a whole line
 * is read and its length, capped at {@code maxPos - pos}, becomes the block
 * width. For every other sequence exactly {@code lineLength} characters are
 * read, and each {@code '.'} is replaced by the character in the same column
 * of {@code data[0]}.
 *
 * @param in             reader supplying the alignment text
 * @param s              index of the sequence being read
 * @param pos            column at which the current block starts
 * @param maxPos         total number of columns available
 * @param data           character matrix, indexed {@code [sequence][column]}
 * @param identifiers    receives the label of sequence {@code s} when {@code pos == 0}
 * @param fi             input helper used for all reads
 * @param maxLabelLength maximum label length handed to {@code fi.readLabel}
 * @param lineLength     block width established by sequence 0; ignored when {@code s == 0}
 * @return the block width: newly determined when {@code s == 0}, otherwise
 *         {@code lineLength} unchanged
 * @throws IOException              if the underlying reads fail
 * @throws IllegalArgumentException if the first sequence contains {@code '.'}
 */
private static int readSeqLineP(
        PushbackReader in, int s, int pos, int maxPos, char[][] data, String[] identifiers,
        FormattedInput fi, int maxLabelLength, int lineLength) throws IOException {

    // A label precedes the sequence data only at the start of the alignment.
    if (pos == 0) {
        identifiers[s] = fi.readLabel(in, maxLabelLength).toUpperCase();
    }

    if (s != 0) {
        // Follow-up sequence: consume exactly the width fixed by sequence 0.
        int column = pos;
        for (int remaining = lineLength; remaining > 0; remaining--, column++) {
            char next = (char) fi.readNextChar(in);
            data[s][column] = next;
            if (next == '.') {
                // '.' means "same as the first sequence at this column".
                data[s][column] = data[0][column];
            }
        }
        fi.nextLine(in);
        return lineLength;
    }

    // First sequence: its line defines the block width, capped at the columns left.
    String firstLine = fi.readLine(in, false);
    int width = Math.min(firstLine.length(), maxPos - pos);

    int column = pos;
    for (int i = 0; i < width; i++, column++) {
        char symbol = firstLine.charAt(i);
        data[0][column] = symbol;
        if (symbol == '.') {
            throw new IllegalArgumentException(
                    "Copy character (.) in first sequence not allowed (pos. "
                            + (column + 1) + ")");
        }
    }
    return width;
}
// Read alignment (in PHYLIP 3.4 INTERLEAVED or PHYLIP SEQUENTIAL format) private static GenotypeTable readPHYLIP(PushbackReader in, int maxLabelLength) { FormattedInput fi = FormattedInput.getInstance(); TaxaList idGroup; int numSeqs = 0, numSites = 0, lineLength = 0; char[][] data = null; int c, pos = 0, seq = 0; try { // Parse PHYLIP header line numSeqs = fi.readInt(in); numSites = fi.readInt(in); String[] identifiers = new String[numSeqs]; data = new char[numSeqs][numSites]; // Determine whether sequences are in INTERLEAVED // or in sequential format String header = fi.readLine(in, false); boolean interleaved = true; if (header.length() > 0) { if (header.charAt(0) == 'S') { interleaved = false; } } if (interleaved) // PHYLIP INTERLEAVED { // Reading data while (pos < numSites) { // Go to next block c = fi.readNextChar(in); in.unread(c); for (seq = 0; seq < numSeqs; seq++) { lineLength = readSeqLineP( in, seq, pos, numSites, data, identifiers, fi, maxLabelLength, lineLength); } pos += lineLength; } } else // PHYLIP SEQUENTIAL { // System.out.println("PHYLIP SEQUENTIAL"); for (seq = 0; seq < numSeqs; seq++) { // Go to next block c = fi.readNextChar(in); in.unread(c); // Read label identifiers[seq] = fi.readLabel(in, maxLabelLength).toUpperCase(); // Read sequences for (pos = 0; pos < numSites; pos++) { data[seq][pos] = (char) fi.readNextChar(in); if (data[0][pos] == '.') { if (seq == 0) { throw new IllegalArgumentException( "Copy character (.) in first sequence not allowed (pos. " + (pos + 1) + ")"); } else { data[seq][pos] = data[0][pos]; } } } } } idGroup = new TaxaListBuilder().addAll(identifiers).build(); } catch (IOException e) { throw new IllegalArgumentException( "IO error after pos. " + (pos + 1) + ", seq. 
" + (seq + 1)); } String[] s = new String[numSeqs]; for (int i = 0; i < numSeqs; i++) { s[i] = (new String(data[i])).toUpperCase(); } // SimpleAlignment saa = SimpleAlignment.getInstance(idGroup, s, new Nucleotides()); String[] sites = new String[numSites]; int[] positions = new int[numSites]; for (int i = 0; i < numSites; i++) { positions[i] = i; sites[i] = Integer.toString(i); } GenotypeCallTable genotype = GenotypeCallTableBuilder.getUnphasedNucleotideGenotypeBuilder(numSeqs, numSites) .setBases(s) .build(); return GenotypeTableBuilder.getInstance( genotype, PositionListBuilder.getInstance(numSites), idGroup); }