コード例 #1
0
  /**
   * Reads one line of sequence data for sequence {@code s}, storing it in {@code data[s]}
   * starting at column {@code pos}.
   *
   * <p>When {@code pos == 0} a label is read first and stored, upper-cased, in
   * {@code identifiers[s]}. For {@code s == 0} a whole line is read and its length, clamped to
   * {@code maxPos - pos}, becomes the returned line length; a {@code '.'} in that line is
   * rejected. For any other sequence exactly {@code lineLength} characters are read, each
   * {@code '.'} is replaced by the character at the same column of {@code data[0]}, and the
   * reader is then advanced with {@code fi.nextLine}.
   *
   * @param in reader supplying the alignment text
   * @param s index of the sequence being read
   * @param pos column at which this line starts
   * @param maxPos upper bound used to clamp the first sequence's line length
   * @param data character matrix indexed as {@code [sequence][column]}
   * @param identifiers receives the upper-cased label when {@code pos == 0}
   * @param fi helper used for all reads from {@code in}
   * @param maxLabelLength forwarded to {@code fi.readLabel}
   * @param lineLength number of characters to read when {@code s != 0}; ignored for
   *     {@code s == 0}
   * @return the clamped length of the line read for {@code s == 0}, otherwise
   *     {@code lineLength} unchanged
   * @throws IOException propagated from the {@code fi} read calls
   * @throws IllegalArgumentException if the line for sequence 0 contains {@code '.'}
   */
  private static int readSeqLineP(
      PushbackReader in,
      int s,
      int pos,
      int maxPos,
      char[][] data,
      String[] identifiers,
      FormattedInput fi,
      int maxLabelLength,
      int lineLength)
      throws IOException {
    if (pos == 0) {
      identifiers[s] = fi.readLabel(in, maxLabelLength).toUpperCase();
    }

    if (s != 0) {
      // Later sequences: read as many characters as the first sequence's line had,
      // expanding '.' to the character of sequence 0 at the same column.
      for (int offset = 0; offset < lineLength; offset++) {
        final int column = pos + offset;
        final char symbol = (char) fi.readNextChar(in);
        data[s][column] = (symbol == '.') ? data[0][column] : symbol;
      }
      fi.nextLine(in);
      return lineLength;
    }

    // First sequence: its line determines the length used for the other sequences.
    final String row = fi.readLine(in, false);
    final int width = Math.min(row.length(), maxPos - pos);

    for (int offset = 0; offset < width; offset++) {
      final int column = pos + offset;
      final char symbol = row.charAt(offset);
      data[0][column] = symbol;
      if (symbol == '.') {
        throw new IllegalArgumentException(
            "Copy character (.) in first sequence not allowed (pos. " + (column + 1) + ")");
      }
    }
    return width;
  }
コード例 #2
0
  /**
   * Reads an alignment in PHYLIP 3.4 INTERLEAVED or PHYLIP SEQUENTIAL format.
   *
   * <p>The header supplies two integers: the number of sequences and the number of sites. The
   * text returned by the following {@code fi.readLine} call selects the layout: if it starts with
   * {@code 'S'} the data is read as sequential, otherwise (including when it is empty) as
   * interleaved. Labels and sequence characters are upper-cased. In sequences after the first,
   * the copy character {@code '.'} is replaced by the character of the first sequence at the
   * same position.
   *
   * @param in reader positioned at the start of the PHYLIP header
   * @param maxLabelLength forwarded to {@code FormattedInput.readLabel} for every label
   * @return a genotype table built from the sequences, a position list of {@code numSites}
   *     entries and the taxa list of labels
   * @throws IllegalArgumentException if the first sequence contains {@code '.'}, or if an
   *     {@link IOException} occurs while reading (the I/O exception is attached as the cause)
   */
  private static GenotypeTable readPHYLIP(PushbackReader in, int maxLabelLength) {
    FormattedInput fi = FormattedInput.getInstance();
    TaxaList idGroup;
    int numSeqs = 0, numSites = 0, lineLength = 0;
    char[][] data = null;
    int c, pos = 0, seq = 0;

    try {
      // Parse PHYLIP header line
      numSeqs = fi.readInt(in);
      numSites = fi.readInt(in);

      String[] identifiers = new String[numSeqs];
      data = new char[numSeqs][numSites];

      // Determine whether sequences are in INTERLEAVED
      // or in sequential format
      String header = fi.readLine(in, false);
      boolean interleaved = header.isEmpty() || header.charAt(0) != 'S';

      if (interleaved) {
        // PHYLIP INTERLEAVED: each block holds one line per sequence.
        while (pos < numSites) {
          // Go to next block
          c = fi.readNextChar(in);
          in.unread(c);

          for (seq = 0; seq < numSeqs; seq++) {
            lineLength =
                readSeqLineP(
                    in, seq, pos, numSites, data, identifiers, fi, maxLabelLength, lineLength);
          }
          pos += lineLength;
        }
      } else {
        // PHYLIP SEQUENTIAL: each sequence is given in full before the next one starts.
        for (seq = 0; seq < numSeqs; seq++) {
          // Go to next block
          c = fi.readNextChar(in);
          in.unread(c);

          // Read label
          identifiers[seq] = fi.readLabel(in, maxLabelLength).toUpperCase();

          // Read sequences
          for (pos = 0; pos < numSites; pos++) {
            char symbol = (char) fi.readNextChar(in);

            // The copy character must be detected on the character just read for THIS
            // sequence; testing data[0][pos] would never match for seq > 0 because a '.'
            // in the first sequence is rejected below.
            if (symbol == '.') {
              if (seq == 0) {
                throw new IllegalArgumentException(
                    "Copy character (.) in first sequence not allowed (pos. " + (pos + 1) + ")");
              }
              symbol = data[0][pos];
            }
            data[seq][pos] = symbol;
          }
        }
      }
      idGroup = new TaxaListBuilder().addAll(identifiers).build();
    } catch (IOException e) {
      // Keep the original exception as the cause so the underlying I/O failure is not lost.
      throw new IllegalArgumentException(
          "IO error after pos. " + (pos + 1) + ", seq. " + (seq + 1), e);
    }

    String[] s = new String[numSeqs];
    for (int i = 0; i < numSeqs; i++) {
      s[i] = (new String(data[i])).toUpperCase();
    }

    GenotypeCallTable genotype =
        GenotypeCallTableBuilder.getUnphasedNucleotideGenotypeBuilder(numSeqs, numSites)
            .setBases(s)
            .build();

    return GenotypeTableBuilder.getInstance(
        genotype, PositionListBuilder.getInstance(numSites), idGroup);
  }