예제 #1
0
  /**
   * Builds a new alignment holding the same names and sequences as this one.
   *
   * <p>The new instance is constructed from {@code nTaxon}, {@code seqLen} and {@code nTaxon +
   * ntxinc}; the contents of this object's {@code names} and {@code data} arrays are then copied
   * into the new instance and its name index is rebuilt.
   *
   * <p>NOTE(review): the third constructor argument presumably sets the allocated taxon capacity,
   * so {@code ntxinc} would be the number of spare slots -- confirm against the constructor.
   *
   * @param ntxinc value added to {@code nTaxon} to form the third constructor argument
   * @return the newly created alignment
   */
  public MultipleAlignment deepClone(int ntxinc) {
    final int enlargedTaxonCount = nTaxon + ntxinc;
    final MultipleAlignment copy = new MultipleAlignment(nTaxon, seqLen, enlargedTaxonCount);

    // Copy every existing entry into the front of the new instance's arrays.
    System.arraycopy(names, 0, copy.names, 0, names.length);
    System.arraycopy(data, 0, copy.data, 0, data.length);

    // The name lookup has to be regenerated for the freshly filled arrays.
    copy.buildNameIndex();
    return copy;
  }
예제 #2
0
  /**
   * Reads an alignment from a reader whose first line holds the taxon count and the sequence
   * length, followed by one line per taxon consisting of a name token and a sequence token.
   *
   * <p>Sequences are stored upper-cased. The reader is wrapped in a {@link BufferedReader} and is
   * always closed before this method returns or throws.
   *
   * @param rarg source of the alignment text; closed by this method
   * @return the parsed alignment with its name index built
   * @throws RuntimeException if the header or a taxon line is missing, if a sequence does not have
   *     the length announced in the header, or if an I/O error occurs (the {@link IOException} is
   *     attached as the cause)
   * @throws NumberFormatException if the header tokens are not integers
   * @throws java.util.NoSuchElementException if the header or a taxon line has fewer than two
   *     tokens
   */
  public static MultipleAlignment loadPhylip(Reader rarg) {
    // try-with-resources closes the reader on every exit path, not only on success.
    try (BufferedReader r = new BufferedReader(rarg)) {

      String header = r.readLine();
      if (header == null) {
        // Empty input: fail with a clear message instead of an NPE inside StringTokenizer.
        throw new RuntimeException("cannot read header line in input file");
      }
      StringTokenizer st = new StringTokenizer(header);

      int nTaxon = Integer.parseInt(st.nextToken());
      int seqLen = Integer.parseInt(st.nextToken());

      MultipleAlignment ma = new MultipleAlignment(nTaxon, seqLen);

      for (int i = 0; i < nTaxon; i++) {
        String line = r.readLine();
        if (line == null) {
          throw new RuntimeException("cannot read next line in " + "input file");
        }

        st = new StringTokenizer(line);
        String name = st.nextToken();
        String data = st.nextToken();

        if (data.length() != seqLen) {
          throw new RuntimeException("wrong sequence length: " + data.length() + " vs " + seqLen);
        }
        ma.names[i] = name;
        // Locale.ROOT keeps the mapping locale-independent (e.g. 'i' must become 'I', not the
        // dotted capital used by the Turkish locale).
        ma.data[i] = data.toUpperCase(java.util.Locale.ROOT);
      }

      ma.buildNameIndex();

      return ma;

    } catch (IOException ex) {
      Logger.getLogger(MultipleAlignment.class.getName()).log(Level.SEVERE, null, ex);
      // Keep the original failure attached so callers can see what actually went wrong.
      throw new RuntimeException("bailing out", ex);
    }
  }
예제 #3
0
  /**
   * Ad-hoc timing harness for the PHYLIP loaders; command-line arguments are ignored.
   *
   * <p>By default it parses one hard-coded file with {@code loadPhylipShitty} and prints the
   * elapsed milliseconds. With the local flag switched on it instead parses another hard-coded
   * file with both loaders and prints the elapsed time together with the result of {@code
   * compare}.
   *
   * @param args unused
   */
  public static void main(String args[]) {
    // Development toggle: set to true to run both loaders on the same file and compare them.
    final boolean compareParsers = false;

    final long startMillis = System.currentTimeMillis();

    if (!compareParsers) {
      MultipleAlignment.loadPhylipShitty(new File("/space/raxml/VINCENT/DATA/500"));
      System.out.printf("parse done: %d\n", System.currentTimeMillis() - startMillis);
      return;
    }

    final File sample = new File("/space/raxml/VINCENT/DATA/150");
    MultipleAlignment reference = MultipleAlignment.loadPhylip(sample);
    MultipleAlignment candidate = MultipleAlignment.loadPhylipShitty(sample);

    System.out.printf(
        "parse done: %d %s\n",
        System.currentTimeMillis() - startMillis,
        compare(candidate, reference));
  }
예제 #4
0
  /**
   * Reads an alignment from a file that may be split into several blocks of {@code nTaxon} lines
   * separated by blank lines.
   *
   * <p>The first line holds the taxon count and the sequence length. Each following line is split
   * by {@code readLine(String)} into a name part and a data part: when the name part is non-null
   * the line starts a new entry at the current row, otherwise its data is appended to the sequence
   * already stored for that row. After all blocks are read, every row is checked against the name
   * map and the announced sequence length.
   *
   * <p>NOTE(review): a name-less line arriving before any named line for its row is appended to
   * whatever the row initially holds; such input is expected to fail the final length check --
   * confirm against the constructor's initialisation of {@code data}.
   *
   * @param file the alignment file to read
   * @return the parsed alignment with {@code nameMap} populated
   * @throws RuntimeException if the header is missing, if a row was never named or is mapped to a
   *     different index, if a sequence is missing or has the wrong length, or if an I/O error
   *     occurs (the {@link IOException} is attached as the cause)
   * @throws NumberFormatException if the header tokens are not integers
   */
  public static MultipleAlignment loadPhylipShitty(File file) {
    // try-with-resources closes the file on every exit path, including the sanity-check failures.
    try (BufferedReader r = new BufferedReader(new FileReader(file))) {

      String header = r.readLine();
      if (header == null) {
        // Empty file: fail with a clear message instead of an NPE inside StringTokenizer.
        throw new RuntimeException("cannot read header line in input file");
      }
      StringTokenizer st = new StringTokenizer(header);

      final int nTaxon = Integer.parseInt(st.nextToken());
      final int seqLen = Integer.parseInt(st.nextToken());

      MultipleAlignment ma = new MultipleAlignment(nTaxon, seqLen);

      ma.nameMap = new HashMap<String, Integer>();

      String line = null;
      boolean keepline = false;

      mainloop:
      while (true) {
        // Read one block: one line per taxon row.
        for (int i = 0; i < nTaxon; i++) {
          // When keepline is set, 'line' already holds the first line of this block (it was
          // consumed by the blank-line scan below), so it must not be read again.
          if (!keepline) {
            line = r.readLine();
          }
          keepline = false;

          if (line == null) {
            break mainloop;
          }

          String nd[] = readLine(line);

          String name = nd[0];
          String data = nd[1];

          if (name != null) {
            assert (!ma.nameMap.containsKey(name));

            ma.names[i] = name;
            ma.data[i] = data;
            ma.nameMap.put(name, i);

          } else {
            // Continuation line: extend the sequence stored for this row.
            ma.data[i] += data;
          }
        }

        //
        // look if there are non-empty lines following the last block.
        // keepline is used to implement a crude form of 'unreadline'.
        //
        keepline = false;
        outer:
        while ((line = r.readLine()) != null) {
          for (int i = 0; i < line.length(); i++) {
            if (!Character.isWhitespace(line.charAt(i))) {
              keepline = true;
              break outer;
            }
          }
        }

        // Only whitespace (or nothing) was left: the input is exhausted.
        if (!keepline) {
          break;
        }
      }

      // sanity check on name index and sequence length
      for (int i = 0; i < nTaxon; i++) {
        // A row that was never named has no map entry; report it instead of failing with an
        // NPE while unboxing the missing Integer.
        Integer mapped = ma.nameMap.get(ma.names[i]);
        if (mapped == null || mapped.intValue() != i) {
          throw new RuntimeException("quirk in name index");
        }

        if (ma.data[i] == null || ma.data[i].length() != seqLen) {
          throw new RuntimeException("wrong sequence length ");
        }
      }

      return ma;

    } catch (IOException ex) {
      Logger.getLogger(MultipleAlignment.class.getName()).log(Level.SEVERE, null, ex);
      // Keep the original failure attached so callers can see what actually went wrong.
      throw new RuntimeException("bailing out", ex);
    }
  }