@Override
 public List<MarkerDataType> transform(final DataTable inTable) {
   checkNotNull(this.minimalTableSpec, "Minimal DataTableSpec hat to be set");
   checkNotNull(this.inputTableSpec, "Input DataTableSpec hat to be set");
   checkNotNull(inTable, "InTable has to be set");
   List<MarkerDataType> markerData = Lists.newArrayListWithExpectedSize(1000);
   RowIterator iterator = inTable.iterator();
   while (iterator.hasNext()) {
     markerData.add(transformRow(iterator.next()));
   }
   return markerData;
 }
Ejemplo n.º 2
0
  /** {@inheritDoc} */
  @Override
  protected BufferedDataTable[] execute(
      final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    if (m_email.equals(DEFAULT_EMAIL)) {
      throw new Exception(
          "You must set a valid E-Mail for EBI to contact you in the event of problems with the service!");
    }
    int n_rows = inData[0].getRowCount();
    int seq_idx =
        inData[0].getSpec().findColumnIndex(((SettingsModelString) m_seq_col).getStringValue());
    int accsn_idx =
        inData[0].getSpec().findColumnIndex(((SettingsModelString) m_accsn_col).getStringValue());
    if (seq_idx < 0 || accsn_idx < 0) {
      throw new Exception("Cannot find columns... valid data?");
    }
    int done = 0;

    // create the output columns (raw format for use with R)
    DataTableSpec outputSpec = new DataTableSpec(inData[0].getDataTableSpec(), make_output_spec());
    BufferedDataContainer container = exec.createDataContainer(outputSpec, false, 0);

    // instantiate MUSCLE client
    MuscleClient cli = new MuscleClient();

    // each row is a separate MUSCLE job, the sequences are in one collection cell, the accessions
    // (IDs) in the other
    RowIterator it = inData[0].iterator();
    while (it.hasNext()) {
      DataRow r = it.next();
      ListCell seqs = (ListCell) r.getCell(seq_idx);
      ListCell accsns = (ListCell) r.getCell(accsn_idx);
      if (seqs.size() != accsns.size()) {
        throw new Exception(
            "Every sequence must have a corresponding accession: error at row "
                + r.getKey().getString());
      }
      if (seqs.size() < 1) {
        throw new Exception("Cannot MUSCLE zero sequences: error at row " + r.getKey().getString());
      }
      if (seqs.size() > 1000) {
        throw new Exception("Too many sequences in row " + r.getKey().getString());
      }
      // dummy a fake "FASTA" file (in memory) and then submit that to MUSCLE@EBI along with other
      // necessary parameters
      StringBuffer seq_as_fasta = new StringBuffer();
      for (int i = 0; i < seqs.size(); i++) {
        seq_as_fasta.append(">");
        seq_as_fasta.append(accsns.get(i).toString());
        seq_as_fasta.append("\n");
        seq_as_fasta.append(seqs.get(i).toString());
        seq_as_fasta.append("\n");
      }
      // System.err.println(seq_as_fasta);

      // lodge the muscle job and store the results in the output table
      InputParameters ip = new InputParameters();
      ip.setSequence(seq_as_fasta.toString());

      // start the job
      String jobId = cli.runApp(m_email.getStringValue(), r.getKey().getString(), ip);

      exec.checkCanceled();
      exec.setProgress(((double) done) / n_rows, "Executing " + jobId);
      Thread.sleep(20 * 1000); // 20 seconds
      waitForCompletion(cli, exec, jobId);
      done++;

      // process results and add them into the table...
      // 1. fasta alignment data
      byte[] bytes = cli.getSrvProxy().getResult(jobId, "aln-fasta", null);

      DataCell[] cells = new DataCell[3];
      cells[0] = new StringCell(jobId);

      // compute the base64 encoded phylip aligned sequences suitable for use by R's phangorn
      // package
      String fasta = new String(bytes);
      String ret = fasta2phylip(fasta);

      // it must be encoded (I chose base64) as it is common to both Java and R and it must be
      // encoded due to containing multiple lines, which confuses the CSV passed between KNIME and R
      String rk = r.getKey().getString();
      DataCell mac = AlignmentCellFactory.createCell(fasta, AlignmentType.AL_AA);
      if (mac instanceof MultiAlignmentCell) m_muscle_map.put(rk, (MultiAlignmentCell) mac);
      cells[1] = mac;

      bytes = cli.getSrvProxy().getResult(jobId, "out", null);
      cells[2] = new StringCell("<html><pre>" + new String(bytes));

      container.addRowToTable(new JoinedRow(r, new DefaultRow(r.getKey(), cells)));
    }
    container.close();
    BufferedDataTable out = container.getTable();
    return new BufferedDataTable[] {out};
  }