Ejemplo n.º 1
0
  /**
   * Polls the MUSCLE service until the given job is no longer {@code PENDING} or {@code RUNNING}.
   *
   * <p>The wait between polls starts at 20 seconds and doubles after every poll that still finds
   * the job pending or running, capped at 200 seconds. Each wait is split into ten slices so a
   * user cancel is noticed without waiting for the whole period. An {@link IOException} from the
   * status check is retried at most three times in total (the counter is never reset), with a
   * linearly growing delay of 120 s, 540 s and 960 s, before the method gives up.
   *
   * <p>A {@code FAILED} status is only logged; the method then returns normally because the loop
   * condition no longer holds.
   *
   * @param cli client used to query the job status
   * @param exec execution context, consulted for cancellation while waiting
   * @param jobId identifier of the job to wait for; must not be empty
   * @throws Exception if {@code jobId} is empty, if the status still cannot be checked once the
   *     retries are exhausted (the {@link IOException} is attached as the cause), or if the wait
   *     is cancelled or interrupted
   */
  protected void waitForCompletion(MuscleClient cli, ExecutionContext exec, String jobId)
      throws Exception {
    if (jobId.length() == 0) {
      throw new Exception("Bogus EBI job id... aborting!");
    }

    final int maxCheckPeriodMs = 200000;
    int checkPeriodMs = 20 * 1000; // first wait is 20s, doubled after each unfinished poll
    String status = "PENDING";
    int retry = 0;
    while (status.equals("PENDING") || status.equals("RUNNING")) {
      try {
        logger.info("Waiting for " + jobId);

        status = cli.checkStatus(jobId);
        if (status.equals("RUNNING") || status.equals("PENDING")) {
          logger.info(jobId + " " + status + ", sleeping for " + checkPeriodMs + " milliseconds");

          // sleep in ten slices so a user cancel is picked up within a tenth of the period
          for (int i = 0; i < 10; i++) {
            Thread.sleep(checkPeriodMs / 10);
            exec.checkCanceled();
          }

          // each time the job is still going, double the period (up to the cap) to reduce the
          // likelihood of overloading EBI
          checkPeriodMs = Math.min(checkPeriodMs * 2, maxCheckPeriodMs);
        }
        // compare by value: reference comparison (==) does not match a status string returned
        // by the client even when its content is "FAILED"
        if ("FAILED".equals(status)) {
          logger.error("MUSCLE job failed: " + jobId);
        }
      } catch (IOException e) {
        if (retry >= 3) {
          // keep the original exception as the cause so its stack trace is not lost
          throw new Exception(
              "Cannot check job " + jobId + " via MUSCLE (EBI)... aborting: " + e, e);
        }
        logger.warn(
            "Unable to check job " + jobId + " retrying (after linear-backoff delay)... ");
        Thread.sleep(((420 * retry) + 120) * 1000);
        status = "PENDING"; // force another pass through the polling loop
        retry++;
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * {@inheritDoc}
   *
   * <p>Submits one MUSCLE job per row of the first input table. Each row must carry two collection
   * cells of equal size (1 to 1000 elements): the sequences and their accessions. The two are
   * combined into an in-memory FASTA text, submitted through {@link MuscleClient}, and the method
   * blocks until the job has finished. The output row is the input row joined with three cells:
   * the job id, the alignment built from the {@code aln-fasta} result, and the {@code out} result
   * as text.
   *
   * @throws Exception if the e-mail setting is unset or still the default, the configured columns
   *     cannot be found, a row has mismatching/empty/oversized collections, or the job cannot be
   *     run or waited for
   */
  @Override
  protected BufferedDataTable[] execute(
      final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Compare the configured string value, not the settings model object itself: the model is
    // never equal to the default-address constant, so the guard could not fire before.
    // NOTE(review): assumes DEFAULT_EMAIL is the String placeholder address -- confirm.
    final String email = m_email.getStringValue();
    if (email == null || email.equals(DEFAULT_EMAIL)) {
      throw new Exception(
          "You must set a valid E-Mail for EBI to contact you in the event of problems with the service!");
    }
    int n_rows = inData[0].getRowCount();
    int seq_idx =
        inData[0].getSpec().findColumnIndex(((SettingsModelString) m_seq_col).getStringValue());
    int accsn_idx =
        inData[0].getSpec().findColumnIndex(((SettingsModelString) m_accsn_col).getStringValue());
    if (seq_idx < 0 || accsn_idx < 0) {
      throw new Exception("Cannot find columns... valid data?");
    }
    int done = 0;

    // output table = input columns followed by the columns from make_output_spec()
    DataTableSpec outputSpec = new DataTableSpec(inData[0].getDataTableSpec(), make_output_spec());
    BufferedDataContainer container = exec.createDataContainer(outputSpec, false, 0);

    // instantiate MUSCLE client
    MuscleClient cli = new MuscleClient();

    // each row is a separate MUSCLE job, the sequences are in one collection cell, the accessions
    // (IDs) in the other
    RowIterator it = inData[0].iterator();
    while (it.hasNext()) {
      DataRow r = it.next();
      ListCell seqs = (ListCell) r.getCell(seq_idx);
      ListCell accsns = (ListCell) r.getCell(accsn_idx);
      if (seqs.size() != accsns.size()) {
        throw new Exception(
            "Every sequence must have a corresponding accession: error at row "
                + r.getKey().getString());
      }
      if (seqs.size() < 1) {
        throw new Exception("Cannot MUSCLE zero sequences: error at row " + r.getKey().getString());
      }
      if (seqs.size() > 1000) {
        throw new Exception("Too many sequences in row " + r.getKey().getString());
      }
      // build a FASTA "file" in memory (">accession" line followed by the sequence line) and
      // submit that to MUSCLE@EBI along with the other necessary parameters; the buffer is local
      // to this iteration, so the unsynchronized StringBuilder is sufficient
      StringBuilder seq_as_fasta = new StringBuilder();
      for (int i = 0; i < seqs.size(); i++) {
        seq_as_fasta.append(">");
        seq_as_fasta.append(accsns.get(i).toString());
        seq_as_fasta.append("\n");
        seq_as_fasta.append(seqs.get(i).toString());
        seq_as_fasta.append("\n");
      }

      // lodge the muscle job and store the results in the output table
      InputParameters ip = new InputParameters();
      ip.setSequence(seq_as_fasta.toString());

      // start the job, using the row key as the job title
      String jobId = cli.runApp(email, r.getKey().getString(), ip);

      exec.checkCanceled();
      exec.setProgress(((double) done) / n_rows, "Executing " + jobId);
      Thread.sleep(20 * 1000); // 20 seconds before the first status poll
      waitForCompletion(cli, exec, jobId);
      done++;

      // process results and add them into the table...
      // 1. fasta alignment data
      byte[] bytes = cli.getSrvProxy().getResult(jobId, "aln-fasta", null);

      DataCell[] cells = new DataCell[3];
      cells[0] = new StringCell(jobId);

      // NOTE(review): decoded with the platform default charset; the result is presumably plain
      // ASCII FASTA -- confirm before pinning a charset.
      String fasta = new String(bytes);
      // NOTE(review): the phylip conversion result is not used by this method; the call is kept
      // in case fasta2phylip validates the alignment or has side effects -- confirm and remove.
      fasta2phylip(fasta);

      // 2. alignment cell, also remembered per row key when it is a MultiAlignmentCell
      String rk = r.getKey().getString();
      DataCell mac = AlignmentCellFactory.createCell(fasta, AlignmentType.AL_AA);
      if (mac instanceof MultiAlignmentCell) {
        m_muscle_map.put(rk, (MultiAlignmentCell) mac);
      }
      cells[1] = mac;

      // 3. the job's "out" result, prefixed so that it is shown as preformatted HTML
      bytes = cli.getSrvProxy().getResult(jobId, "out", null);
      cells[2] = new StringCell("<html><pre>" + new String(bytes));

      container.addRowToTable(new JoinedRow(r, new DefaultRow(r.getKey(), cells)));
    }
    container.close();
    BufferedDataTable out = container.getTable();
    return new BufferedDataTable[] {out};
  }