protected void waitForCompletion(MuscleClient cli, ExecutionContext exec, String jobId) throws Exception { if (jobId.length() > 0) { int check_period = 20 * 1000; // every 10s String status = "PENDING"; int retry = 0; while (status.equals("PENDING") || status.equals("RUNNING")) { try { logger.info("Waiting for " + jobId); status = cli.checkStatus(jobId); if (status.equals("RUNNING") || status.equals("PENDING")) { logger.info(jobId + " " + status + ", sleeping for " + check_period + " milliseconds"); // check ten times each check_period to see if the user pressed cancel for (int i = 0; i < 10; i++) { Thread.sleep(check_period / 10); exec.checkCanceled(); } // each time job is still going, we double check_period to reduce likelihood of // overloading EBI check_period *= 2; if (check_period > 200000) { check_period = 200000; } } if (status == "FAILED") { logger.error("MUSCLE job failed: " + jobId); } } catch (IOException e) { if (retry < 3) { logger.warn( "Unable to check job " + jobId + " retrying (after linear-backoff delay)... "); Thread.sleep(((420 * retry) + 120) * 1000); status = "PENDING"; retry++; } else { throw new Exception("Cannot check job " + jobId + " via MUSCLE (EBI)... aborting" + e); } } } } else { throw new Exception("Bogus EBI job id... aborting!"); } }
/**
 * {@inheritDoc}
 *
 * <p>Submits every row of the first input table as a separate MUSCLE job. The row's sequence
 * list and accession list (columns selected by {@code m_seq_col} and {@code m_accsn_col}) are
 * combined into an in-memory FASTA text, the job is started through {@link MuscleClient}, and
 * the method blocks until {@link #waitForCompletion} returns. Each output row is the input row
 * joined with three new cells: the job id, the alignment cell built from the {@code aln-fasta}
 * result, and the {@code out} result wrapped in an HTML {@code <pre>} prefix.
 *
 * @param inData input tables; only {@code inData[0]} is read
 * @param exec execution context used for progress, cancellation and container creation
 * @return a single-element array holding the joined output table
 * @throws Exception if the e-mail setting is still the default, a configured column is missing,
 *     a row's cells are not list cells, a row has mismatched, zero or more than 1000 entries,
 *     or any service call, sleep or cancellation check throws
 */
@Override
protected BufferedDataTable[] execute(
    final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
  // Compare the configured string value, not the settings-model object itself: the model is
  // used via getStringValue() below, so equals() on the model would not match the default.
  // NOTE(review): assumes DEFAULT_EMAIL is a String constant - confirm against its declaration.
  if (DEFAULT_EMAIL.equals(m_email.getStringValue())) {
    throw new Exception(
        "You must set a valid E-Mail for EBI to contact you in the event of problems with the service!");
  }

  int n_rows = inData[0].getRowCount();
  int seq_idx =
      inData[0].getSpec().findColumnIndex(((SettingsModelString) m_seq_col).getStringValue());
  int accsn_idx =
      inData[0].getSpec().findColumnIndex(((SettingsModelString) m_accsn_col).getStringValue());
  if (seq_idx < 0 || accsn_idx < 0) {
    throw new Exception("Cannot find columns... valid data?");
  }
  int done = 0;

  // create the output columns (raw format for use with R)
  DataTableSpec outputSpec =
      new DataTableSpec(inData[0].getDataTableSpec(), make_output_spec());
  BufferedDataContainer container = exec.createDataContainer(outputSpec, false, 0);

  // instantiate MUSCLE client
  MuscleClient cli = new MuscleClient();

  // each row is a separate MUSCLE job, the sequences are in one collection cell, the accessions
  // (IDs) in the other
  RowIterator it = inData[0].iterator();
  while (it.hasNext()) {
    DataRow r = it.next();
    String rk = r.getKey().getString();

    // Fail with a readable message instead of a bare ClassCastException when a cell is not a
    // list cell.
    DataCell seq_cell = r.getCell(seq_idx);
    DataCell accsn_cell = r.getCell(accsn_idx);
    if (!(seq_cell instanceof ListCell) || !(accsn_cell instanceof ListCell)) {
      throw new Exception(
          "Sequence and accession cells must both be list cells: error at row " + rk);
    }
    ListCell seqs = (ListCell) seq_cell;
    ListCell accsns = (ListCell) accsn_cell;

    if (seqs.size() != accsns.size()) {
      throw new Exception(
          "Every sequence must have a corresponding accession: error at row " + rk);
    }
    if (seqs.size() < 1) {
      throw new Exception("Cannot MUSCLE zero sequences: error at row " + rk);
    }
    if (seqs.size() > 1000) {
      throw new Exception("Too many sequences in row " + rk);
    }

    // dummy a fake "FASTA" file (in memory) and then submit that to MUSCLE@EBI along with other
    // necessary parameters
    StringBuilder seq_as_fasta = new StringBuilder();
    for (int i = 0; i < seqs.size(); i++) {
      seq_as_fasta.append(">");
      seq_as_fasta.append(accsns.get(i).toString());
      seq_as_fasta.append("\n");
      seq_as_fasta.append(seqs.get(i).toString());
      seq_as_fasta.append("\n");
    }

    // lodge the muscle job and store the results in the output table
    InputParameters ip = new InputParameters();
    ip.setSequence(seq_as_fasta.toString());

    // start the job
    String jobId = cli.runApp(m_email.getStringValue(), rk, ip);
    exec.checkCanceled();
    exec.setProgress(((double) done) / n_rows, "Executing " + jobId);
    Thread.sleep(20 * 1000); // 20 seconds
    waitForCompletion(cli, exec, jobId);
    done++;

    // process results and add them into the table...
    // 1. fasta alignment data
    // NOTE(review): new String(byte[]) decodes with the platform default charset - confirm the
    // encoding the service returns.
    byte[] bytes = cli.getSrvProxy().getResult(jobId, "aln-fasta", null);
    DataCell[] cells = new DataCell[3];
    cells[0] = new StringCell(jobId);
    String fasta = new String(bytes);

    // The phylip conversion result is not used by this method; the call is retained because
    // its effects (if any) are not visible from here.
    fasta2phylip(fasta);

    // 2. alignment cell, also remembered per row key when it is a MultiAlignmentCell
    DataCell mac = AlignmentCellFactory.createCell(fasta, AlignmentType.AL_AA);
    if (mac instanceof MultiAlignmentCell) {
      m_muscle_map.put(rk, (MultiAlignmentCell) mac);
    }
    cells[1] = mac;

    // 3. raw "out" result from the service
    bytes = cli.getSrvProxy().getResult(jobId, "out", null);
    cells[2] = new StringCell("<html><pre>" + new String(bytes));

    container.addRowToTable(new JoinedRow(r, new DefaultRow(r.getKey(), cells)));
  }
  container.close();
  BufferedDataTable out = container.getTable();
  return new BufferedDataTable[] {out};
}