@Override protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception { // Initialising exec.setMessage("initialising"); final ClusterTreeModel model = (ClusterTreeModel) inObjects[0]; final BufferedDataTable data = (BufferedDataTable) inObjects[1]; final int distanceColumnIdx = data.getDataTableSpec().findColumnIndex(distanceColumnModel.getColumnName()); final Map<RowKey, DistanceVectorDataValue> distanceMatrix = new HashMap<RowKey, DistanceVectorDataValue>(); for (final DataRow dataRow : data) { final DistanceVectorDataValue distanceVector = (DistanceVectorDataValue) dataRow.getCell(distanceColumnIdx); distanceMatrix.put(dataRow.getKey(), distanceVector); } exec.setMessage("computing"); final DendrogramNode origRoot = model.getRoot(); final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> m = visit( origRoot, new HashMap<Triple<DendrogramNode, RowKey, RowKey>, Number>(), distanceMatrix, model.getClusterDistances().length + 1, exec.createSilentSubExecutionContext(.9)); final Map<RowKey, Pair<DataRow, Integer>> rows = new HashMap<RowKey, Pair<DataRow, Integer>>(); int idx = 0; for (final DataRow dataRow : data) { rows.put(dataRow.getKey(), Pair.create(dataRow, Integer.valueOf(idx++))); } exec.setMessage("creating final tree"); final ClusterViewNode tree = buildNewTree(convertM(m), origRoot, rows, exec).getO1(); final ArrayList<DistanceVectorDataValue> origList = new ArrayList<DistanceVectorDataValue>(model.getClusterDistances().length + 1), newList = new ArrayList<DistanceVectorDataValue>(model.getClusterDistances().length + 1); flatten(origRoot, origList, distanceMatrix); exec.checkCanceled(); flatten(tree, newList, distanceMatrix); logger.info("Before: " + sumDistance(origList)); logger.info("After: " + sumDistance(newList)); final ClusterTreeModel clusterTreeModel = new ClusterTreeModel( (DataTableSpec) model.getSpec(), tree, model.getClusterDistances(), model.getClusterDistances().length + 1) { @Override public 
String getSummary() { return "Before: " + sumDistance(origList) + "\nAfter: " + sumDistance(newList); } }; return new PortObject[] {clusterTreeModel}; }
/**
 * {@inheritDoc}
 *
 * <p>Submits one MUSCLE alignment job to EBI per input row: the sequences and
 * their accessions (read from the two configured list columns) are rendered as
 * an in-memory FASTA document and lodged with the EBI MUSCLE web service. For
 * each completed job the output row joins the input row with the job id, the
 * alignment cell and the raw service log.
 *
 * @throws Exception if the e-mail is unset, a column is missing, a row has
 *         mismatched/zero/too many sequences, the job fails, or the user
 *         cancels execution.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
    throws Exception {
  // EBI requires a real contact address to report service problems.
  if (m_email.equals(DEFAULT_EMAIL)) {
    throw new Exception(
        "You must set a valid E-Mail for EBI to contact you in the event of problems with the service!");
  }
  final int nRows = inData[0].getRowCount();
  final int seqIdx =
      inData[0].getSpec().findColumnIndex(((SettingsModelString) m_seq_col).getStringValue());
  final int accsnIdx =
      inData[0].getSpec().findColumnIndex(((SettingsModelString) m_accsn_col).getStringValue());
  if (seqIdx < 0 || accsnIdx < 0) {
    throw new Exception("Cannot find columns... valid data?");
  }
  int done = 0;

  // Output columns: the input columns followed by the job-result columns
  // (raw format for use with R).
  final DataTableSpec outputSpec =
      new DataTableSpec(inData[0].getDataTableSpec(), make_output_spec());
  final BufferedDataContainer container = exec.createDataContainer(outputSpec, false, 0);

  final MuscleClient cli = new MuscleClient();

  // Each row is a separate MUSCLE job: the sequences are in one collection
  // cell, the accessions (IDs) in the other.
  final RowIterator it = inData[0].iterator();
  while (it.hasNext()) {
    final DataRow r = it.next();
    final ListCell seqs = (ListCell) r.getCell(seqIdx);
    final ListCell accsns = (ListCell) r.getCell(accsnIdx);
    if (seqs.size() != accsns.size()) {
      throw new Exception("Every sequence must have a corresponding accession: error at row "
          + r.getKey().getString());
    }
    if (seqs.size() < 1) {
      throw new Exception("Cannot MUSCLE zero sequences: error at row " + r.getKey().getString());
    }
    // NOTE(review): presumably the service's per-job sequence limit — confirm.
    if (seqs.size() > 1000) {
      throw new Exception("Too many sequences in row " + r.getKey().getString());
    }

    // Build a fake "FASTA" file in memory to submit to MUSCLE@EBI along with
    // the other parameters. StringBuilder: no synchronization needed here.
    final StringBuilder seqAsFasta = new StringBuilder();
    for (int i = 0; i < seqs.size(); i++) {
      seqAsFasta.append(">");
      seqAsFasta.append(accsns.get(i).toString());
      seqAsFasta.append("\n");
      seqAsFasta.append(seqs.get(i).toString());
      seqAsFasta.append("\n");
    }

    final InputParameters ip = new InputParameters();
    ip.setSequence(seqAsFasta.toString());

    // Start the job, then wait before polling for completion.
    final String jobId = cli.runApp(m_email.getStringValue(), r.getKey().getString(), ip);
    exec.setProgress(((double) done) / nRows, "Executing " + jobId);
    // Sleep the original 20 seconds, but in one-second slices so a user
    // cancel is honoured promptly instead of after the full wait.
    for (int slice = 0; slice < 20; slice++) {
      exec.checkCanceled();
      Thread.sleep(1000);
    }
    waitForCompletion(cli, exec, jobId);
    done++;

    // Process results and add them into the table...
    // 1. FASTA alignment data — decode explicitly as UTF-8 rather than the
    // platform default charset, which varies between machines.
    byte[] bytes = cli.getSrvProxy().getResult(jobId, "aln-fasta", null);
    final DataCell[] cells = new DataCell[3];
    cells[0] = new StringCell(jobId);
    final String fasta = new String(bytes, "UTF-8");
    // NOTE(review): the phylip conversion result was never used by the
    // original code; the call is kept in case it has validation side effects
    // — confirm and delete if pure.
    fasta2phylip(fasta);
    final String rk = r.getKey().getString();
    final DataCell mac = AlignmentCellFactory.createCell(fasta, AlignmentType.AL_AA);
    if (mac instanceof MultiAlignmentCell) {
      m_muscle_map.put(rk, (MultiAlignmentCell) mac);
    }
    cells[1] = mac;

    // 2. Raw service output, wrapped for HTML display.
    bytes = cli.getSrvProxy().getResult(jobId, "out", null);
    cells[2] = new StringCell("<html><pre>" + new String(bytes, "UTF-8"));

    container.addRowToTable(new JoinedRow(r, new DefaultRow(r.getKey(), cells)));
  }
  container.close();
  final BufferedDataTable out = container.getTable();
  return new BufferedDataTable[] {out};
}