private Map<Triple<DendrogramNode, RowKey, RowKey>, Number> visit(
        final DendrogramNode root,
        final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> m,
        final Map<RowKey, DistanceVectorDataValue> d,
        final int allLeaves,
        final ExecutionContext exec) throws CanceledExecutionException {
    if (root.isLeaf()) {
        final RowKey key = RowKeyHelper.getKey(root);
        return Collections.singletonMap(Triple.apply(root, key, key), (Number) Double.valueOf(0));
    }
    final DendrogramNode w = root.getFirstSubnode();
    final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> leftM = visit(w, m, d, allLeaves, exec);
    final DendrogramNode x = root.getSecondSubnode();
    final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> rightM = visit(x, m, d, allLeaves, exec);
    final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> ret =
            new HashMap<Triple<DendrogramNode, RowKey, RowKey>, Number>(leftM);
    ret.putAll(rightM);
    final Set<RowKey> leftKeys = computeLeaves(w);
    final Set<RowKey> rightKeys = computeLeaves(x);
    computeM(root, d, w, x, rightM, ret, leftKeys, rightKeys);
    exec.checkCanceled();
    computeM(root, d, x, w, leftM, ret, rightKeys, leftKeys);
    exec.setProgress(((double) leftKeys.size() + rightKeys.size()) / allLeaves);
    return ret;
}
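The `computeLeaves` helper used above is not shown in this section. A minimal sketch of how such a helper could collect the leaf row keys of a subtree, assuming the same `DendrogramNode` and `RowKeyHelper` API as `visit` (a hypothetical reconstruction, not the actual implementation):

// Hypothetical sketch: recursively collects the RowKeys of all leaves under a node.
private static Set<RowKey> computeLeaves(final DendrogramNode node) {
    final Set<RowKey> keys = new HashSet<RowKey>();
    if (node.isLeaf()) {
        keys.add(RowKeyHelper.getKey(node));
    } else {
        keys.addAll(computeLeaves(node.getFirstSubnode()));
        keys.addAll(computeLeaves(node.getSecondSubnode()));
    }
    return keys;
}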
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec)
        throws Exception {
    // Initialising
    exec.setMessage("initialising");
    final ClusterTreeModel model = (ClusterTreeModel) inObjects[0];
    final BufferedDataTable data = (BufferedDataTable) inObjects[1];
    final int distanceColumnIdx =
            data.getDataTableSpec().findColumnIndex(distanceColumnModel.getColumnName());
    final Map<RowKey, DistanceVectorDataValue> distanceMatrix =
            new HashMap<RowKey, DistanceVectorDataValue>();
    for (final DataRow dataRow : data) {
        final DistanceVectorDataValue distanceVector =
                (DistanceVectorDataValue) dataRow.getCell(distanceColumnIdx);
        distanceMatrix.put(dataRow.getKey(), distanceVector);
    }
    exec.setMessage("computing");
    final DendrogramNode origRoot = model.getRoot();
    final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> m = visit(
            origRoot,
            new HashMap<Triple<DendrogramNode, RowKey, RowKey>, Number>(),
            distanceMatrix,
            model.getClusterDistances().length + 1,
            exec.createSilentSubExecutionContext(.9));
    final Map<RowKey, Pair<DataRow, Integer>> rows = new HashMap<RowKey, Pair<DataRow, Integer>>();
    int idx = 0;
    for (final DataRow dataRow : data) {
        rows.put(dataRow.getKey(), Pair.create(dataRow, Integer.valueOf(idx++)));
    }
    exec.setMessage("creating final tree");
    final ClusterViewNode tree = buildNewTree(convertM(m), origRoot, rows, exec).getO1();
    final ArrayList<DistanceVectorDataValue> origList =
            new ArrayList<DistanceVectorDataValue>(model.getClusterDistances().length + 1),
            newList = new ArrayList<DistanceVectorDataValue>(model.getClusterDistances().length + 1);
    flatten(origRoot, origList, distanceMatrix);
    exec.checkCanceled();
    flatten(tree, newList, distanceMatrix);
    logger.info("Before: " + sumDistance(origList));
    logger.info("After: " + sumDistance(newList));
    final ClusterTreeModel clusterTreeModel = new ClusterTreeModel(
            (DataTableSpec) model.getSpec(), tree, model.getClusterDistances(),
            model.getClusterDistances().length + 1) {
        @Override
        public String getSummary() {
            return "Before: " + sumDistance(origList) + "\nAfter: " + sumDistance(newList);
        }
    };
    return new PortObject[] {clusterTreeModel};
}
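The "Before"/"After" summary relies on a `sumDistance` helper that is not shown here. A plausible sketch, assuming it sums the distances between consecutive leaves of the flattened tree (the quantity the reordering tries to reduce) and assuming `DistanceVectorDataValue` exposes a pairwise `getDistance` accessor; both are assumptions, not the actual code:

// Hypothetical sketch: total distance along the flattened leaf order.
private static double sumDistance(final List<DistanceVectorDataValue> leaves) {
    double sum = 0;
    for (int i = 1; i < leaves.size(); i++) {
        // assumed API: distance between two distance-vector values
        sum += leaves.get(i - 1).getDistance(leaves.get(i));
    }
    return sum;
}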
@Override
protected BufferedDataTable[] execute(
        final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataContainer con = exec.createDataContainer(m_spec);
    final ImgPlusCellFactory imgCellFactory = new ImgPlusCellFactory(exec);
    int id = 1;
    for (final SerializableSetting<Img<T>[]> conf : m_kernelList.getObjects()) {
        for (final Img<T> img : conf.get()) {
            if (img instanceof ImgPlus<?>) {
                con.addRowToTable(new DefaultRow(
                        "Kernel " + id, imgCellFactory.createCell((ImgPlus<T>) img)));
            } else {
                con.addRowToTable(new DefaultRow(
                        "Kernel " + id, imgCellFactory.createCell(new ImgPlus<T>(img))));
            }
            id++;
        }
    }
    con.close();
    // data for the table cell view
    m_data = con.getTable();
    return new BufferedDataTable[] {m_data};
}
@Test
public void shouldTransform() throws CanceledExecutionException {
    // given
    OutputTransformer<ITSDataType> transformer = new ITSAdapterTransformer(createDataColumnSpec());
    List<ITSDataType> entries = Lists.newArrayList(create("Bug1"), create("Bug2"));
    BufferedDataContainer container = mock(BufferedDataContainer.class);
    ExecutionContext exec = mock(ExecutionContext.class);
    when(exec.createDataContainer(Mockito.any(DataTableSpec.class))).thenReturn(container);

    // when
    transformer.transform(entries, exec);

    // then
    verify(container, times(2)).addRowToTable(Mockito.any(DataRow.class));
}
protected void waitForCompletion(MuscleClient cli, ExecutionContext exec, String jobId)
        throws Exception {
    if (jobId.length() > 0) {
        int check_period = 20 * 1000; // initially 20s; doubled after each poll, capped below
        String status = "PENDING";
        int retry = 0;
        while (status.equals("PENDING") || status.equals("RUNNING")) {
            try {
                logger.info("Waiting for " + jobId);
                status = cli.checkStatus(jobId);
                if (status.equals("RUNNING") || status.equals("PENDING")) {
                    logger.info(jobId + " " + status + ", sleeping for " + check_period
                            + " milliseconds");
                    // check ten times per check_period to see if the user pressed cancel
                    for (int i = 0; i < 10; i++) {
                        Thread.sleep(check_period / 10);
                        exec.checkCanceled();
                    }
                    // each time the job is still going, double check_period to reduce the
                    // likelihood of overloading EBI (capped at 200s)
                    check_period *= 2;
                    if (check_period > 200000) {
                        check_period = 200000;
                    }
                }
                if (status.equals("FAILED")) {
                    logger.error("MUSCLE job failed: " + jobId);
                }
            } catch (IOException e) {
                if (retry < 3) {
                    logger.warn("Unable to check job " + jobId
                            + ", retrying (after linear-backoff delay)... ");
                    Thread.sleep(((420 * retry) + 120) * 1000);
                    status = "PENDING";
                    retry++;
                } else {
                    throw new Exception(
                            "Cannot check job " + jobId + " via MUSCLE (EBI)... aborting", e);
                }
            }
        }
    } else {
        throw new Exception("Bogus EBI job id... aborting!");
    }
}
@Override
protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec)
        throws Exception {
    BufferedDataContainer cont = exec.createDataContainer(outspec[0]);
    List<DataCell> cells = new ArrayList<DataCell>();
    CloseableRowIterator iter = inData[0].iterator();
    while (iter.hasNext()) {
        cells.add(iter.next().getCell(0));
    }
    ListCell lc = CollectionCellFactory.createListCell(cells);
    DataRow row = new DefaultRow("Row 1", lc);
    cont.addRowToTable(row);
    cont.close();
    return new BufferedDataTable[] {cont.getTable()};
}
private static Triple<ClusterViewNode, RowKey, RowKey> buildNewTree(
        final Map<DendrogramNode, Map<Pair<RowKey, RowKey>, Number>> m,
        final DendrogramNode root,
        final Map<RowKey, Pair<DataRow, Integer>> rows,
        ExecutionContext exec) throws CanceledExecutionException {
    if (root.isLeaf()) {
        final Pair<DataRow, Integer> leafRow = rows.get(RowKeyHelper.getKey(root));
        return Triple.apply(
                new ClusterViewNode(leafRow.getFirst().getKey()),
                leafRow.getFirst().getKey(),
                leafRow.getFirst().getKey());
    }
    final Triple<ClusterViewNode, RowKey, RowKey> firstTree =
            buildNewTree(m, root.getFirstSubnode(), rows, exec);
    exec.checkCanceled();
    final Triple<ClusterViewNode, RowKey, RowKey> secondTree =
            buildNewTree(m, root.getSecondSubnode(), rows, exec);
    final Map<Pair<RowKey, RowKey>, Number> map = m.get(root);
    // score of keeping the subtrees in their current order ...
    Pair<RowKey, RowKey> pairNoChange = Pair.create(firstTree.getO3(), secondTree.getO2());
    if (!map.containsKey(pairNoChange)) {
        pairNoChange = flip(pairNoChange);
    }
    // ... and of swapping them
    Pair<RowKey, RowKey> pairChange = Pair.create(secondTree.getO3(), firstTree.getO2());
    if (!map.containsKey(pairChange)) {
        pairChange = flip(pairChange);
    }
    assert map.containsKey(pairNoChange);
    assert map.containsKey(pairChange);
    final double noChangeValue = map.get(pairNoChange).doubleValue();
    final double changeValue = map.get(pairChange).doubleValue();
    if (noChangeValue > changeValue) {
        return Triple.apply(
                new ClusterViewNode(firstTree.getO1(), secondTree.getO1(), root.getDist()),
                firstTree.getO2(), secondTree.getO3());
    }
    return Triple.apply(
            new ClusterViewNode(secondTree.getO1(), firstTree.getO1(), root.getDist()),
            secondTree.getO2(), firstTree.getO3());
}
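The `flip` helper is referenced but not shown. Given how `buildNewTree` retries a map lookup when the (left, right) orientation of a key pair is absent, it is presumably just a pair swap; a minimal sketch under that assumption:

// Hypothetical sketch: swaps the two elements of a lookup key pair.
private static Pair<RowKey, RowKey> flip(final Pair<RowKey, RowKey> pair) {
    return Pair.create(pair.getSecond(), pair.getFirst());
}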
/** {@inheritDoc} */
@Override
protected BufferedDataTable[] execute(
        final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    if (m_email.equals(DEFAULT_EMAIL)) {
        throw new Exception(
                "You must set a valid E-Mail for EBI to contact you in the event of problems with the service!");
    }
    int n_rows = inData[0].getRowCount();
    int seq_idx =
            inData[0].getSpec().findColumnIndex(((SettingsModelString) m_seq_col).getStringValue());
    int accsn_idx =
            inData[0].getSpec().findColumnIndex(((SettingsModelString) m_accsn_col).getStringValue());
    if (seq_idx < 0 || accsn_idx < 0) {
        throw new Exception("Cannot find columns... valid data?");
    }
    int done = 0;

    // create the output columns (raw format for use with R)
    DataTableSpec outputSpec = new DataTableSpec(inData[0].getDataTableSpec(), make_output_spec());
    BufferedDataContainer container = exec.createDataContainer(outputSpec, false, 0);

    // instantiate MUSCLE client
    MuscleClient cli = new MuscleClient();

    // each row is a separate MUSCLE job; the sequences are in one collection cell,
    // the accessions (IDs) in the other
    RowIterator it = inData[0].iterator();
    while (it.hasNext()) {
        DataRow r = it.next();
        ListCell seqs = (ListCell) r.getCell(seq_idx);
        ListCell accsns = (ListCell) r.getCell(accsn_idx);
        if (seqs.size() != accsns.size()) {
            throw new Exception("Every sequence must have a corresponding accession: error at row "
                    + r.getKey().getString());
        }
        if (seqs.size() < 1) {
            throw new Exception(
                    "Cannot MUSCLE zero sequences: error at row " + r.getKey().getString());
        }
        if (seqs.size() > 1000) {
            throw new Exception("Too many sequences in row " + r.getKey().getString());
        }

        // build a fake "FASTA" file (in memory) and then submit it to MUSCLE@EBI
        // along with the other necessary parameters
        StringBuffer seq_as_fasta = new StringBuffer();
        for (int i = 0; i < seqs.size(); i++) {
            seq_as_fasta.append(">");
            seq_as_fasta.append(accsns.get(i).toString());
            seq_as_fasta.append("\n");
            seq_as_fasta.append(seqs.get(i).toString());
            seq_as_fasta.append("\n");
        }
        // System.err.println(seq_as_fasta);

        // lodge the MUSCLE job and store the results in the output table
        InputParameters ip = new InputParameters();
        ip.setSequence(seq_as_fasta.toString());

        // start the job
        String jobId = cli.runApp(m_email.getStringValue(), r.getKey().getString(), ip);
        exec.checkCanceled();
        exec.setProgress(((double) done) / n_rows, "Executing " + jobId);
        Thread.sleep(20 * 1000); // 20 seconds
        waitForCompletion(cli, exec, jobId);
        done++;

        // process results and add them into the table...
        // 1. FASTA alignment data
        byte[] bytes = cli.getSrvProxy().getResult(jobId, "aln-fasta", null);
        DataCell[] cells = new DataCell[3];
        cells[0] = new StringCell(jobId);

        // compute the base64-encoded PHYLIP-aligned sequences suitable for use by R's
        // phangorn package; the data must be encoded (base64 was chosen) as it is common
        // to both Java and R, and because it contains multiple lines, which confuses the
        // CSV passed between KNIME and R
        String fasta = new String(bytes);
        String ret = fasta2phylip(fasta);
        String rk = r.getKey().getString();
        DataCell mac = AlignmentCellFactory.createCell(fasta, AlignmentType.AL_AA);
        if (mac instanceof MultiAlignmentCell) {
            m_muscle_map.put(rk, (MultiAlignmentCell) mac);
        }
        cells[1] = mac;
        bytes = cli.getSrvProxy().getResult(jobId, "out", null);
        cells[2] = new StringCell("<html><pre>" + new String(bytes));
        container.addRowToTable(new JoinedRow(r, new DefaultRow(r.getKey(), cells)));
    }
    container.close();
    BufferedDataTable out = container.getTable();
    return new BufferedDataTable[] {out};
}
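The `fasta2phylip` conversion used above is not shown. A minimal sketch of one way to convert an aligned FASTA string into sequential PHYLIP (the format phangorn can read), assuming all sequences share the aligned length and names are truncated to the classic 10-character limit; this is an illustration, not the actual implementation:

// Hypothetical sketch: aligned FASTA -> sequential PHYLIP.
private static String fasta2phylip(final String fasta) {
    // parse the FASTA records, preserving input order
    final Map<String, StringBuilder> seqs = new LinkedHashMap<String, StringBuilder>();
    StringBuilder current = null;
    for (final String line : fasta.split("\n")) {
        if (line.startsWith(">")) {
            current = new StringBuilder();
            seqs.put(line.substring(1).trim(), current);
        } else if (current != null) {
            current.append(line.trim());
        }
    }
    final int len = seqs.isEmpty() ? 0 : seqs.values().iterator().next().length();
    // header: number of sequences and alignment length
    final StringBuilder phylip = new StringBuilder();
    phylip.append(" ").append(seqs.size()).append(" ").append(len).append("\n");
    for (final Map.Entry<String, StringBuilder> e : seqs.entrySet()) {
        final String name = e.getKey().length() > 10 ? e.getKey().substring(0, 10) : e.getKey();
        phylip.append(String.format("%-10s", name)).append("  ").append(e.getValue()).append("\n");
    }
    return phylip.toString();
}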
@Override
protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec)
        throws Exception {
    List<File> inputFiles =
            FileSelectPanel.getInputFiles(propInputDir.getStringValue(), getAllowedFileExtensions());
    if (inputFiles.isEmpty()) {
        throw new RuntimeException("No files selected");
    }

    // first group files into plate-groups
    Map<String, List<File>> plateFiles = splitFilesIntoPlates(inputFiles);
    if (plateFiles.isEmpty()) {
        throw new RuntimeException("No valid envision-files in selection " + inputFiles);
    }

    // split files
    List<String> allAttributes = mergeAttributes(plateFiles);
    List<Attribute> colAttributes = compileColumnModel(allAttributes);
    DataTableSpec outputSpec = AttributeUtils.compileTableSpecs(colAttributes);
    BufferedDataContainer container = exec.createDataContainer(outputSpec);

    // populate the table
    int fileCounter = 0, rowCounter = 0;
    for (String barcode : plateFiles.keySet()) {
        logger.info("Processing plate " + barcode);
        Plate plate = new Plate();
        // invalidate plate-dims as these become fixed in the loop
        plate.setNumColumns(-1);
        plate.setNumRows(-1);
        for (File file : plateFiles.get(barcode)) {
            String attributeName = getAttributeNameOfEnvisionFile(file);
            parseFile(plate, attributeName, file);
            BufTableUtils.updateProgress(exec, fileCounter++, inputFiles.size());
        }

        // now create the data-rows for this table
        for (Well well : plate.getWells()) {
            if (well.getReadOutNames().isEmpty()) {
                continue;
            }
            DataCell[] knimeRow = new DataCell[colAttributes.size()];

            // first add the barcode-column
            knimeRow[0] = new StringCell(barcode);
            knimeRow[1] = colAttributes.get(1).createCell(well.getPlateRow());
            knimeRow[2] = colAttributes.get(2).createCell(well.getPlateColumn());
            for (String attributeName : allAttributes) {
                int rowIndex = allAttributes.indexOf(attributeName);
                Double value = well.getReadout(attributeName);
                if (value != null) {
                    knimeRow[3 + rowIndex] = new DoubleCell(value);
                } else {
                    knimeRow[3 + rowIndex] = DataType.getMissingCell();
                }
            }
            DataRow tableRow = new DefaultRow(new RowKey("" + rowCounter++), knimeRow);
            container.addRowToTable(tableRow);
        }
    }
    container.close();
    return new BufferedDataTable[] {container.getTable()};
}
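The `splitFilesIntoPlates` grouping is not shown, and the barcode extraction in particular is unknown. A sketch of the grouping step under the assumption that a barcode can be derived per file; `getBarcode(File)` is a hypothetical placeholder, not part of the original code:

// Hypothetical sketch: group input files by plate barcode.
private static Map<String, List<File>> splitFilesIntoPlates(List<File> files) {
    Map<String, List<File>> plateFiles = new TreeMap<String, List<File>>();
    for (File file : files) {
        String barcode = getBarcode(file); // placeholder: e.g. parsed from the file name
        if (!plateFiles.containsKey(barcode)) {
            plateFiles.put(barcode, new ArrayList<File>());
        }
        plateFiles.get(barcode).add(file);
    }
    return plateFiles;
}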
@Override
protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec)
        throws Exception {
    BufferedDataTable input = inData[0];

    // Get the condition attribute
    Attribute treatmentAttribute =
            new InputTableAttribute(this.treatmentAttribute.getStringValue(), input);

    // Get the library and reference condition names
    String libraryName = AbstractScreenTrafoModel.getAndValidateTreatment(library);
    String referenceName = AbstractScreenTrafoModel.getAndValidateTreatment(reference);

    // Get the parameters and make sure they are all double-valued columns
    List<Attribute> parameters = getParameterList(input);

    // Split the rows according to the groups contained in the condition column
    Map<String, List<DataRow>> groupedRows = AttributeUtils.splitRows(input, treatmentAttribute);
    List<DataRow> libraryRows = groupedRows.get(libraryName);
    List<DataRow> referenceRows = groupedRows.get(referenceName);
    int progress = parameters.size();
    BufTableUtils.updateProgress(exec, progress / 2, progress);

    // Initialize
    BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(getListSpec()));
    MutualInformation mutualinfo = new MutualInformation();
    mutualinfo.set_base(logbase.getDoubleValue());
    mutualinfo.set_method(method.getStringValue());
    mutualinfo.set_axeslinking(linkaxes.getBooleanValue());
    DataCell[] cells = new DataCell[container.getTableSpec().getNumColumns()];
    int p = 0;

    // Calculate the mutual information for each parameter
    for (Attribute parameter : parameters) {
        Double[] x = getDataVec(libraryRows, parameter);
        Double[] y = getDataVec(referenceRows, parameter);
        mutualinfo.set_vectors(x, y);
        if (binning.getIntValue() == 0) {
            mutualinfo.set_binning();
        } else {
            mutualinfo.set_binning(binning.getIntValue());
        }
        int[] bins = mutualinfo.get_binning();
        Double[] res = mutualinfo.calculate();
        cells[0] = new StringCell(parameter.getName());
        cells[1] = new DoubleCell(res[0]);
        cells[2] = new DoubleCell(res[1]);
        cells[3] = new DoubleCell(res[2]);
        cells[4] = new IntCell(bins[0]);
        cells[5] = new IntCell(bins[1]);
        cells[6] = new DoubleCell(mutualinfo.get_logbase());
        cells[7] = new StringCell(mutualinfo.get_method());
        container.addRowToTable(new DefaultRow("row" + p, cells));
        BufTableUtils.updateProgress(exec, (progress + p++) / 2, progress);
        exec.checkCanceled();
    }
    container.close();
    return new BufferedDataTable[] {container.getTable()};
}
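The `getDataVec` helper that feeds `MutualInformation.set_vectors` is not shown. A sketch of what it plausibly does, assuming a hypothetical `getDoubleAttribute(DataRow)` accessor on `Attribute` (that accessor name is an assumption, not confirmed by the source):

// Hypothetical sketch: extract one parameter's values from a group of rows.
private static Double[] getDataVec(final List<DataRow> rows, final Attribute parameter) {
    final Double[] values = new Double[rows.size()];
    for (int i = 0; i < rows.size(); i++) {
        values[i] = parameter.getDoubleAttribute(rows.get(i)); // assumed accessor
    }
    return values;
}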