Exemplo n.º 1
0
 /**
  * Rebuilds the cluster tree received on the first input port using the distance vectors found
  * in the second input port's table.
  *
  * <p>Steps, in order: collect one distance vector per row key from the configured distance
  * column, run {@code visit} over the original tree, build a replacement tree with {@code
  * buildNewTree}, flatten both trees, log {@code sumDistance} for each, and return a new {@link
  * ClusterTreeModel} whose summary reports the same two values.
  *
  * @param inObjects {@code inObjects[0]} is cast to {@link ClusterTreeModel}, {@code inObjects[1]}
  *     to {@link BufferedDataTable}
  * @param exec used for progress messages, cancellation checks and the sub-context given to
  *     {@code visit}
  * @return a single-element array holding the rebuilt cluster tree model
  * @throws IllegalStateException if the configured distance column is not in the table
  * @throws IllegalArgumentException if a row has no distance vector in that column
  * @throws Exception anything thrown by the helpers or by cancellation
  */
 @Override
 protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec)
     throws Exception {
   exec.setMessage("initialising");
   final ClusterTreeModel model = (ClusterTreeModel) inObjects[0];
   final BufferedDataTable data = (BufferedDataTable) inObjects[1];

   // A negative index means the column is absent; fail here with a clear message instead of
   // letting getCell() fail with an index error on the first row.
   final String distanceColumnName = distanceColumnModel.getColumnName();
   final int distanceColumnIdx = data.getDataTableSpec().findColumnIndex(distanceColumnName);
   if (distanceColumnIdx < 0) {
     throw new IllegalStateException(
         "Distance column '" + distanceColumnName + "' not found in the input table");
   }

   // Number of cluster distances plus one; used below as capacity hint and passed to both
   // visit() and the ClusterTreeModel constructor.
   // NOTE(review): presumably the number of leaves in the tree -- confirm.
   final int entryCount = model.getClusterDistances().length + 1;

   // Single pass over the table: gather the distance vector and the (row, position) pair per key.
   final Map<RowKey, DistanceVectorDataValue> distanceMatrix =
       new HashMap<RowKey, DistanceVectorDataValue>();
   final Map<RowKey, Pair<DataRow, Integer>> rows = new HashMap<RowKey, Pair<DataRow, Integer>>();
   int idx = 0;
   for (final DataRow dataRow : data) {
     exec.checkCanceled();
     final Object cell = dataRow.getCell(distanceColumnIdx);
     // Covers missing cells as well as cells of an unexpected type.
     if (!(cell instanceof DistanceVectorDataValue)) {
       throw new IllegalArgumentException(
           "Row '"
               + dataRow.getKey()
               + "' has no distance vector in column '"
               + distanceColumnName
               + "'");
     }
     distanceMatrix.put(dataRow.getKey(), (DistanceVectorDataValue) cell);
     rows.put(dataRow.getKey(), Pair.create(dataRow, Integer.valueOf(idx++)));
   }

   exec.setMessage("computing");
   final DendrogramNode origRoot = model.getRoot();
   final Map<Triple<DendrogramNode, RowKey, RowKey>, Number> m =
       visit(
           origRoot,
           new HashMap<Triple<DendrogramNode, RowKey, RowKey>, Number>(),
           distanceMatrix,
           entryCount,
           exec.createSilentSubExecutionContext(.9));

   exec.setMessage("creating final tree");
   final ClusterViewNode tree = buildNewTree(convertM(m), origRoot, rows, exec).getO1();

   // Flatten both trees so their summed distances can be compared.
   final ArrayList<DistanceVectorDataValue> origList =
       new ArrayList<DistanceVectorDataValue>(entryCount);
   final ArrayList<DistanceVectorDataValue> newList =
       new ArrayList<DistanceVectorDataValue>(entryCount);
   flatten(origRoot, origList, distanceMatrix);
   exec.checkCanceled();
   flatten(tree, newList, distanceMatrix);
   logger.info("Before:      " + sumDistance(origList));
   logger.info("After:       " + sumDistance(newList));

   // Anonymous subclass so the port summary shows the before/after sums.
   final ClusterTreeModel clusterTreeModel =
       new ClusterTreeModel(
           (DataTableSpec) model.getSpec(), tree, model.getClusterDistances(), entryCount) {
         @Override
         public String getSummary() {
           return "Before: " + sumDistance(origList) + "\nAfter:  " + sumDistance(newList);
         }
       };
   return new PortObject[] {clusterTreeModel};
 }
Exemplo n.º 2
0
  /**
   * Submits one MUSCLE job per input row through {@link MuscleClient} and appends three cells to
   * each row: the job id, the alignment cell built from the {@code aln-fasta} result, and the
   * {@code out} result wrapped in an HTML {@code <pre>} prefix.
   *
   * <p>Each row must carry two list cells of equal size (sequences and accessions) with between 1
   * and 1000 entries. Alignment cells that are {@link MultiAlignmentCell}s are also stored in
   * {@code m_muscle_map} under the row id.
   *
   * <p>{@inheritDoc}
   *
   * @throws Exception if the e-mail address is unset or still the default, the configured columns
   *     are missing, a row fails validation, the node is cancelled, or the service call fails
   */
  @Override
  protected BufferedDataTable[] execute(
      final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Compare the configured string value, not the settings object itself: calling equals() on
    // m_email directly never matched the default address, so this guard was ineffective.
    // NOTE(review): assumes DEFAULT_EMAIL is a String -- confirm.
    final String email = m_email.getStringValue();
    if (email == null || email.trim().length() == 0 || email.equals(DEFAULT_EMAIL)) {
      throw new Exception(
          "You must set a valid E-Mail for EBI to contact you in the event of problems with the service!");
    }

    final BufferedDataTable input = inData[0];
    final int rowCount = input.getRowCount();
    final int seqIdx =
        input.getSpec().findColumnIndex(((SettingsModelString) m_seq_col).getStringValue());
    final int accsnIdx =
        input.getSpec().findColumnIndex(((SettingsModelString) m_accsn_col).getStringValue());
    if (seqIdx < 0 || accsnIdx < 0) {
      throw new Exception("Cannot find columns... valid data?");
    }

    // Output = all input columns followed by the columns from make_output_spec().
    final DataTableSpec outputSpec =
        new DataTableSpec(input.getDataTableSpec(), make_output_spec());
    final BufferedDataContainer container = exec.createDataContainer(outputSpec, false, 0);

    final MuscleClient cli = new MuscleClient();

    // Each row is a separate MUSCLE job: the sequences are in one collection cell, the accessions
    // (IDs) in the other.
    int done = 0;
    for (final DataRow row : input) {
      final String rowId = row.getKey().getString();

      // Reject missing or non-list cells with a row-specific message instead of a bare
      // ClassCastException.
      final DataCell seqCell = row.getCell(seqIdx);
      final DataCell accsnCell = row.getCell(accsnIdx);
      if (!(seqCell instanceof ListCell) || !(accsnCell instanceof ListCell)) {
        throw new Exception(
            "Sequence and accession columns must contain list cells: error at row " + rowId);
      }
      final ListCell seqs = (ListCell) seqCell;
      final ListCell accsns = (ListCell) accsnCell;

      final int seqCount = seqs.size();
      if (seqCount != accsns.size()) {
        throw new Exception(
            "Every sequence must have a corresponding accession: error at row " + rowId);
      }
      if (seqCount < 1) {
        throw new Exception("Cannot MUSCLE zero sequences: error at row " + rowId);
      }
      if (seqCount > 1000) {
        throw new Exception("Too many sequences in row " + rowId);
      }

      // Build an in-memory FASTA document: ">accession\nsequence\n" per entry.
      final StringBuilder fastaInput = new StringBuilder();
      for (int i = 0; i < seqCount; i++) {
        fastaInput
            .append(">")
            .append(accsns.get(i).toString())
            .append("\n")
            .append(seqs.get(i).toString())
            .append("\n");
      }

      // Lodge the job, titled with the row id.
      final InputParameters ip = new InputParameters();
      ip.setSequence(fastaInput.toString());
      final String jobId = cli.runApp(email, rowId, ip);

      exec.checkCanceled();
      exec.setProgress(((double) done) / rowCount, "Executing " + jobId);
      Thread.sleep(20 * 1000); // initial 20 second wait before waitForCompletion is called
      waitForCompletion(cli, exec, jobId);
      done++;

      // Decode service results with an explicit charset rather than the platform default.
      final byte[] alignmentBytes = cli.getSrvProxy().getResult(jobId, "aln-fasta", null);
      final String fasta = new String(alignmentBytes, "UTF-8");

      // NOTE(review): the PHYLIP conversion result is not used anywhere below. The call is kept
      // so that any validation or side effect inside fasta2phylip is preserved -- confirm whether
      // it can be removed.
      fasta2phylip(fasta);

      final DataCell[] cells = new DataCell[3];
      cells[0] = new StringCell(jobId);

      // Alignment cell; keep multi-alignment cells in m_muscle_map keyed by row id.
      final DataCell alignmentCell = AlignmentCellFactory.createCell(fasta, AlignmentType.AL_AA);
      if (alignmentCell instanceof MultiAlignmentCell) {
        m_muscle_map.put(rowId, (MultiAlignmentCell) alignmentCell);
      }
      cells[1] = alignmentCell;

      // Raw "out" result, prefixed so it is displayed as preformatted HTML.
      final byte[] outBytes = cli.getSrvProxy().getResult(jobId, "out", null);
      cells[2] = new StringCell("<html><pre>" + new String(outBytes, "UTF-8"));

      container.addRowToTable(new JoinedRow(row, new DefaultRow(row.getKey(), cells)));
    }
    container.close();
    final BufferedDataTable out = container.getTable();
    return new BufferedDataTable[] {out};
  }