Exemplo n.º 1
0
  /**
   * Input an instance for filtering. Ordinarily the instance is processed and made available for
   * output immediately. Some filters require all instances be read before producing output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input format has been defined.
   */
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    Instance inst = (Instance) instance.copy();

    // First copy string values from input to output
    copyValues(inst, true, inst.dataset(), getOutputFormat());

    // Insert the new attribute and reassign to output
    inst.setDataset(null);
    inst.insertAttributeAt(m_Insert.getIndex());
    inst.setDataset(getOutputFormat());
    push(inst);
    return true;
  }
Exemplo n.º 2
0
  /**
   * Processes the given data (may change the provided dataset) and returns the modified version.
   * This method is called in batchFinished().
   *
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    Instances result;
    int i;
    int n;
    double[] values;
    String value;
    Instance inst;
    Instance newInst;

    // we need the complete input data!
    if (!isFirstBatchDone()) setOutputFormat(determineOutputFormat(getInputFormat()));

    result = new Instances(getOutputFormat());

    for (i = 0; i < instances.numInstances(); i++) {
      inst = instances.instance(i);
      values = inst.toDoubleArray();

      for (n = 0; n < values.length; n++) {
        if (!m_Cols.isInRange(n) || !instances.attribute(n).isNumeric() || inst.isMissing(n))
          continue;

        // get index of value
        if (instances.attribute(n).type() == Attribute.DATE) value = inst.stringValue(n);
        else value = Utils.doubleToString(inst.value(n), MAX_DECIMALS);

        values[n] = result.attribute(n).indexOfValue(value);
      }

      // generate new instance
      if (inst instanceof SparseInstance) newInst = new SparseInstance(inst.weight(), values);
      else newInst = new DenseInstance(inst.weight(), values);

      // copy possible string, relational values
      newInst.setDataset(getOutputFormat());
      copyValues(newInst, false, inst.dataset(), getOutputFormat());

      result.add(newInst);
    }

    return result;
  }
  public void buildClusterer(ArrayList<String> seqDB, double[][] sm) {
    seqList = seqDB;

    this.setSimMatrix(sm);

    Attribute seqString = new Attribute("sequence", (FastVector) null);
    FastVector attrInfo = new FastVector();
    attrInfo.addElement(seqString);
    Instances data = new Instances("data", attrInfo, 0);

    for (int i = 0; i < seqList.size(); i++) {
      Instance currentInst = new Instance(1);
      currentInst.setDataset(data);
      currentInst.setValue(0, seqList.get(i));
      data.add(currentInst);
    }

    try {
      buildClusterer(data);
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }