コード例 #1
0
 /**
  * Moves the string values an instance refers to from the string attributes of one dataset into
  * the string attributes of another, and rewrites the instance in place so that its references
  * are valid for the destination dataset.
  *
  * <p>Source and destination string attributes are paired position by position, so both index
  * arrays must have the same length (the paired attributes are expected to mean the same thing,
  * merely sitting at shifted positions). Nothing happens when both datasets are the very same
  * object, and missing values in the instance are left untouched.
  *
  * @param instance the instance whose string references are updated in place
  * @param instSrcCompat {@code true} if the instance is laid out like the source dataset (its
  *     string values are found via {@code srcStrAtts}); {@code false} if it is laid out like the
  *     destination dataset (its string values are found via {@code destStrAtts})
  * @param srcDataset the dataset for which the instance's current string references are valid
  * @param srcStrAtts indices of the string attributes in the source dataset
  * @param destDataset the dataset that receives the string values
  * @param destStrAtts indices of the string attributes in the destination dataset
  * @throws IllegalArgumentException if the two index arrays differ in length
  */
 protected void copyStringValues(
     M5Instance instance,
     boolean instSrcCompat,
     M5Instances srcDataset,
     int[] srcStrAtts,
     M5Instances destDataset,
     int[] destStrAtts) {
   if (srcDataset == destDataset) {
     // Same dataset object on both sides: the references are already valid.
     return;
   }
   if (srcStrAtts.length != destStrAtts.length) {
     throw new IllegalArgumentException("Src and Dest string indices differ in length!!");
   }

   // The instance's own layout decides which index array locates its string values.
   final int[] instPositions = instSrcCompat ? srcStrAtts : destStrAtts;

   for (int n = 0; n < srcStrAtts.length; n++) {
     final int pos = instPositions[n];
     final M5Attribute from = srcDataset.attribute(srcStrAtts[n]);
     final M5Attribute to = destDataset.attribute(destStrAtts[n]);
     if (instance.isMissing(pos)) {
       continue;
     }
     // Hand the referenced string to the destination attribute and store the index it returns.
     final int movedIndex = to.addStringValue(from, (int) instance.value(pos));
     // One setValue call per string attribute; only a concern for datasets with a huge number
     // of string attributes.
     instance.setValue(pos, (double) movedIndex);
   }
 }
コード例 #2
0
  /**
   * Derives, for every nominal attribute of the input format, the weighted average class value
   * associated with each of its values, and stores in {@code m_Indices} the result of
   * {@code M5StaticUtils.sort} applied to those averages.
   *
   * <p>Instances whose class is missing, or whose value for the attribute is missing, do not
   * contribute. A value that collected no weight is assigned the attribute-wide weighted average
   * instead. Entries of {@code m_Indices} belonging to non-nominal attributes remain empty
   * arrays.
   *
   * @throws Exception declared for callers; this method does not raise it explicitly
   */
  private void computeAverageClassValues() throws Exception {

    final int numAtts = getInputFormat().numAttributes();
    double[][] avgClassValues = new double[numAtts][0];
    m_Indices = new int[numAtts][0];

    for (int attIdx = 0; attIdx < numAtts; attIdx++) {
      M5Attribute att = getInputFormat().attribute(attIdx);
      if (!att.isNominal()) {
        continue;
      }

      final int numVals = att.numValues();
      double[] means = new double[numVals];
      double[] weights = new double[numVals];
      avgClassValues[attIdx] = means;

      // Accumulate, per attribute value, the total weight and the weighted class sum.
      for (int row = 0; row < getInputFormat().numInstances(); row++) {
        M5Instance current = getInputFormat().instance(row);
        if (current.classIsMissing() || current.isMissing(attIdx)) {
          continue;
        }
        int valueIdx = (int) current.value(attIdx);
        weights[valueIdx] += current.weight();
        means[valueIdx] += current.weight() * current.classValue();
      }

      // Both totals are taken before the sums are turned into averages.
      double weightedSum = M5StaticUtils.sum(means);
      double totalWeight = M5StaticUtils.sum(weights);
      if (M5StaticUtils.gr(totalWeight, 0)) {
        for (int v = 0; v < numVals; v++) {
          if (M5StaticUtils.gr(weights[v], 0)) {
            means[v] /= weights[v];
          } else {
            // Unseen value: fall back to the average over all contributing instances.
            means[v] = weightedSum / totalWeight;
          }
        }
      }

      // NOTE(review): presumably sort() returns the value indices ordered by ascending average;
      // confirm against M5StaticUtils.sort.
      m_Indices[attIdx] = M5StaticUtils.sort(means);
    }
  }
コード例 #3
0
  /**
   * Builds and installs the output format used when the class is nominal.
   *
   * <p>Non-nominal attributes and the class attribute are copied as they are. A nominal attribute
   * with at most two values yields a single output attribute: a copy of itself, or, when
   * {@code m_Numeric} is set, a new attribute created from its name alone. A nominal attribute
   * with more than two values is replaced by one attribute per value, named
   * {@code <attribute>=<value>}; each is created from the name alone when {@code m_Numeric} is
   * set, and otherwise carries the two values {@code "f"} and {@code "t"}. The class index is
   * shifted to account for every expanded attribute that precedes the class.
   */
  private void setOutputFormatNominal() {

    final int oldClassIndex = getInputFormat().classIndex();
    final int numAtts = getInputFormat().numAttributes();
    int shiftedClassIndex = oldClassIndex;
    M5Vector attributes = new M5Vector();

    for (int pos = 0; pos < numAtts; pos++) {
      M5Attribute att = getInputFormat().attribute(pos);

      // Anything that is not a nominal predictor passes through untouched.
      if (!att.isNominal() || pos == oldClassIndex) {
        attributes.addElement(att.copy());
        continue;
      }

      final int numVals = att.numValues();

      // At most two values: one output attribute is enough.
      if (numVals <= 2) {
        if (m_Numeric) {
          attributes.addElement(new M5Attribute(att.name()));
        } else {
          attributes.addElement(att.copy());
        }
        continue;
      }

      // One attribute becomes numVals attributes, so a class located after it moves right.
      if (pos < oldClassIndex) {
        shiftedClassIndex += numVals - 1;
      }

      for (int v = 0; v < numVals; v++) {
        String indicatorName = att.name() + "=" + att.value(v);
        if (m_Numeric) {
          attributes.addElement(new M5Attribute(indicatorName));
        } else {
          // A fresh label vector per attribute, so no two attributes share one.
          M5Vector labels = new M5Vector(2);
          labels.addElement("f");
          labels.addElement("t");
          attributes.addElement(new M5Attribute(indicatorName, labels));
        }
      }
    }

    M5Instances format = new M5Instances(getInputFormat().relationName(), attributes, 0);
    format.setClassIndex(shiftedClassIndex);
    setOutputFormat(format);
  }
コード例 #4
0
  /**
   * Converts one instance for the numeric-class case and appends the result to the output queue.
   *
   * <p>Non-nominal attributes and the class attribute are copied across unchanged. Every other
   * nominal attribute with {@code n} values occupies {@code n - 1} consecutive output slots: if
   * the value is missing, each slot receives the instance's (missing) value; otherwise the slots
   * are set to 1 up to the position at which the value appears in {@code m_Indices[j]} and to 0
   * from there on.
   *
   * @param instance the instance to convert
   */
  private void convertInstanceNumeric(M5Instance instance) {

    double[] converted = new double[outputFormatPeek().numAttributes()];
    final int numAtts = getInputFormat().numAttributes();
    final int classIdx = getInputFormat().classIndex();
    int outPos = 0;

    for (int inPos = 0; inPos < numAtts; inPos++) {
      M5Attribute att = getInputFormat().attribute(inPos);

      // Straight copy for anything that is not a nominal predictor.
      if (!att.isNominal() || inPos == classIdx) {
        converted[outPos] = instance.value(inPos);
        outPos++;
        continue;
      }

      // A nominal attribute with n values is spread over n - 1 output slots.
      final int width = att.numValues() - 1;
      if (instance.isMissing(inPos)) {
        // Carry the missing marker into every slot.
        for (int slot = 0; slot < width; slot++) {
          converted[outPos + slot] = instance.value(inPos);
        }
      } else {
        final int observed = (int) instance.value(inPos);
        int slot = 0;
        // Ones until the observed value is reached in the stored ordering...
        for (; observed != m_Indices[inPos][slot]; slot++) {
          converted[outPos + slot] = 1;
        }
        // ...zeros for whatever slots remain.
        for (; slot < width; slot++) {
          converted[outPos + slot] = 0;
        }
      }
      outPos += width;
    }

    // Keep the sparse representation if the input instance used it.
    M5Instance result =
        (instance instanceof M5SparseInstance)
            ? new M5SparseInstance(instance.weight(), converted)
            : new M5Instance(instance.weight(), converted);

    copyStringValues(
        result,
        false,
        instance.dataset(),
        getInputStringIndex(),
        getOutputFormat(),
        getOutputStringIndex());
    result.setDataset(getOutputFormat());
    push(result);
  }