/**
   * Processes a classifier's prediction for one instance and, when visualization data is being
   * saved, appends a plot instance together with its shape and size entries.
   *
   * <p>The plot instance copies every attribute that precedes the class attribute, stores the
   * prediction in the class slot, stores the actual class value in the following slot and shifts
   * the remaining attributes up by one position.
   *
   * <p>For a nominal class, m_PlotShapes receives {@code Plot2D.MISSING_SHAPE} when the actual
   * class or the prediction is missing, {@code Plot2D.ERROR_SHAPE} when the prediction differs
   * from the actual class and {@code Plot2D.CONST_AUTOMATIC_SHAPE} otherwise; m_PlotSizes receives
   * {@code Plot2D.DEFAULT_SHAPE_SIZE}. For any other class type, m_PlotSizes receives the error
   * (prediction minus actual value), or {@code null} when either of the two is missing.
   *
   * <p>Any exception raised while predicting or recording is printed to standard error and not
   * propagated.
   *
   * @param toPredict the actual data point
   * @param classifier the classifier
   * @param eval the evaluation object to use for evaluating the classifier on the instance to
   *     predict
   * @see #m_PlotShapes
   * @see #m_PlotSizes
   * @see #m_PlotInstances
   */
  public void process(Instance toPredict, Classifier classifier, Evaluation eval) {
    try {
      double pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict);

      if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
        toPredict =
            ((weka.classifiers.misc.InputMappedClassifier) classifier)
                .constructMappedInstance(toPredict);
      }

      if (!m_SaveForVisualization) {
        return;
      }

      if (m_PlotInstances != null) {
        double[] values = new double[m_PlotInstances.numAttributes()];
        for (int i = 0; i < m_PlotInstances.numAttributes(); i++) {
          if (i < toPredict.classIndex()) {
            values[i] = toPredict.value(i);
          } else if (i == toPredict.classIndex()) {
            // class slot holds the prediction, the next slot the actual class value
            values[i] = pred;
            values[i + 1] = toPredict.value(i);
            i++;
          } else {
            // attributes after the class are shifted by the inserted prediction column
            values[i] = toPredict.value(i - 1);
          }
        }

        m_PlotInstances.add(new DenseInstance(1.0, values));

        boolean actualOrPredictedMissing =
            toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred);

        if (toPredict.classAttribute().isNominal()) {
          if (actualOrPredictedMissing) {
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.MISSING_SHAPE));
          } else if (pred != toPredict.classValue()) {
            // set to default error point shape
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.ERROR_SHAPE));
          } else {
            // otherwise set to constant (automatically assigned) point shape
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.CONST_AUTOMATIC_SHAPE));
          }
          m_PlotSizes.addElement(Integer.valueOf(Plot2D.DEFAULT_SHAPE_SIZE));
        } else {
          // store the error (to be converted to a point size later)
          Double errd = null;
          if (!actualOrPredictedMissing) {
            errd = Double.valueOf(pred - toPredict.classValue());
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.CONST_AUTOMATIC_SHAPE));
          } else {
            // missing shape if actual class not present or prediction is missing
            m_PlotShapes.addElement(Integer.valueOf(Plot2D.MISSING_SHAPE));
          }
          m_PlotSizes.addElement(errd);
        }
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
Beispiel #2
0
        /**
         * Tests numeric equality of the left-hand attribute against either another attribute of
         * the same instance or a constant operand.
         *
         * <p>When comparing two attributes, two missing values count as equal and a single
         * missing value counts as unequal. When comparing against the constant, a missing
         * left-hand value never matches.
         */
        @Override
        boolean evaluate(
            Instance inst,
            int lhsAttIndex,
            String rhsOperand,
            double numericOperand,
            Pattern regexPattern,
            boolean rhsIsAttribute,
            int rhsAttIndex) {

          final boolean lhsMissing = inst.isMissing(lhsAttIndex);

          if (!rhsIsAttribute) {
            // constant operand: a missing left-hand side can never be equal
            return !lhsMissing && Utils.eq(inst.value(lhsAttIndex), numericOperand);
          }

          final boolean rhsMissing = inst.isMissing(rhsAttIndex);
          if (lhsMissing || rhsMissing) {
            // equal only when both sides are missing
            return lhsMissing && rhsMissing;
          }

          return Utils.eq(inst.value(lhsAttIndex), inst.value(rhsAttIndex));
        }
  /**
   * Compares two datasets and throws if they differ.
   *
   * <p>Headers are compared only when m_CheckHeader is set. Instances are compared pairwise by
   * position: weights may differ by at most m_MaxDiffWeights, and attribute values are compared
   * either by their string representation (m_CompareValuesAsString) or numerically with a
   * tolerance of m_MaxDiffValues. A value that is missing in one dataset but present in the other
   * always counts as a difference.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @throws Exception if the datasets differ
   */
  protected void compareDatasets(Instances data1, Instances data2) throws Exception {

    if (m_CheckHeader && !data2.equalHeaders(data1)) {
      throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
    }
    if (data2.numInstances() != data1.numInstances()) {
      throw new Exception("number of instances has changed");
    }
    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);

      // The weight belongs to the instance, not to an attribute: check it once per instance so
      // it is neither repeated per attribute nor skipped when there are no attributes.
      if (Math.abs(orig.weight() - copy.weight()) > m_MaxDiffWeights) {
        throw new Exception("instance weights have changed");
      }

      for (int j = 0; j < orig.numAttributes(); j++) {
        // Missing-ness must agree in both directions. Without this check a value that became
        // missing in the copy would slip through the numeric comparison below, because a
        // difference involving NaN is never greater than the tolerance.
        if (orig.isMissing(j) != copy.isMissing(j)) {
          throw new Exception("instances have changed");
        }
        if (orig.isMissing(j)) {
          continue;
        }
        if (m_CompareValuesAsString) {
          if (!orig.toString(j).equals(copy.toString(j))) {
            throw new Exception("instances have changed");
          }
        } else if (Math.abs(orig.value(j) - copy.value(j)) > m_MaxDiffValues) {
          throw new Exception("instances have changed");
        }
      }
    }
  }
Beispiel #4
0
  /**
   * Converts one input instance to the output format and appends it to the output queue.
   *
   * <p>Numeric attributes selected by m_DiscretizeCols are replaced by bin information derived
   * from m_CutPoints: a single bin index in the normal case, or one 0/1 indicator per cut point
   * when m_MakeBinary is set. Missing values stay missing. All other attributes are copied
   * unchanged.
   *
   * @param instance the instance to convert
   */
  protected void convertInstance(Instance instance) {

    double[] converted = new double[outputFormatPeek().numAttributes()];
    int out = 0;

    for (int att = 0; att < getInputFormat().numAttributes(); att++) {
      boolean discretize =
          m_DiscretizeCols.isInRange(att) && getInputFormat().attribute(att).isNumeric();
      if (!discretize) {
        // attribute is passed through untouched
        converted[out++] = instance.value(att);
        continue;
      }

      double current = instance.value(att);

      if (m_CutPoints[att] == null) {
        // no cut points for this attribute: every non-missing value falls into bin 0
        converted[out++] = instance.isMissing(att) ? Utils.missingValue() : 0;
      } else if (m_MakeBinary) {
        // one indicator per cut point: 0 if the value is at or below the cut, 1 otherwise
        for (int cut = 0; cut < m_CutPoints[att].length; cut++) {
          if (instance.isMissing(att)) {
            converted[out++] = Utils.missingValue();
          } else {
            converted[out++] = (current <= m_CutPoints[att][cut]) ? 0 : 1;
          }
        }
      } else {
        if (instance.isMissing(att)) {
          converted[out] = Utils.missingValue();
        } else {
          // bin index = position of the first cut point that is >= the value
          int bin = 0;
          while (bin < m_CutPoints[att].length && !(current <= m_CutPoints[att][bin])) {
            bin++;
          }
          converted[out] = bin;
        }
        out++;
      }
    }

    Instance result =
        (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), converted)
            : new DenseInstance(instance.weight(), converted);
    result.setDataset(getOutputFormat());
    copyValues(result, false, instance.dataset(), getOutputFormat());
    result.setDataset(getOutputFormat());
    push(result);
  }
Beispiel #5
0
 /**
  * Applies the filter obtained from {@code getFilter("6,3")} and checks the result: the output
  * has one attribute fewer than the input, and output attribute index 4 holds the difference of
  * input attributes 5 and 2, or is missing when either of those inputs is missing.
  */
 public void testTypical() {
   m_Filter = getFilter("6,3");
   Instances filtered = useFilter();
   assertEquals(m_Instances.numAttributes() - 1, filtered.numAttributes());

   int row = 0;
   while (row < filtered.numInstances()) {
     Instance source = m_Instances.instance(row);
     boolean operandMissing = source.isMissing(5) || source.isMissing(2);
     if (!operandMissing) {
       assertEquals(source.value(5) - source.value(2), filtered.instance(row).value(4), EXPR_DELTA);
     } else {
       assertTrue(
           "Instance " + (row + 1) + " should have been ?", filtered.instance(row).isMissing(4));
     }
     row++;
   }
 }
Beispiel #6
0
  /**
   * Accepts one instance for filtering, converts it immediately and queues the result for
   * output.
   *
   * <p>The input values are copied into an array with one additional trailing slot, which is
   * handed to {@code evaluateExpression} before the output instance is built from it.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @exception IllegalStateException if no input format has been defined.
   * @exception Exception if there was a problem during the filtering.
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    int numInputAtts = instance.numAttributes();
    // one slot more than the input; presumably filled by evaluateExpression -- TODO confirm
    double[] vals = new double[numInputAtts + 1];
    for (int att = 0; att < numInputAtts; att++) {
      vals[att] = instance.isMissing(att) ? Instance.missingValue() : instance.value(att);
    }

    evaluateExpression(vals);

    // keep the storage kind (sparse vs. dense) of the incoming instance
    Instance converted =
        (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), vals)
            : new Instance(instance.weight(), vals);
    copyStringValues(converted, false, instance.dataset(), getOutputFormat());
    converted.setDataset(getOutputFormat());
    push(converted);
    return true;
  }
  /**
   * Accepts one instance for filtering and queues a copy whose date attribute has been
   * round-tripped through the output date format.
   *
   * <p>A non-missing date value is formatted and re-parsed with m_OutputAttribute so that any
   * precision the new format cannot represent is dropped. Missing values are left untouched.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input format has been defined
   * @throws RuntimeException if the output format cannot parse a date it has just formatted; the
   *     underlying {@link ParseException} is attached as the cause
   * @throws Exception declared for compatibility with the overridden signature
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }
    Instance newInstance = (Instance) instance.copy();
    int index = m_AttIndex.getIndex();
    if (!newInstance.isMissing(index)) {
      double value = instance.value(index);
      try {
        // Format and parse under the new format to force any required
        // loss in precision.
        value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value));
      } catch (ParseException pe) {
        // keep the original failure as the cause so the offending text/offset is not lost
        throw new RuntimeException("Output date format couldn't parse its own output!!", pe);
      }
      newInstance.setValue(index, value);
    }
    push(newInstance);
    return true;
  }
Beispiel #8
0
  /**
   * Computes, for every nominal attribute, the weighted average class value of each attribute
   * value and stores the ordering of those averages in m_Indices.
   *
   * <p>Attribute values that never occur (with a non-missing class) receive the overall weighted
   * average for that attribute. Non-nominal attributes keep an empty index array.
   */
  private void computeAverageClassValues() {

    int numAtts = getInputFormat().numAttributes();
    double[][] avgClassValues = new double[numAtts][0];
    m_Indices = new int[numAtts][0];

    for (int attIdx = 0; attIdx < numAtts; attIdx++) {
      Attribute att = getInputFormat().attribute(attIdx);
      if (!att.isNominal()) {
        continue;
      }

      avgClassValues[attIdx] = new double[att.numValues()];
      double[] counts = new double[att.numValues()];

      // accumulate weighted class sums and weights per attribute value
      for (int n = 0; n < getInputFormat().numInstances(); n++) {
        Instance instance = getInputFormat().instance(n);
        if (instance.classIsMissing() || instance.isMissing(attIdx)) {
          continue;
        }
        counts[(int) instance.value(attIdx)] += instance.weight();
        avgClassValues[attIdx][(int) instance.value(attIdx)] +=
            instance.weight() * instance.classValue();
      }

      double sum = Utils.sum(avgClassValues[attIdx]);
      double totalCounts = Utils.sum(counts);
      if (Utils.gr(totalCounts, 0)) {
        for (int v = 0; v < att.numValues(); v++) {
          if (Utils.gr(counts[v], 0)) {
            avgClassValues[attIdx][v] /= counts[v];
          } else {
            // unseen value: fall back to the attribute-wide average
            avgClassValues[attIdx][v] = sum / totalCounts;
          }
        }
      }
      m_Indices[attIdx] = Utils.sort(avgClassValues[attIdx]);
    }
  }
Beispiel #9
0
        /**
         * Tests whether the string value of the left-hand attribute matches the supplied regular
         * expression in full. Returns false when the value is missing, when no pattern is given
         * or when the attribute has no string representation.
         */
        @Override
        boolean evaluate(
            Instance inst,
            int lhsAttIndex,
            String rhsOperand,
            double numericOperand,
            Pattern regexPattern,
            boolean rhsIsAttribute,
            int rhsAttIndex) {

          if (inst.isMissing(lhsAttIndex) || regexPattern == null) {
            return false;
          }

          final String lhsString;
          try {
            lhsString = inst.stringValue(lhsAttIndex);
          } catch (IllegalArgumentException ex) {
            // the attribute cannot be rendered as a string, so it cannot match
            return false;
          }

          return regexPattern.matcher(lhsString).matches();
        }
  /**
   * Converts an input instance into the output representation.
   *
   * <p>Attributes listed in m_unchanged are copied as they are. Every other non-class attribute
   * is expanded into one value per class label, obtained from the estimators registered for that
   * attribute in m_estimatorLookup (or missing values if the input value is missing). The class
   * value is written to the last position.
   *
   * @param current the input instance to convert
   * @return a transformed instance
   * @throws Exception if a problem occurs
   */
  protected Instance convertInstance(Instance current) throws Exception {
    double[] vals = new double[getOutputFormat().numAttributes()];
    int next = 0;

    for (int att = 0; att < current.numAttributes(); att++) {
      if (att == current.classIndex()) {
        continue;
      }

      String attName = current.attribute(att).name();
      boolean passThrough = m_unchanged != null && m_unchanged.attribute(attName) != null;
      if (passThrough) {
        vals[next++] = current.value(att);
        continue;
      }

      Estimator[] estForAtt = m_estimatorLookup.get(attName);
      for (int cls = 0; cls < current.classAttribute().numValues(); cls++) {
        if (current.isMissing(att)) {
          vals[next++] = Utils.missingValue();
        } else {
          vals[next++] = estForAtt[cls].getProbability(current.value(att));
        }
      }
    }

    vals[vals.length - 1] = current.classValue();
    return new DenseInstance(current.weight(), vals);
  }
  /**
   * Builds the count table {@code alpha[attribute][label][value]} from the pool and delegates to
   * the array-based overload.
   *
   * <p>Every count starts at one and is incremented once for each instance whose value for that
   * attribute is not missing. The class attribute is skipped.
   *
   * <p>NOTE(review): the table has {@code numAttributes() - 1} rows indexed by attribute index,
   * which presumably assumes the class is the last attribute -- confirm against callers.
   *
   * @param pool the instances to count over
   * @param attr_i attribute index passed through to the array-based overload
   * @return the value returned by the array-based overload
   */
  public double ExpectedClassificationError(Instances pool, int attr_i) {

    int NumberOfFeatures = pool.numAttributes() - 1;
    int NumberOfLabels = pool.numClasses();

    // allocate the table and initialize alpha's to one
    int[][][] alpha = new int[NumberOfFeatures][NumberOfLabels][];
    for (int i = 0; i < NumberOfFeatures; i++) {
      for (int j = 0; j < NumberOfLabels; j++) {
        alpha[i][j] = new int[pool.attribute(i).numValues()];
        for (int k = 0; k < alpha[i][j].length; k++) {
          alpha[i][j][k] = 1;
        }
      }
    }

    // construct alpha's
    for (int i = 0; i < NumberOfFeatures; i++) { // for each attribute
      if (i == pool.classIndex()) {
        // Skip the class attribute. Using continue instead of bumping i keeps the index within
        // the table when the class attribute is the last row.
        continue;
      }
      for (Enumeration<Instance> e = pool.enumerateInstances(); e.hasMoreElements(); ) {
        Instance inst = e.nextElement();
        if (!inst.isMissing(i)) { // attribute i is not missing (i.e. it has been bought)
          int j = (int) inst.classValue();
          int k = (int) inst.value(i);
          alpha[i][j][k]++;
        }
      }
    }
    return ExpectedClassificationError(alpha, attr_i);
  }
Beispiel #12
0
  /**
   * Computes the class membership probabilities for the given test instance.
   *
   * <p>For every class the prior is multiplied with one factor per non-missing attribute: the
   * stored entry of m_Counts for nominal attributes, or the normal density given by m_Means and
   * m_Devs otherwise. The result is normalized before it is returned.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if distribution can't be computed
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    double[] probs = new double[instance.numClasses()];

    for (int cls = 0; cls < instance.numClasses(); cls++) {
      probs[cls] = 1;

      // attIndex follows the enumeration order and advances for missing attributes as well
      int attIndex = 0;
      for (Enumeration<Attribute> atts = instance.enumerateAttributes();
          atts.hasMoreElements();
          attIndex++) {
        Attribute attribute = atts.nextElement();
        if (instance.isMissing(attribute)) {
          continue;
        }
        if (attribute.isNominal()) {
          probs[cls] *= m_Counts[cls][attIndex][(int) instance.value(attribute)];
        } else {
          probs[cls] *=
              normalDens(instance.value(attribute), m_Means[cls][attIndex], m_Devs[cls][attIndex]);
        }
      }

      probs[cls] *= m_Priors[cls];
    }

    // Normalize probabilities
    Utils.normalize(probs);

    return probs;
  }
 /**
  * Finds the first instance that carries the desired class label and whose desired attribute is
  * still missing.
  *
  * @return the row index of that instance, or -1 if there is none
  */
 public int SelectRow_First(Instances pool, int desiredAttr, int desiredLabel) {
   int row = 0;
   while (row < pool.numInstances()) {
     Instance candidate = pool.instance(row);
     boolean labelMatches = (int) candidate.classValue() == desiredLabel;
     if (labelMatches && candidate.isMissing(desiredAttr)) {
       return row;
     }
     row++;
   }
   return -1;
 }
 /**
  * Finds the first instance whose desired attribute is still missing, regardless of its label.
  *
  * @return the row index of that instance, or -1 if there is none
  */
 public int SelectRow_First(Instances pool, int desiredAttr) {
   int row = 0;
   while (row < pool.numInstances()) {
     if (pool.instance(row).isMissing(desiredAttr)) {
       return row;
     }
     row++;
   }
   return -1;
 }
  /**
   * Selects a row among the misclassified instances whose desired attribute is still missing,
   * preferring the one whose predicted class distribution has the smallest score.
   *
   * <p>The score of a candidate is the sum of {@code MyXLogX(p_i)} over its predicted class
   * probabilities. Per the original author's note this corresponds to the KL divergence from the
   * uniform distribution, KL(P||U) = sum_i p_i log(p_i), so the minimum is the distribution
   * closest to uniform. Ties are broken at random using {@code r}.
   *
   * <p>An instance for which the estimator throws is reported on standard error and skipped.
   *
   * @param pool the instances to choose from
   * @param myEstimator the classifier used for prediction
   * @param desiredAttr the attribute that must still be missing in a candidate
   * @return the index of the selected row, or -1 if no instance qualifies
   */
  public int SelectRow_KLDivergenceMisclassified(
      Instances pool, Classifier myEstimator, int desiredAttr) {

    int numInstances = pool.numInstances();
    if (numInstances == 0) {
      return -1;
    }

    double[] KLDivs = new double[numInstances];
    boolean[] isValidInstance = new boolean[numInstances];

    for (int i = 0; i < numInstances; i++) {
      Instance inst = pool.instance(i);
      // non-candidates keep the sentinel so they can never be the minimum of a valid candidate
      KLDivs[i] = Double.MAX_VALUE;

      if (!inst.isMissing(desiredAttr)) {
        continue;
      }

      double[] probs;
      try {
        if (inst.classValue() == myEstimator.classifyInstance(inst)) {
          continue; // correctly classified: not a candidate
        }
        probs = myEstimator.distributionForInstance(inst);
      } catch (Exception e) {
        // do not reuse results from a previous instance; just leave this one out
        e.printStackTrace();
        continue;
      }

      double divergence = 0;
      for (int j = 0; j < probs.length; j++) {
        divergence += MyXLogX(probs[j]);
      }
      KLDivs[i] = divergence;
      isValidInstance[i] = true;
    }

    double leastDivergence = KLDivs[Utils.minIndex(KLDivs)];
    int numLeastDivs = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && KLDivs[i] == leastDivergence) {
        numLeastDivs++;
      }
    }
    if (numLeastDivs == 0) {
      // no candidate at all: Random.nextInt(0) would throw
      return -1;
    }

    int randomInstance = r.nextInt(numLeastDivs);
    int index = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && KLDivs[i] == leastDivergence) {
        if (index == randomInstance) {
          return i;
        }
        index++;
      }
    }
    return -1;
  }
Beispiel #16
0
  /**
   * Returns the index of the subset the instance is assigned to by this split.
   *
   * <p>Returns -1 when the value of the split attribute is missing. For a nominal attribute,
   * subset 0 holds the value equal to the split point; for any other attribute, subset 0 holds
   * values smaller than or equal to the split point. Everything else goes to subset 1.
   *
   * @exception Exception if something goes wrong
   */
  public final int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_attIndex)) {
      return -1;
    }

    boolean inFirstSubset;
    if (instance.attribute(m_attIndex).isNominal()) {
      inFirstSubset = (int) m_splitPoint == (int) instance.value(m_attIndex);
    } else {
      inFirstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
    }
    return inFirstSubset ? 0 : 1;
  }
  /**
   * Predicts the target value for an instance with the fitted univariate linear model.
   *
   * <p>Without a selected attribute the prediction is just the intercept; otherwise it is
   * intercept + slope * attribute value.
   *
   * @param inst the instance to predict
   * @return the predicted value
   * @throws Exception if the selected attribute is missing in the instance
   */
  public double classifyInstance(Instance inst) throws Exception {

    if (m_attribute == null) {
      return m_intercept;
    }

    int attIndex = m_attribute.index();
    if (inst.isMissing(attIndex)) {
      throw new Exception("UnivariateLinearRegression: No missing values!");
    }
    return m_intercept + m_slope * inst.value(attIndex);
  }
  /**
   * Determines the output format from the input format.
   *
   * <p>Every numeric attribute inside the m_Cols range is replaced by an attribute built from
   * the sorted set of distinct non-missing values found in the data (string form for date
   * attributes, numbers rendered with MAX_DECIMALS otherwise). All other attributes are carried
   * over unchanged. In case the output format cannot be returned immediately, i.e.,
   * immediateOutputFormat() returns false, this method will be called from batchFinished().
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    m_Cols.setUpper(inputFormat.numAttributes() - 1);
    Instances data = new Instances(inputFormat);
    FastVector atts = new FastVector();

    for (int i = 0; i < data.numAttributes(); i++) {
      boolean convert = m_Cols.isInRange(i) && data.attribute(i).isNumeric();
      if (!convert) {
        atts.addElement(data.attribute(i));
        continue;
      }

      // date attribute?
      boolean isDate = (data.attribute(i).type() == Attribute.DATE);

      // collect the distinct non-missing values of this attribute
      HashSet hash = new HashSet();
      for (int n = 0; n < data.numInstances(); n++) {
        Instance inst = data.instance(n);
        if (!inst.isMissing(i)) {
          if (isDate) {
            hash.add(inst.stringValue(i));
          } else {
            hash.add(new Double(inst.value(i)));
          }
        }
      }

      // sort values
      Vector sorted = new Vector(hash);
      Collections.sort(sorted);

      // create attribute from sorted values
      FastVector values = new FastVector();
      for (Object o : sorted) {
        if (isDate) {
          values.addElement(o.toString());
        } else {
          values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS));
        }
      }
      atts.addElement(new Attribute(data.attribute(i).name(), values));
    }

    Instances result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }
Beispiel #19
0
  /**
   * Replaces the value of one attribute with a randomly chosen different value.
   *
   * <p>Values are the codes {@code 0 .. numOfValues - 1}; the code {@code numOfValues} stands for
   * "missing" and is only drawn when {@code useMissing} is set. With exactly two values and a
   * non-missing current value the value is simply flipped without consuming a random number. If
   * no code other than the current one can be drawn, the instance is left unchanged.
   *
   * @param r random function
   * @param numOfValues number of regular values of the attribute
   * @param indexOfAtt index of the attribute to change
   * @param instance the instance to modify in place
   * @param useMissing whether "missing" may be chosen as the new value
   */
  private void changeValueRandomly(
      Random r, int numOfValues, int indexOfAtt, Instance instance, boolean useMissing) {

    // a missing value is represented by the highest possible value plus one
    boolean currentlyMissing = instance.isMissing(indexOfAtt);
    int currValue = currentlyMissing ? numOfValues : (int) instance.value(indexOfAtt);

    // with only two possible values it is easier
    if ((numOfValues == 2) && !currentlyMissing) {
      instance.setValue(indexOfAtt, (double) ((currValue + 1) % 2));
      return;
    }

    // codes 0 .. numCodes - 1 can be drawn; the extra code means "missing"
    int numCodes = useMissing ? numOfValues + 1 : numOfValues;
    boolean currentIsDrawable = currValue >= 0 && currValue < numCodes;
    int alternatives = currentIsDrawable ? numCodes - 1 : numCodes;
    if (alternatives <= 0) {
      // nothing different can ever be drawn; the loop below would spin forever
      return;
    }

    // draw until the new value differs from the current one
    while (true) {
      int newValue = (int) (r.nextDouble() * (double) numCodes);
      if (newValue != currValue) {
        if (newValue == numOfValues) {
          instance.setMissing(indexOfAtt);
        } else {
          instance.setValue(indexOfAtt, (double) newValue);
        }
        break;
      }
    }
  }
Beispiel #20
0
        /**
         * Tests whether the left-hand attribute is missing in the instance. All right-hand
         * operands are ignored.
         */
        @Override
        boolean evaluate(
            Instance inst,
            int lhsAttIndex,
            String rhsOperand,
            double numericOperand,
            Pattern regexPattern,
            boolean rhsIsAttribute,
            int rhsAttIndex) {

          final boolean lhsMissing = inst.isMissing(lhsAttIndex);
          return lhsMissing;
        }
Beispiel #21
0
  /**
   * Builds an instance laid out like the model header from an incoming instance.
   *
   * <p>Each model attribute takes its value from the incoming attribute given by m_attributeMap.
   * Unmapped attributes (negative map entry), missing incoming values and nominal labels unknown
   * to the model become missing values. String attributes are stored through the model
   * attribute with value index 0. The class attribute, if the header has one, is always set to
   * missing.
   *
   * @param incoming the incoming instance
   * @return an instance with values mapped to be consistent with what the model is expecting
   */
  protected Instance mapIncomingFieldsToModelFields(Instance incoming) {
    Instances modelHeader = m_model.getHeader();
    double[] vals = new double[modelHeader.numAttributes()];

    for (int i = 0; i < modelHeader.numAttributes(); i++) {
      int source = m_attributeMap[i];

      // no usable source attribute (missing or type mismatch), or the incoming value is missing
      if (source < 0 || incoming.isMissing(incoming.dataset().attribute(source).index())) {
        vals[i] = Utils.missingValue();
        continue;
      }

      Attribute modelAtt = modelHeader.attribute(i);
      if (modelAtt.isNumeric()) {
        vals[i] = incoming.value(source);
      } else if (modelAtt.isNominal()) {
        // translate the label into the model's own value index
        int modelIndex = modelAtt.indexOfValue(incoming.stringValue(source));
        vals[i] = (modelIndex < 0) ? Utils.missingValue() : modelIndex;
      } else if (modelAtt.isString()) {
        vals[i] = 0;
        modelAtt.setStringValue(incoming.stringValue(source));
      }
    }

    if (modelHeader.classIndex() >= 0) {
      // set class to missing value
      vals[modelHeader.classIndex()] = Utils.missingValue();
    }

    Instance mapped =
        (incoming instanceof SparseInstance)
            ? new SparseInstance(incoming.weight(), vals)
            : new DenseInstance(incoming.weight(), vals);
    mapped.setDataset(modelHeader);
    return mapped;
  }
  /**
   * Picks, uniformly at random, one of the instances whose desired attribute is still missing.
   *
   * @param pool the instances to choose from
   * @param desiredAttr the attribute that must be missing in a candidate
   * @return the row index of the chosen instance, or -1 if no instance qualifies
   */
  public int SelectRow_Random(Instances pool, int desiredAttr) {
    // first pass: count the candidates
    int candidates = 0;
    for (int row = 0; row < pool.numInstances(); row++) {
      if (pool.instance(row).isMissing(desiredAttr)) {
        candidates++;
      }
    }
    if (candidates == 0) {
      return -1;
    }

    // second pass: walk to the randomly chosen candidate
    int remaining = r.nextInt(candidates);
    for (int row = 0; row < pool.numInstances(); row++) {
      if (!pool.instance(row).isMissing(desiredAttr)) {
        continue;
      }
      if (remaining == 0) {
        return row;
      }
      remaining--;
    }
    return -1;
  }
  /**
   * Compares two datasets and throws if they differ.
   *
   * <p>The datasets must have the same number of attributes and instances. Instances are
   * compared pairwise by position: weights must be identical, missing values must be missing in
   * both, and all other values must have identical string representations.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @throws Exception if the datasets differ
   */
  protected void compareDatasets(Instances data1, Instances data2) throws Exception {

    if (data1.numAttributes() != data2.numAttributes()) {
      throw new Exception("number of attributes has changed");
    }
    if (data2.numInstances() != data1.numInstances()) {
      throw new Exception("number of instances has changed");
    }

    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);

      // The weight belongs to the instance, not to an attribute: check it once per instance so
      // it is neither repeated per attribute nor skipped when there are no attributes.
      if (orig.weight() != copy.weight()) {
        throw new Exception("instance weights have changed");
      }

      for (int j = 0; j < orig.numAttributes(); j++) {
        if (orig.isMissing(j)) {
          if (!copy.isMissing(j)) {
            throw new Exception("instances have changed");
          }
        } else if (!orig.toString(j).equals(copy.toString(j))) {
          throw new Exception("instances have changed");
        }
      }
    }
  }
Beispiel #24
0
  /**
   * Checks if an instance contains an item set, treating a value of zero like a missing value.
   *
   * <p>Only entries of m_items greater than -1 are checked; all other entries are skipped. For a
   * checked entry the instance must hold exactly that value at the same attribute index. In the
   * dense branch a missing value or a value of 0 never matches. In the sparse branch only the
   * explicitly stored entries of the instance are considered, so an attribute without a stored
   * entry never matches (presumably sparse instances do not store zeros -- confirm).
   *
   * @param instance the instance to be tested
   * @return true if the given instance contains this item set
   */
  public boolean containedByTreatZeroAsMissing(Instance instance) {

    if (instance instanceof weka.core.SparseInstance) {
      int numInstVals = instance.numValues();
      int numItemSetVals = m_items.length;

      // Merge-style walk: p1 runs over the stored sparse entries of the instance, p2 over the
      // attribute positions of the item set.
      // NOTE(review): the loop continues while EITHER pointer is in range, so m_items[p2] is
      // read with p2 == m_items.length if the instance stores an index beyond the item set --
      // presumably callers guarantee matching attribute counts; confirm.
      for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals; ) {
        // MAX_VALUE marks "no more stored entries in the instance"
        int instIndex = Integer.MAX_VALUE;
        if (p1 < numInstVals) {
          instIndex = instance.index(p1);
        }
        int itemIndex = p2;

        if (m_items[itemIndex] > -1) {
          // this attribute is checked: the instance must store an entry at exactly this index
          if (itemIndex != instIndex) {
            return false;
          } else {
            if (instance.isMissingSparse(p1)) {
              return false;
            }
            if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
              return false;
            }
          }

          p1++;
          p2++;
        } else {
          // attribute is not checked: step past it, and past the instance entry if it sits at
          // the same index
          if (itemIndex < instIndex) {
            p2++;
          } else if (itemIndex == instIndex) {
            p2++;
            p1++;
          }
        }
      }
    } else {
      for (int i = 0; i < instance.numAttributes(); i++) {
        if (m_items[i] > -1) {
          // zero is treated like a missing value here
          if (instance.isMissing(i) || (int) instance.value(i) == 0) {
            return false;
          }
          if (m_items[i] != (int) instance.value(i)) {
            return false;
          }
        }
      }
    }

    return true;
  }
Beispiel #25
0
  /**
   * Returns the weights used to distribute an instance over all subsets when its split attribute
   * is missing: for each subset, its share {@code perBag(i) / total()} of m_distribution.
   * Returns null when the split attribute is present, i.e. the instance belongs to one subset
   * only.
   */
  public final double[] weights(Instance instance) {

    if (!instance.isMissing(m_attIndex)) {
      return null;
    }

    double[] shares = new double[m_numSubsets];
    for (int subset = 0; subset < m_numSubsets; subset++) {
      shares[subset] = m_distribution.perBag(subset) / m_distribution.total();
    }
    return shares;
  }
Beispiel #26
0
  /**
   * Gets the subset of instances that apply to a particular branch of the split.
   *
   * <p>Branch -1 collects the instances whose split attribute is missing, branch 0 those with a
   * value below the split point, and any other branch those with a value at or above it.
   *
   * @param branch the index of the branch
   * @param instances the instances from which to find the subset
   * @return the set of instances that apply
   */
  public ReferenceInstances instancesDownBranch(int branch, Instances instances) {

    ReferenceInstances filteredInstances = new ReferenceInstances(instances, 1);

    for (Enumeration e = instances.enumerateInstances(); e.hasMoreElements(); ) {
      Instance inst = (Instance) e.nextElement();

      boolean belongs;
      if (inst.isMissing(attIndex)) {
        // missing values only go down the "no branch" pseudo-branch
        belongs = (branch == -1);
      } else if (branch == -1) {
        belongs = false;
      } else if (branch == 0) {
        belongs = inst.value(attIndex) < splitPoint;
      } else {
        belongs = inst.value(attIndex) >= splitPoint;
      }

      if (belongs) {
        filteredInstances.addReference(inst);
      }
    }
    return filteredInstances;
  }
Beispiel #27
0
  /**
   * Checks if an instance contains an item set.
   *
   * <p>Only entries of m_items greater than -1 are checked; for each of them the instance must
   * hold a non-missing value equal to the entry.
   *
   * @param instance the instance to be tested
   * @return true if the given instance contains this item set
   */
  public boolean containedBy(Instance instance) {
    int att = 0;
    while (att < instance.numAttributes()) {
      boolean checked = m_items[att] > -1;
      if (checked
          && (instance.isMissing(att) || m_items[att] != (int) instance.value(att))) {
        return false;
      }
      att++;
    }

    return true;
  }
  /**
   * Selects a row among the instances whose desired attribute is still missing, preferring the
   * one for which the estimator assigns the lowest probability to the true class label (i.e. the
   * most erroneous instance). Ties are broken at random using {@code r}.
   *
   * <p>An instance for which the estimator throws is reported on standard error and skipped.
   *
   * @param pool the instances to choose from
   * @param myEstimator the classifier used for prediction
   * @param desiredAttr the attribute that must still be missing in a candidate
   * @return the index of the selected row, or -1 if no instance qualifies
   */
  int SelectRow_ErrorMargin(Instances pool, Classifier myEstimator, int desiredAttr) {

    int numInstances = pool.numInstances();
    if (numInstances == 0) {
      return -1;
    }

    double[] classProb = new double[numInstances];
    boolean[] isValidInstance = new boolean[numInstances];

    for (int i = 0; i < numInstances; i++) {
      Instance inst = pool.instance(i);
      // non-candidates (and failed predictions) keep the sentinel so they never win the minimum
      classProb[i] = Double.POSITIVE_INFINITY;

      if (!inst.isMissing(desiredAttr)) {
        continue;
      }
      try {
        double[] probs = myEstimator.distributionForInstance(inst);
        classProb[i] = probs[(int) inst.classValue()];
        isValidInstance[i] = true;
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    double leastCorrect = classProb[Utils.minIndex(classProb)];
    int numLeastCorrect = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && classProb[i] == leastCorrect) {
        numLeastCorrect++;
      }
    }
    if (numLeastCorrect == 0) {
      // no candidate at all: Random.nextInt(0) would throw
      return -1;
    }

    int randomInstance = r.nextInt(numLeastCorrect);
    int index = 0;
    for (int i = 0; i < numInstances; i++) {
      if (isValidInstance[i] && classProb[i] == leastCorrect) {
        if (index == randomInstance) {
          return i;
        }
        index++;
      }
    }
    return -1;
  }
Beispiel #29
0
  /**
   * Converts one instance for the nominal-class case and appends the result to the output queue.
   *
   * <p>If no transformation is needed the instance is pushed unchanged. Otherwise each input
   * attribute is either copied through as a single value (non-nominal attributes, the class
   * attribute, and nominal attributes with at most two values unless {@code m_TransformAll} is
   * set) or expanded into one output slot per nominal value: 1 in the slot matching the
   * instance's value and 0 elsewhere. When the value is missing, every expanded slot receives the
   * instance's (missing) value.
   *
   * @param instance the instance to convert
   */
  private void convertInstanceNominal(Instance instance) {

    if (!m_needToTransform) {
      push(instance);
      return;
    }

    double[] converted = new double[outputFormatPeek().numAttributes()];
    int outPos = 0; // next free slot in the converted value array

    for (int inIdx = 0; inIdx < getInputFormat().numAttributes(); inIdx++) {
      Attribute current = getInputFormat().attribute(inIdx);

      // Attributes that occupy exactly one output slot are copied through as-is.
      boolean copyUnchanged =
          !current.isNominal()
              || inIdx == getInputFormat().classIndex()
              || (current.numValues() <= 2 && !m_TransformAll);
      if (copyUnchanged) {
        converted[outPos] = instance.value(inIdx);
        outPos += 1;
        continue;
      }

      // Expand the nominal attribute into one indicator slot per value.
      if (instance.isMissing(inIdx)) {
        for (int v = 0; v < current.numValues(); v++) {
          converted[outPos + v] = instance.value(inIdx);
        }
      } else {
        for (int v = 0; v < current.numValues(); v++) {
          converted[outPos + v] = (v == (int) instance.value(inIdx)) ? 1 : 0;
        }
      }
      outPos += current.numValues();
    }

    // Keep the sparse/dense representation of the incoming instance.
    Instance result;
    if (instance instanceof SparseInstance) {
      result = new SparseInstance(instance.weight(), converted);
    } else {
      result = new DenseInstance(instance.weight(), converted);
    }
    result.setDataset(getOutputFormat());
    copyValues(result, false, instance.dataset(), getOutputFormat());
    result.setDataset(getOutputFormat());
    push(result);
  }
  /**
   * Computes the class distribution of an instance using this FastRandomTree.
   *
   * <p>A leaf returns its stored class probabilities. An inner node routes the instance to one of
   * its two successors: for a nominal split attribute the first successor is taken when the value
   * equals the split point, for a numeric one when the value is below the split point. If the
   * split attribute is missing, the distributions of all successors are combined, each weighted by
   * the corresponding entry of {@code m_Prop}.
   *
   * <p>In Weka's RandomTree, the distributions were normalized so that all probabilities sum to 1;
   * this would abolish the effect of instance weights on voting. In FastRandomForest 0.97 onwards,
   * the distributions are normalized by dividing with the number of instances going into a leaf.
   *
   * @param instance the instance to compute the distribution for
   * @return the computed class distribution
   * @throws Exception if computation fails
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    // Leaf node: no split attribute, hand back the stored distribution.
    if (m_Attribute < 0) {
      return m_ClassProbs;
    }

    // Missing split value: send the instance down every branch and blend the results.
    if (instance.isMissing(m_Attribute)) {
      double[] blended = new double[m_MotherForest.getM_Info().numClasses()];
      for (int branch = 0; branch < m_Successors.length; branch++) {
        double[] branchDist = m_Successors[branch].distributionForInstance(instance);
        if (branchDist == null) {
          continue;
        }
        for (int cls = 0; cls < branchDist.length; cls++) {
          blended[cls] += m_Prop[branch] * branchDist[cls];
        }
      }
      return blended;
    }

    // 0.99: binary splits are used for nominal attributes as well as numeric ones.
    boolean takeFirstBranch;
    if (m_MotherForest.getM_Info().attribute(m_Attribute).isNominal()) {
      takeFirstBranch = instance.value(m_Attribute) == m_SplitPoint;
    } else {
      takeFirstBranch = instance.value(m_Attribute) < m_SplitPoint;
    }

    return m_Successors[takeFirstBranch ? 0 : 1].distributionForInstance(instance);
  }