/** * Process a classifier's prediction for an instance and update a set of plotting instances and * additional plotting info. m_PlotShape for nominal class datasets holds shape types (actual data * points have automatic shape type assignment; classifier error data points have box shape type). * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSize * stores the error (which is later converted to shape size values). * * @param toPredict the actual data point * @param classifier the classifier * @param eval the evaluation object to use for evaluating the classifier on the instance to * predict * @see #m_PlotShapes * @see #m_PlotSizes * @see #m_PlotInstances */ public void process(Instance toPredict, Classifier classifier, Evaluation eval) { double pred; double[] values; int i; try { pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict); if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { toPredict = ((weka.classifiers.misc.InputMappedClassifier) classifier) .constructMappedInstance(toPredict); } if (!m_SaveForVisualization) return; if (m_PlotInstances != null) { values = new double[m_PlotInstances.numAttributes()]; for (i = 0; i < m_PlotInstances.numAttributes(); i++) { if (i < toPredict.classIndex()) { values[i] = toPredict.value(i); } else if (i == toPredict.classIndex()) { values[i] = pred; values[i + 1] = toPredict.value(i); i++; } else { values[i] = toPredict.value(i - 1); } } m_PlotInstances.add(new DenseInstance(1.0, values)); if (toPredict.classAttribute().isNominal()) { if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) { m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } else if (pred != toPredict.classValue()) { // set to default error point shape m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE)); } else { // otherwise set to constant (automatically assigned) point shape m_PlotShapes.addElement(new 
Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE)); } else { // store the error (to be converted to a point size later) Double errd = null; if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) { errd = new Double(pred - toPredict.classValue()); m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } else { // missing shape if actual class not present or prediction is missing m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } m_PlotSizes.addElement(errd); } } } catch (Exception ex) { ex.printStackTrace(); } }
/**
 * Tests the left-hand attribute for equality (as decided by {@code Utils.eq}) against either
 * another attribute of the same instance or a numeric constant.
 *
 * <p>When comparing two attributes, two missing values count as equal and exactly one missing
 * value counts as not equal. When comparing against the constant, a missing left-hand value never
 * matches.
 *
 * @param inst the instance to test
 * @param lhsAttIndex index of the left-hand attribute
 * @param rhsOperand not used by this implementation
 * @param numericOperand the constant compared against when {@code rhsIsAttribute} is false
 * @param regexPattern not used by this implementation
 * @param rhsIsAttribute true if the right-hand side is an attribute of the instance
 * @param rhsAttIndex index of the right-hand attribute (only read if {@code rhsIsAttribute})
 * @return true if the two sides are considered equal
 */
@Override
boolean evaluate(
    Instance inst,
    int lhsAttIndex,
    String rhsOperand,
    double numericOperand,
    Pattern regexPattern,
    boolean rhsIsAttribute,
    int rhsAttIndex) {
  boolean lhsMissing = inst.isMissing(lhsAttIndex);

  if (!rhsIsAttribute) {
    return !lhsMissing && Utils.eq(inst.value(lhsAttIndex), numericOperand);
  }

  boolean rhsMissing = inst.isMissing(rhsAttIndex);
  if (lhsMissing || rhsMissing) {
    // equal only when both sides are missing
    return lhsMissing && rhsMissing;
  }
  return Utils.eq(inst.value(lhsAttIndex), inst.value(rhsAttIndex));
}
/**
 * Compares two datasets and throws if they differ.
 *
 * <p>The header is only compared when {@code m_CheckHeader} is set. Values are compared either
 * through their string representation ({@code m_CompareValuesAsString}) or numerically with a
 * tolerance of {@code m_MaxDiffValues}. Instance weights are compared once per instance with a
 * tolerance of {@code m_MaxDiffWeights}.
 *
 * @param data1 one set of instances
 * @param data2 the other set of instances
 * @throws Exception if the datasets differ
 */
protected void compareDatasets(Instances data1, Instances data2) throws Exception {
  if (m_CheckHeader && !data2.equalHeaders(data1)) {
    throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
  }
  if (data2.numInstances() != data1.numInstances()) {
    throw new Exception("number of instances has changed");
  }

  for (int i = 0; i < data2.numInstances(); i++) {
    Instance orig = data1.instance(i);
    Instance copy = data2.instance(i);

    for (int j = 0; j < orig.numAttributes(); j++) {
      if (orig.isMissing(j)) {
        if (!copy.isMissing(j)) {
          throw new Exception("instances have changed");
        }
      } else if (m_CompareValuesAsString) {
        if (!orig.toString(j).equals(copy.toString(j))) {
          throw new Exception("instances have changed");
        }
      } else if (Math.abs(orig.value(j) - copy.value(j)) > m_MaxDiffValues) {
        throw new Exception("instances have changed");
      }
    }

    // The weight belongs to the instance, not to an attribute: check it once per instance
    // (this also covers instances without any attributes).
    if (Math.abs(orig.weight() - copy.weight()) > m_MaxDiffWeights) {
      throw new Exception("instance weights have changed");
    }
  }
}
/** * Convert a single instance over. The converted instance is added to the end of the output queue. * * @param instance the instance to convert */ protected void convertInstance(Instance instance) { int index = 0; double[] vals = new double[outputFormatPeek().numAttributes()]; // Copy and convert the values for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (m_DiscretizeCols.isInRange(i) && getInputFormat().attribute(i).isNumeric()) { int j; double currentVal = instance.value(i); if (m_CutPoints[i] == null) { if (instance.isMissing(i)) { vals[index] = Utils.missingValue(); } else { vals[index] = 0; } index++; } else { if (!m_MakeBinary) { if (instance.isMissing(i)) { vals[index] = Utils.missingValue(); } else { for (j = 0; j < m_CutPoints[i].length; j++) { if (currentVal <= m_CutPoints[i][j]) { break; } } vals[index] = j; } index++; } else { for (j = 0; j < m_CutPoints[i].length; j++) { if (instance.isMissing(i)) { vals[index] = Utils.missingValue(); } else if (currentVal <= m_CutPoints[i][j]) { vals[index] = 0; } else { vals[index] = 1; } index++; } } } } else { vals[index] = instance.value(i); index++; } } Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), vals); } else { inst = new DenseInstance(instance.weight(), vals); } inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); inst.setDataset(getOutputFormat()); push(inst); }
/**
 * Runs the filter configured with "6,3" and checks that the output has one attribute less than
 * the input and that output attribute 4 equals input attribute 5 minus input attribute 2
 * (0-based), or is missing whenever one of the two inputs is missing.
 */
public void testTypical() {
  m_Filter = getFilter("6,3");
  Instances result = useFilter();
  assertEquals(m_Instances.numAttributes() - 1, result.numAttributes());

  for (int row = 0; row < result.numInstances(); row++) {
    Instance source = m_Instances.instance(row);
    boolean operandMissing = source.isMissing(5) || source.isMissing(2);

    if (!operandMissing) {
      assertEquals(source.value(5) - source.value(2), result.instance(row).value(4), EXPR_DELTA);
    } else {
      assertTrue(
          "Instance " + (row + 1) + " should have been ?", result.instance(row).isMissing(4));
    }
  }
}
/**
 * Inputs an instance for filtering. The instance's values are copied into an array that has one
 * extra trailing slot, the array is handed to {@code evaluateExpression}, and the resulting
 * instance is pushed onto the output queue right away.
 *
 * @param instance the input instance
 * @return always true: the filtered instance may be collected with output()
 * @exception IllegalStateException if no input format has been defined
 * @exception Exception if there was a problem during the filtering
 */
public boolean input(Instance instance) throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  int numInputAtts = instance.numAttributes();
  // one extra slot at the end; presumably filled by evaluateExpression() - confirm
  double[] vals = new double[numInputAtts + 1];
  for (int att = 0; att < numInputAtts; att++) {
    vals[att] = instance.isMissing(att) ? Instance.missingValue() : instance.value(att);
  }
  evaluateExpression(vals);

  Instance filtered;
  if (instance instanceof SparseInstance) {
    filtered = new SparseInstance(instance.weight(), vals);
  } else {
    filtered = new Instance(instance.weight(), vals);
  }
  copyStringValues(filtered, false, instance.dataset(), getOutputFormat());
  filtered.setDataset(getOutputFormat());
  push(filtered);
  return true;
}
/** * Input an instance for filtering. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws Exception if the input format was not set or the date format cannot be parsed */ public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } Instance newInstance = (Instance) instance.copy(); int index = m_AttIndex.getIndex(); if (!newInstance.isMissing(index)) { double value = instance.value(index); try { // Format and parse under the new format to force any required // loss in precision. value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value)); } catch (ParseException pe) { throw new RuntimeException("Output date format couldn't parse its own output!!"); } newInstance.setValue(index, value); } push(newInstance); return true; }
/**
 * Computes, for every nominal attribute, the weighted average class value of each attribute
 * value and stores the result of {@code Utils.sort} on those averages in {@code m_Indices}.
 *
 * <p>Instances with a missing class or a missing attribute value are ignored. Attribute values
 * without any weight receive the overall weighted average instead. Entries of {@code m_Indices}
 * for non-nominal attributes remain empty arrays.
 */
private void computeAverageClassValues() {
  int numAtts = getInputFormat().numAttributes();
  m_Indices = new int[numAtts][0];

  for (int attIdx = 0; attIdx < numAtts; attIdx++) {
    Attribute att = getInputFormat().attribute(attIdx);
    if (!att.isNominal()) {
      continue;
    }

    double[] weightPerValue = new double[att.numValues()];
    double[] classAvgPerValue = new double[att.numValues()];

    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      Instance inst = getInputFormat().instance(row);
      if (inst.classIsMissing() || inst.isMissing(attIdx)) {
        continue;
      }
      weightPerValue[(int) inst.value(attIdx)] += inst.weight();
      classAvgPerValue[(int) inst.value(attIdx)] += inst.weight() * inst.classValue();
    }

    // totals must be taken before the per-value sums are turned into averages
    double weightedClassTotal = Utils.sum(classAvgPerValue);
    double weightTotal = Utils.sum(weightPerValue);

    if (Utils.gr(weightTotal, 0)) {
      for (int v = 0; v < att.numValues(); v++) {
        if (Utils.gr(weightPerValue[v], 0)) {
          classAvgPerValue[v] /= weightPerValue[v];
        } else {
          // unseen value: fall back to the overall average
          classAvgPerValue[v] = weightedClassTotal / weightTotal;
        }
      }
    }
    m_Indices[attIdx] = Utils.sort(classAvgPerValue);
  }
}
/**
 * Tests whether the string value of the left-hand attribute matches the supplied regular
 * expression in full.
 *
 * @param inst the instance to test
 * @param lhsAttIndex index of the left-hand attribute
 * @param rhsOperand not used by this implementation
 * @param numericOperand not used by this implementation
 * @param regexPattern the compiled pattern; a null pattern never matches
 * @param rhsIsAttribute not used by this implementation
 * @param rhsAttIndex not used by this implementation
 * @return true on a full match; false if the value is missing, the pattern is null, or
 *     {@code stringValue} rejects the attribute with an IllegalArgumentException
 */
@Override
boolean evaluate(
    Instance inst,
    int lhsAttIndex,
    String rhsOperand,
    double numericOperand,
    Pattern regexPattern,
    boolean rhsIsAttribute,
    int rhsAttIndex) {
  if (inst.isMissing(lhsAttIndex) || regexPattern == null) {
    return false;
  }

  String text;
  try {
    text = inst.stringValue(lhsAttIndex);
  } catch (IllegalArgumentException ex) {
    return false;
  }
  return regexPattern.matcher(text).matches();
}
/**
 * Converts an input instance to the output format.
 *
 * <p>Every non-class attribute that is listed in {@code m_unchanged} is copied as it is. Every
 * other non-class attribute is replaced by one value per class label, obtained from the
 * estimator registered for that attribute and label (or missing if the input value is missing).
 * The class value is written to the last output position.
 *
 * @param current the input instance to convert
 * @return a transformed instance
 * @throws Exception if a problem occurs
 */
protected Instance convertInstance(Instance current) throws Exception {
  double[] vals = new double[getOutputFormat().numAttributes()];
  int out = 0;

  for (int att = 0; att < current.numAttributes(); att++) {
    if (att == current.classIndex()) {
      continue;
    }

    boolean passThrough =
        m_unchanged != null && m_unchanged.attribute(current.attribute(att).name()) != null;
    if (passThrough) {
      vals[out++] = current.value(att);
      continue;
    }

    Estimator[] perClassEstimators = m_estimatorLookup.get(current.attribute(att).name());
    for (int cls = 0; cls < current.classAttribute().numValues(); cls++) {
      if (current.isMissing(att)) {
        vals[out++] = Utils.missingValue();
      } else {
        vals[out++] = perClassEstimators[cls].getProbability(current.value(att));
      }
    }
  }

  vals[vals.length - 1] = current.classValue();
  return new DenseInstance(current.weight(), vals);
}
public double ExpectedClassificationError(Instances pool, int attr_i) { // initialize alpha's to one int alpha[][][]; int NumberOfFeatures = pool.numAttributes() - 1; int NumberOfLabels = pool.numClasses(); alpha = new int[NumberOfFeatures][NumberOfLabels][]; for (int i = 0; i < NumberOfFeatures; i++) for (int j = 0; j < NumberOfLabels; j++) alpha[i][j] = new int[pool.attribute(i).numValues()]; for (int i = 0; i < NumberOfFeatures; i++) for (int j = 0; j < NumberOfLabels; j++) for (int k = 0; k < alpha[i][j].length; k++) alpha[i][j][k] = 1; // construct alpha's for (int i = 0; i < NumberOfFeatures; i++) // for each attribute { if (i == pool.classIndex()) // skip the class attribute i++; for (Enumeration<Instance> e = pool.enumerateInstances(); e.hasMoreElements(); ) // for each instance { Instance inst = e.nextElement(); if (!inst.isMissing(i)) // if attribute i is not missing (i.e. its been bought) { int j = (int) inst.classValue(); int k = (int) inst.value(i); alpha[i][j][k]++; } } } return ExpectedClassificationError(alpha, attr_i); }
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @exception Exception if distribution can't be computed */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] probs = new double[instance.numClasses()]; int attIndex; for (int j = 0; j < instance.numClasses(); j++) { probs[j] = 1; Enumeration<Attribute> enumAtts = instance.enumerateAttributes(); attIndex = 0; while (enumAtts.hasMoreElements()) { Attribute attribute = enumAtts.nextElement(); if (!instance.isMissing(attribute)) { if (attribute.isNominal()) { probs[j] *= m_Counts[j][attIndex][(int) instance.value(attribute)]; } else { probs[j] *= normalDens(instance.value(attribute), m_Means[j][attIndex], m_Devs[j][attIndex]); } } attIndex++; } probs[j] *= m_Priors[j]; } // Normalize probabilities Utils.normalize(probs); return probs; }
public int SelectRow_First(Instances pool, int desiredAttr, int desiredLabel) { // buy the desiredAttr-th attribute of an (the first) instance with label argmin_j; for (int i = 0; i < pool.numInstances(); i++) { Instance inst = pool.instance(i); if ((int) inst.classValue() == desiredLabel && inst.isMissing(desiredAttr)) return i; } return -1; }
public int SelectRow_First(Instances pool, int desiredAttr) { // buy the desiredAttr-th attribute of an (the first) instance regardless of label for (int i = 0; i < pool.numInstances(); i++) { Instance inst = pool.instance(i); if (inst.isMissing(desiredAttr)) return i; } return -1; }
public int SelectRow_KLDivergenceMisclassified( Instances pool, Classifier myEstimator, int desiredAttr) { // for each instance with unbought desiredAttr and label = desiredLabel // measure KL-divergence (relative entropy between two prob distributions): // KL(P||Q) = sum_i p_i log (p_i/q_i) // withr respect to Q = Uniform, we have // KL(P||U) = sum_i p_i log(p_i) // choose (row) that is minimum (i.e. closest to uniform) int numInstances = pool.numInstances(); double[] KLDivs = new double[numInstances]; boolean[] isValidInstance = new boolean[numInstances]; boolean misclassified = false; double[] probs = null; Instance inst; for (int i = 0; i < numInstances; i++) { inst = pool.instance(i); try { if (inst.classValue() != myEstimator.classifyInstance(inst)) misclassified = true; else misclassified = false; } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } if (inst.isMissing(desiredAttr) && misclassified) { try { probs = myEstimator.distributionForInstance(inst); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } for (int j = 0; j < probs.length; j++) KLDivs[i] += MyXLogX(probs[j]); isValidInstance[i] = true; } else { KLDivs[i] = Double.MAX_VALUE; isValidInstance[i] = false; } } double leastDivergence = KLDivs[Utils.minIndex(KLDivs)]; int numLeastDivs = 0; for (int i = 0; i < numInstances; i++) if (isValidInstance[i] && KLDivs[i] == leastDivergence) numLeastDivs++; int randomInstance = r.nextInt(numLeastDivs); int index = 0; for (int i = 0; i < numInstances; i++) { if (isValidInstance[i] && KLDivs[i] == leastDivergence) { if (index == randomInstance) return i; else index++; } } return -1; }
/**
 * Returns the index of the subset the instance is assigned to.
 *
 * <p>For a nominal split attribute, subset 0 is chosen when the attribute value equals the split
 * point (both truncated to int); for any other attribute type, subset 0 is chosen when the value
 * is smaller than or equal to the split point according to {@code Utils.smOrEq}. Everything else
 * goes to subset 1.
 *
 * @param instance the instance to assign
 * @return 0 or 1, or -1 if the split attribute is missing for the instance
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
  if (instance.isMissing(m_attIndex)) {
    return -1;
  }

  boolean firstSubset;
  if (instance.attribute(m_attIndex).isNominal()) {
    firstSubset = (int) m_splitPoint == (int) instance.value(m_attIndex);
  } else {
    firstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
  }
  return firstSubset ? 0 : 1;
}
/**
 * Predicts the value for an instance: the intercept alone if no attribute was selected,
 * otherwise intercept plus slope times the value of the selected attribute.
 *
 * @param inst the instance to predict
 * @return the prediction
 * @throws Exception if the selected attribute is missing for the instance
 */
public double classifyInstance(Instance inst) throws Exception {
  if (m_attribute == null) {
    return m_intercept;
  }
  if (inst.isMissing(m_attribute.index())) {
    throw new Exception("UnivariateLinearRegression: No missing values!");
  }
  return m_intercept + m_slope * inst.value(m_attribute.index());
}
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this * method will be called from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances data; Instances result; FastVector atts; FastVector values; HashSet hash; int i; int n; boolean isDate; Instance inst; Vector sorted; m_Cols.setUpper(inputFormat.numAttributes() - 1); data = new Instances(inputFormat); atts = new FastVector(); for (i = 0; i < data.numAttributes(); i++) { if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) { atts.addElement(data.attribute(i)); continue; } // date attribute? isDate = (data.attribute(i).type() == Attribute.DATE); // determine all available attribtues in dataset hash = new HashSet(); for (n = 0; n < data.numInstances(); n++) { inst = data.instance(n); if (inst.isMissing(i)) continue; if (isDate) hash.add(inst.stringValue(i)); else hash.add(new Double(inst.value(i))); } // sort values sorted = new Vector(); for (Object o : hash) sorted.add(o); Collections.sort(sorted); // create attribute from sorted values values = new FastVector(); for (Object o : sorted) { if (isDate) values.addElement(o.toString()); else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS)); } atts.addElement(new Attribute(data.attribute(i).name(), values)); } result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
/** * method to set a new value * * @param r random function * @param numOfValues * @param instance * @param useMissing */ private void changeValueRandomly( Random r, int numOfValues, int indexOfAtt, Instance instance, boolean useMissing) { int currValue; // get current value // if value is missing set current value to number of values // whiche is the highest possible value plus one if (instance.isMissing(indexOfAtt)) { currValue = numOfValues; } else { currValue = (int) instance.value(indexOfAtt); } // with only two possible values it is easier if ((numOfValues == 2) && (!instance.isMissing(indexOfAtt))) { instance.setValue(indexOfAtt, (double) ((currValue + 1) % 2)); } else { // get randomly a new value not equal to the current value // if missing values are used as values they must be treated // in a special way while (true) { int newValue; if (useMissing) { newValue = (int) (r.nextDouble() * (double) (numOfValues + 1)); } else { newValue = (int) (r.nextDouble() * (double) numOfValues); } // have we found a new value? if (newValue != currValue) { // the value 1 above the highest possible value (=numOfValues) // is used as missing value if (newValue == numOfValues) { instance.setMissing(indexOfAtt); } else { instance.setValue(indexOfAtt, (double) newValue); } break; } } } }
/**
 * Tests whether the left-hand attribute is missing in the instance. All other arguments are
 * ignored by this implementation.
 *
 * @param inst the instance to test
 * @param lhsAttIndex index of the attribute to check
 * @param rhsOperand not used
 * @param numericOperand not used
 * @param regexPattern not used
 * @param rhsIsAttribute not used
 * @param rhsAttIndex not used
 * @return true if the attribute value is missing
 */
@Override
boolean evaluate(
    Instance inst,
    int lhsAttIndex,
    String rhsOperand,
    double numericOperand,
    Pattern regexPattern,
    boolean rhsIsAttribute,
    int rhsAttIndex) {
  final boolean lhsMissing = inst.isMissing(lhsAttIndex);
  return lhsMissing;
}
/** * Constructs an instance suitable for passing to the model for scoring * * @param incoming the incoming instance * @return an instance with values mapped to be consistent with what the model is expecting */ protected Instance mapIncomingFieldsToModelFields(Instance incoming) { Instances modelHeader = m_model.getHeader(); double[] vals = new double[modelHeader.numAttributes()]; for (int i = 0; i < modelHeader.numAttributes(); i++) { if (m_attributeMap[i] < 0) { // missing or type mismatch vals[i] = Utils.missingValue(); continue; } Attribute modelAtt = modelHeader.attribute(i); Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]); if (incoming.isMissing(incomingAtt.index())) { vals[i] = Utils.missingValue(); continue; } if (modelAtt.isNumeric()) { vals[i] = incoming.value(m_attributeMap[i]); } else if (modelAtt.isNominal()) { String incomingVal = incoming.stringValue(m_attributeMap[i]); int modelIndex = modelAtt.indexOfValue(incomingVal); if (modelIndex < 0) { vals[i] = Utils.missingValue(); } else { vals[i] = modelIndex; } } else if (modelAtt.isString()) { vals[i] = 0; modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i])); } } if (modelHeader.classIndex() >= 0) { // set class to missing value vals[modelHeader.classIndex()] = Utils.missingValue(); } Instance newInst = null; if (incoming instanceof SparseInstance) { newInst = new SparseInstance(incoming.weight(), vals); } else { newInst = new DenseInstance(incoming.weight(), vals); } newInst.setDataset(modelHeader); return newInst; }
public int SelectRow_Random(Instances pool, int desiredAttr) { // randomly select among instances with // -unbought desiredAttr and // -desiredLabel int numberValidInstances = 0; for (int i = 0; i < pool.numInstances(); i++) { Instance inst = pool.instance(i); if (inst.isMissing(desiredAttr)) numberValidInstances++; } if (numberValidInstances == 0) return -1; int randomInstance = r.nextInt(numberValidInstances); int index = 0; for (int i = 0; i < pool.numInstances(); i++) { Instance inst = pool.instance(i); if (inst.isMissing(desiredAttr)) { if (index == randomInstance) return i; else index++; } } return -1; }
/**
 * Compares two datasets and throws if they differ in the number of attributes, the number of
 * instances, any value (missing state or string representation) or any instance weight.
 *
 * @param data1 one set of instances
 * @param data2 the other set of instances
 * @throws Exception if the datasets differ
 */
protected void compareDatasets(Instances data1, Instances data2) throws Exception {
  if (data1.numAttributes() != data2.numAttributes()) {
    throw new Exception("number of attributes has changed");
  }
  if (data2.numInstances() != data1.numInstances()) {
    throw new Exception("number of instances has changed");
  }

  for (int i = 0; i < data2.numInstances(); i++) {
    Instance orig = data1.instance(i);
    Instance copy = data2.instance(i);

    for (int j = 0; j < orig.numAttributes(); j++) {
      if (orig.isMissing(j)) {
        if (!copy.isMissing(j)) {
          throw new Exception("instances have changed");
        }
      } else if (!orig.toString(j).equals(copy.toString(j))) {
        throw new Exception("instances have changed");
      }
    }

    // The weight belongs to the instance, not to an attribute: check it once per instance
    // (this also covers instances without any attributes).
    if (orig.weight() != copy.weight()) {
      throw new Exception("instance weights have changed");
    }
  }
}
/**
 * Checks if an instance contains an item set, where an attribute value of zero is treated like a
 * missing value (i.e. it never satisfies an item).
 *
 * <p>Only positions of {@code m_items} holding a value greater than -1 are checked; for each of
 * them the instance must have a non-missing, non-zero value whose int truncation equals the item
 * value. Sparse instances are handled by walking the stored sparse entries and the item positions
 * side by side.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedByTreatZeroAsMissing(Instance instance) {
  if (instance instanceof weka.core.SparseInstance) {
    int numInstVals = instance.numValues();
    int numItemSetVals = m_items.length;

    // p1 walks the stored sparse entries of the instance, p2 walks the positions of m_items.
    // NOTE(review): m_items[p2] is read without checking p2 < numItemSetVals; this looks safe
    // only if every sparse index is smaller than m_items.length - confirm.
    for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals; ) {
      // attribute index of the current sparse entry; MAX_VALUE once the entries are used up
      int instIndex = Integer.MAX_VALUE;
      if (p1 < numInstVals) {
        instIndex = instance.index(p1);
      }
      int itemIndex = p2;

      if (m_items[itemIndex] > -1) {
        // this position is part of the item set
        if (itemIndex != instIndex) {
          // the instance has no stored entry for this attribute
          // (presumably because its value is zero - confirm)
          return false;
        } else {
          if (instance.isMissingSparse(p1)) {
            return false;
          }
          if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
            return false;
          }
        }

        p1++;
        p2++;
      } else {
        // position not part of the item set: just move the cursors along
        if (itemIndex < instIndex) {
          p2++;
        } else if (itemIndex == instIndex) {
          p2++;
          p1++;
        }
        // NOTE(review): no cursor advances when itemIndex > instIndex; presumably that state
        // cannot occur because sparse indices are increasing - confirm.
      }
    }
  } else {
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (m_items[i] > -1) {
        // a zero value is rejected exactly like a missing one
        if (instance.isMissing(i) || (int) instance.value(i) == 0) {
          return false;
        }
        if (m_items[i] != (int) instance.value(i)) {
          return false;
        }
      }
    }
  }

  return true;
}
/**
 * Returns the weights with which an instance is spread over the subsets when its split attribute
 * is missing: for each subset, {@code m_distribution.perBag(i) / m_distribution.total()}.
 *
 * @param instance the instance to compute the weights for
 * @return the weight per subset, or null if the split attribute is not missing (the instance
 *     then belongs to exactly one subset)
 */
public final double[] weights(Instance instance) {
  if (!instance.isMissing(m_attIndex)) {
    return null;
  }

  double[] shares = new double[m_numSubsets];
  for (int bag = 0; bag < m_numSubsets; bag++) {
    shares[bag] = m_distribution.perBag(bag) / m_distribution.total();
  }
  return shares;
}
/**
 * Gets the subset of instances that apply to a particular branch of the split. Branch -1 selects
 * the instances whose split attribute is missing, branch 0 those with a value below the split
 * point, and any other branch those with a value at or above the split point.
 *
 * @param branch the index of the branch
 * @param instances the instances from which to find the subset
 * @return the set of instances that apply
 */
public ReferenceInstances instancesDownBranch(int branch, Instances instances) {
  ReferenceInstances selectedInstances = new ReferenceInstances(instances, 1);

  Enumeration e = instances.enumerateInstances();
  while (e.hasMoreElements()) {
    Instance inst = (Instance) e.nextElement();

    boolean selected;
    if (inst.isMissing(attIndex)) {
      // instances without a value only belong to the "no branch" subset
      selected = (branch == -1);
    } else if (branch == -1) {
      selected = false;
    } else if (branch == 0) {
      selected = inst.value(attIndex) < splitPoint;
    } else {
      selected = inst.value(attIndex) >= splitPoint;
    }

    if (selected) {
      selectedInstances.addReference(inst);
    }
  }
  return selectedInstances;
}
/**
 * Checks if an instance contains an item set. Only positions of {@code m_items} holding a value
 * greater than -1 are checked; for each of them the instance must have a non-missing value whose
 * int truncation equals the item value.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedBy(Instance instance) {
  int att = 0;
  while (att < instance.numAttributes()) {
    boolean partOfItemSet = m_items[att] > -1;
    if (partOfItemSet
        && (instance.isMissing(att) || m_items[att] != (int) instance.value(att))) {
      return false;
    }
    att++;
  }
  return true;
}
int SelectRow_ErrorMargin(Instances pool, Classifier myEstimator, int desiredAttr) { // for each instance with unbought desiredAttr and label = desiredLabel // measure Prob(i,L(i)) the class probability of the true label, choose the one minimizing it. // i.e. the most erroneous instance int numInstances = pool.numInstances(); double[] classProb = new double[numInstances]; boolean[] isValidInstance = new boolean[numInstances]; double[] probs = null; Instance inst; for (int i = 0; i < numInstances; i++) { inst = pool.instance(i); if (inst.isMissing(desiredAttr)) { try { probs = myEstimator.distributionForInstance(inst); classProb[i] = probs[(int) inst.classValue()]; isValidInstance[i] = true; } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { classProb[i] = Double.POSITIVE_INFINITY; isValidInstance[i] = false; } } double leastCorrect = classProb[Utils.minIndex(classProb)]; int numLeastCorrect = 0; for (int i = 0; i < numInstances; i++) { if (isValidInstance[i] && classProb[i] == leastCorrect) numLeastCorrect++; } int randomInstance = r.nextInt(numLeastCorrect); int index = 0; for (int i = 0; i < numInstances; i++) { if (isValidInstance[i] && classProb[i] == leastCorrect) { if (index == randomInstance) return i; else index++; } } return -1; }
/**
 * Converts a single instance for the nominal-class case and pushes the result onto the output
 * queue. If no transformation is needed, the instance is pushed unchanged.
 *
 * <p>Non-nominal attributes, the class attribute, and nominal attributes with at most two values
 * (unless {@code m_TransformAll} is set) are copied as they are. Every other nominal attribute is
 * expanded into one indicator column per value: 1 for the value the instance has, 0 for the
 * others, and the (missing) input value in all of them if the input is missing.
 *
 * @param instance the instance to convert
 */
private void convertInstanceNominal(Instance instance) {
  if (!m_needToTransform) {
    push(instance);
    return;
  }

  double[] vals = new double[outputFormatPeek().numAttributes()];
  int out = 0;

  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);

    boolean copyAsIs =
        !att.isNominal()
            || j == getInputFormat().classIndex()
            || (att.numValues() <= 2 && !m_TransformAll);
    if (copyAsIs) {
      vals[out++] = instance.value(j);
      continue;
    }

    if (instance.isMissing(j)) {
      // propagate the missing value into every indicator column
      for (int k = 0; k < att.numValues(); k++) {
        vals[out + k] = instance.value(j);
      }
    } else {
      for (int k = 0; k < att.numValues(); k++) {
        vals[out + k] = (k == (int) instance.value(j)) ? 1 : 0;
      }
    }
    out += att.numValues();
  }

  Instance converted;
  if (instance instanceof SparseInstance) {
    converted = new SparseInstance(instance.weight(), vals);
  } else {
    converted = new DenseInstance(instance.weight(), vals);
  }
  converted.setDataset(getOutputFormat());
  copyValues(converted, false, instance.dataset(), getOutputFormat());
  converted.setDataset(getOutputFormat());
  push(converted);
}
/** * Computes class distribution of an instance using the FastRandomTree. * * <p>In Weka's RandomTree, the distributions were normalized so that all probabilities sum to 1; * this would abolish the effect of instance weights on voting. In FastRandomForest 0.97 onwards, * the distributions are normalized by dividing with the number of instances going into a leaf. * * <p> * * @param instance the instance to compute the distribution for * @return the computed class distribution * @throws Exception if computation fails */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] returnedDist = null; if (m_Attribute > -1) { // ============================ node is not a leaf if (instance.isMissing(m_Attribute)) { // ---------------- missing value returnedDist = new double[m_MotherForest.getM_Info().numClasses()]; // split instance up for (int i = 0; i < m_Successors.length; i++) { double[] help = m_Successors[i].distributionForInstance(instance); if (help != null) { for (int j = 0; j < help.length; j++) { returnedDist[j] += m_Prop[i] * help[j]; } } } } else if (m_MotherForest.getM_Info().attribute(m_Attribute).isNominal()) { // ------ nominal // returnedDist = m_Successors[(int) instance.value(m_Attribute)] // .distributionForInstance(instance); // 0.99: new - binary splits (also) for nominal attributes if (instance.value(m_Attribute) == m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } else { // ------------------------------------------ numeric attributes if (instance.value(m_Attribute) < m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } return returnedDist; } else { // =============================================== node is a leaf return m_ClassProbs; } }