/**
 * Computes how strongly this rule covers the given instance.
 *
 * <p>The result is the product of the coverage values returned by every antecedent of the rule.
 * A rule without antecedents yields 1.
 *
 * @param datum the instance in question
 * @return the degree to which the instance is covered by this rule
 */
public double coverageDegree(Instance datum) {
  double degree = 1;
  int index = 0;
  while (index < m_Antds.size()) {
    Antd condition = (Antd) m_Antds.elementAt(index);
    degree = degree * condition.covers(datum);
    index++;
  }
  return degree;
}
/**
 * Builds a textual description of this rule.
 *
 * <p>Antecedents are rendered in order as {@code (antd) and (antd) ...}, followed by
 * {@code " => <class name>=<class value>"} for the consequent.
 *
 * @param classAttr the class attribute in the data
 * @return a textual description of this rule
 */
public String toString(AttributeWeka classAttr) {
  StringBuilder description = new StringBuilder();

  for (int pos = 0; pos < m_Antds.size(); pos++) {
    if (pos > 0) {
      description.append(" and ");
    }
    Antd condition = (Antd) m_Antds.elementAt(pos);
    description.append("(").append(condition.toString()).append(")");
  }

  String consequent = " => " + classAttr.name() + "=" + classAttr.value((int) m_Consequent);
  description.append(consequent);
  return description.toString();
}
/**
 * Exchanges the elements stored at positions {@code a} and {@code b}.
 *
 * <p>Nothing happens when either index lies outside {@code [0, size())}. The exchange is done on
 * {@code fv} when it is set, otherwise on {@code v}.
 *
 * @param a index of the first element
 * @param b index of the second element
 */
protected void swap(int a, int b) {
  boolean bothInRange = a >= 0 && a < size() && b >= 0 && b < size();
  if (!bothInRange) {
    return;
  }

  if (fv == null) {
    Object first = v.elementAt(a);
    Object second = v.elementAt(b);
    v.setElementAt(second, a);
    v.setElementAt(first, b);
    return;
  }

  Object first = fv.elementAt(a);
  Object second = fv.elementAt(b);
  fv.setElementAt(second, a);
  fv.setElementAt(first, b);
}
/**
 * Returns the confidence stored in the last antecedent of this rule.
 *
 * @return the {@code m_confidence} of the last antecedent, or {@code Double.NaN} when the rule has
 *     no antecedents
 */
public double getConfidence() {
  return hasAntds() ? ((Antd) m_Antds.lastElement()).m_confidence : Double.NaN;
}
/**
 * Searches for and stores support bounds for the numeric antecedents of this rule, finalising one
 * antecedent per pass of the outer loop.
 *
 * <p>In every pass each antecedent that is not yet marked finished is evaluated on a copy of the
 * given data from which all instances with a coverage of 0 under any <em>other</em> antecedent
 * have been deleted. For a numeric antecedent, candidate support bounds are attribute values of
 * the remaining instances that lie beyond the antecedent's split point. Each candidate is scored
 * by its purity: the summed weighted coverage of instances whose class equals the consequent,
 * divided by the summed weighted coverage of all instances. The antecedent with the highest
 * purity in the pass is marked finished. Its {@code supportBound} is written back only if that
 * purity is not lower than the best purity of the previous passes; when no candidate bound was
 * found the split point is used as the support bound.
 *
 * <p>The method returns early, leaving the remaining antecedents untouched, when {@code m_Antds}
 * is {@code null}, when {@code Utils.gr} reports that the weight sum of the filtered data is not
 * greater than 0, or when a pass selects no antecedent.
 *
 * @param thisClassifiersExtension the data on which candidate support bounds are evaluated; the
 *     method works on copies created with the {@code Instances} copy constructor
 * @param allWeightsAreOne enables the early-termination check of the candidate scans, which
 *     (according to the original author's note) is only valid when every instance has weight 1
 */
public void findAndSetSupportBoundForKnownAntecedents(
    Instances thisClassifiersExtension, boolean allWeightsAreOne) {
  if (m_Antds == null) return;

  // Best purity accepted in any previous pass.
  double maxPurity = Double.NEGATIVE_INFINITY;
  boolean[] finishedAntecedents = new boolean[m_Antds.size()];
  int numFinishedAntecedents = 0;

  while (numFinishedAntecedents < m_Antds.size()) {
    double maxPurityOfAllAntecedents = Double.NEGATIVE_INFINITY;
    int bestAntecedentsIndex = -1;
    double bestSupportBoundForAllAntecedents = Double.NaN;
    // NOTE(review): this empty header copy is never read; ext is reassigned below before its
    // first use in every iteration.
    Instances ext = new Instances(thisClassifiersExtension, 0);

    for (int j = 0; j < m_Antds.size(); j++) {
      if (finishedAntecedents[j]) continue;

      ext = new Instances(thisClassifiersExtension);
      /*
       * Remove instances which are not relevant, because they are not covered
       * by the _other_ antecedents.
       */
      for (int k = 0; k < m_Antds.size(); k++) {
        if (k == j) continue;
        Antd exclusionAntd = ((Antd) m_Antds.elementAt(k));
        for (int y = 0; y < ext.numInstances(); y++) {
          if (exclusionAntd.covers(ext.instance(y)) == 0) {
            // y-- re-visits the same index, which now holds the next instance.
            ext.delete(y--);
          }
        }
      }

      if (ext.attribute(((Antd) m_Antds.elementAt(j)).att.index()).isNumeric()
          && ext.numInstances() > 0) {
        // Work on a copy so that trial bounds do not touch the rule's own antecedent.
        NumericAntd currentAntd = (NumericAntd) ((NumericAntd) m_Antds.elementAt(j)).copy();
        currentAntd.fuzzyYet = true;
        ext.deleteWithMissing(currentAntd.att.index());
        double sumOfWeights = ext.sumOfWeights();
        if (!Utils.gr(sumOfWeights, 0.0)) return;
        // Presumably sorts ascending by the attribute value - TODO confirm against Instances.sort.
        ext.sort(currentAntd.att.index());

        double maxPurityForThisAntecedent = 0;
        double bestFoundSupportBound = Double.NaN;
        // Sums of the most recently evaluated candidate, used by the early-termination check.
        double lastAccu = 0;
        double lastCover = 0;

        // Test all possible edge points
        if (currentAntd.value == 0) {
          // Candidates are values greater than the split point, scanned by increasing index.
          for (int k = 1; k < ext.numInstances(); k++) {
            // break the loop if there is no gain (only works when all instances have weight 1)
            if ((lastAccu + (ext.numInstances() - k - 1))
                        / (lastCover + (ext.numInstances() - k - 1))
                    < maxPurityForThisAntecedent
                && allWeightsAreOne) {
              break;
            }
            // Bag 1
            // Only values that differ from the preceding instance's value are tried.
            if (currentAntd.splitPoint < ext.instance(k).value(currentAntd.att.index())
                && ext.instance(k).value(currentAntd.att.index())
                    != ext.instance(k - 1).value(currentAntd.att.index())) {
              currentAntd.supportBound = ext.instance(k).value(currentAntd.att.index());
              double[] accuArray = new double[ext.numInstances()];
              double[] coverArray = new double[ext.numInstances()];
              for (int i = 0; i < ext.numInstances(); i++) {
                coverArray[i] = ext.instance(i).weight();
                double coverValue = currentAntd.covers(ext.instance(i));
                // When the weighted coverage exceeds the plain weight, coverArray keeps the
                // weight and accuArray stays 0 for this instance.
                if (coverArray[i] >= coverValue * ext.instance(i).weight()) {
                  coverArray[i] = coverValue * ext.instance(i).weight();
                  if (ext.instance(i).classValue() == m_Consequent) {
                    accuArray[i] = coverValue * ext.instance(i).weight();
                  }
                }
              }
              double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
              // ">=" lets a later candidate with equal purity replace an earlier one.
              if (purity >= maxPurityForThisAntecedent) {
                maxPurityForThisAntecedent = purity;
                bestFoundSupportBound = currentAntd.supportBound;
              }
              lastAccu = Utils.sum(accuArray);
              lastCover = Utils.sum(coverArray);
            }
          }
        } else {
          // Candidates are values smaller than the split point, scanned by decreasing index.
          for (int k = ext.numInstances() - 2; k >= 0; k--) {
            // break the loop if there is no gain (only works when all instances have weight 1)
            if ((lastAccu + (k)) / (lastCover + (k)) < maxPurityForThisAntecedent
                && allWeightsAreOne) {
              break;
            }
            // Bag 2
            // Only values that differ from the following instance's value are tried.
            if (currentAntd.splitPoint > ext.instance(k).value(currentAntd.att.index())
                && ext.instance(k).value(currentAntd.att.index())
                    != ext.instance(k + 1).value(currentAntd.att.index())) {
              currentAntd.supportBound = ext.instance(k).value(currentAntd.att.index());
              double[] accuArray = new double[ext.numInstances()];
              double[] coverArray = new double[ext.numInstances()];
              for (int i = 0; i < ext.numInstances(); i++) {
                coverArray[i] = ext.instance(i).weight();
                double coverValue = currentAntd.covers(ext.instance(i));
                if (coverArray[i] >= coverValue * ext.instance(i).weight()) {
                  coverArray[i] = coverValue * ext.instance(i).weight();
                  if (ext.instance(i).classValue() == m_Consequent) {
                    accuArray[i] = coverValue * ext.instance(i).weight();
                  }
                }
              }
              double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
              if (purity >= maxPurityForThisAntecedent) {
                maxPurityForThisAntecedent = purity;
                bestFoundSupportBound = currentAntd.supportBound;
              }
              lastAccu = Utils.sum(accuArray);
              lastCover = Utils.sum(coverArray);
            }
          }
        }

        // Remember the antecedent with the strictly highest purity in this pass.
        if (maxPurityForThisAntecedent > maxPurityOfAllAntecedents) {
          bestAntecedentsIndex = j;
          bestSupportBoundForAllAntecedents = bestFoundSupportBound;
          maxPurityOfAllAntecedents = maxPurityForThisAntecedent;
        }
      } else {
        // Nominal Antd
        // This branch is also taken for a numeric antecedent when no instances remain in ext.
        finishedAntecedents[j] = true;
        numFinishedAntecedents++;
        continue;
      }
    }

    // No antecedent was selected in this pass: nothing left to finalise.
    if (bestAntecedentsIndex == -1) {
      return;
    }

    // Write the bound back only if this pass is at least as pure as every earlier pass.
    if (maxPurity <= maxPurityOfAllAntecedents) {
      if (Double.isNaN(bestSupportBoundForAllAntecedents)) {
        // No candidate bound was found: the support bound falls back to the split point.
        ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).supportBound =
            ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).splitPoint;
      } else {
        ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).supportBound =
            bestSupportBoundForAllAntecedents;
        ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).fuzzyYet = true;
      }
      maxPurity = maxPurityOfAllAntecedents;
    }
    finishedAntecedents[bestAntecedentsIndex] = true;
    numFinishedAntecedents++;
  }
}
/** * This function fits the rule to the data which it overlaps. This way the rule can only * interpolate but not extrapolate. * * @param instances The data to which the rule shall be fitted */ public void fitAndSetCoreBound(Instances instances) { if (m_Antds == null) return; boolean[] antExistingForDimension = new boolean[instances.numAttributes() - 1]; for (int i = 0; i < m_Antds.size(); i++) { antExistingForDimension[((Antd) m_Antds.elementAt(i)).att.index()] = true; } FastVector newAntds = new FastVector(10); // for (int i=0; i < instances.numAttributes()-1; i++){ for (int iterator = 0; iterator < m_Antds.size(); iterator++) { int i = ((Antd) m_Antds.elementAt(iterator)).getAttr().index(); if (!antExistingForDimension[i]) continue; // Excluding non existant antecedents Instances instancesWithoutMissingValues = new Instances(instances); instancesWithoutMissingValues.deleteWithMissing(i); if (instancesWithoutMissingValues.attribute(i).isNumeric() && instancesWithoutMissingValues.numInstances() > 0) { boolean bag0AntdExists = false; boolean bag1AntdExists = false; for (int j = 0; j < m_Antds.size(); j++) { if (((Antd) m_Antds.elementAt(j)).att.index() == i) { if (((Antd) m_Antds.elementAt(j)).value == 0) { bag0AntdExists = true; } else { bag1AntdExists = true; } newAntds.addElement((Antd) m_Antds.elementAt(j)); } } double higherCore = Double.NaN; double lowerCore = Double.NaN; if (!bag0AntdExists) { if (Double.isNaN(higherCore)) higherCore = instancesWithoutMissingValues.kthSmallestValue( i, instancesWithoutMissingValues.numInstances()); NumericAntd antd; antd = new NumericAntd(instancesWithoutMissingValues.attribute(i)); antd.value = 0; antd.splitPoint = higherCore; newAntds.addElement(antd); } if (!bag1AntdExists) { if (Double.isNaN(lowerCore)) lowerCore = instancesWithoutMissingValues.kthSmallestValue(i, 1); NumericAntd antd; antd = new NumericAntd(instancesWithoutMissingValues.attribute(i)); antd.value = 1; antd.splitPoint = lowerCore; 
newAntds.addElement(antd); } } else { for (int j = 0; j < m_Antds.size(); j++) { if (((Antd) m_Antds.elementAt(j)).att.index() == i) { newAntds.addElement(m_Antds.elementAt(j)); } } } } m_Antds = newAntds; }
/** * Prune all the possible final sequences of the rule using the pruning data. The measure used to * prune the rule is based on flag given. * * @param pruneData the pruning data used to prune the rule * @param useWhole flag to indicate whether use the error rate of the whole pruning data instead * of the data covered */ public void prune(Instances pruneData, boolean useWhole) { Instances data = pruneData; double total = data.sumOfWeights(); if (!Utils.gr(total, 0.0)) return; /* The default accurate # and rate on pruning data */ double defAccu = computeDefAccu(data); if (m_Debug) System.err.println( "Pruning with " + defAccu + " positive data out of " + total + " instances"); int size = m_Antds.size(); if (size == 0) return; // Default rule before pruning double[] worthRt = new double[size]; double[] coverage = new double[size]; double[] worthValue = new double[size]; for (int w = 0; w < size; w++) { worthRt[w] = coverage[w] = worthValue[w] = 0.0; } /* Calculate accuracy parameters for all the antecedents in this rule */ double tn = 0.0; // True negative if useWhole for (int x = 0; x < size; x++) { Antd antd = (Antd) m_Antds.elementAt(x); Instances newData = data; data = new Instances(newData, 0); // Make data empty for (int y = 0; y < newData.numInstances(); y++) { Instance ins = newData.instance(y); if (antd.covers(ins) > 0) { // Covered by this antecedent coverage[x] += ins.weight(); data.add(ins); // Add to data for further pruning if ((int) ins.classValue() == (int) m_Consequent) // Accurate prediction worthValue[x] += ins.weight(); } else if (useWhole) { // Not covered if ((int) ins.classValue() != (int) m_Consequent) tn += ins.weight(); } } if (useWhole) { worthValue[x] += tn; worthRt[x] = worthValue[x] / total; } else // Note if coverage is 0, accuracy is 0.5 worthRt[x] = (worthValue[x] + 1.0) / (coverage[x] + 2.0); } double maxValue = (defAccu + 1.0) / (total + 2.0); int maxIndex = -1; for (int i = 0; i < worthValue.length; i++) { if (m_Debug) { double 
denom = useWhole ? total : coverage[i]; System.err.println( i + "(useAccuray? " + !useWhole + "): " + worthRt[i] + "=" + worthValue[i] + "/" + denom); } if (worthRt[i] > maxValue) { // Prefer to the maxValue = worthRt[i]; // shorter rule maxIndex = i; } } if (maxIndex == -1) return; /* Prune the antecedents according to the accuracy parameters */ for (int z = size - 1; z > maxIndex; z--) m_Antds.removeElementAt(z); }
/** * Build one rule using the growing data * * @param data the growing data used to build the rule * @throws Exception if the consequent is not set yet */ public void grow(Instances data) throws Exception { if (m_Consequent == -1) throw new Exception(" Consequent not set yet."); Instances growData = data; double sumOfWeights = growData.sumOfWeights(); if (!Utils.gr(sumOfWeights, 0.0)) return; /* Compute the default accurate rate of the growing data */ double defAccu = computeDefAccu(growData); double defAcRt = (defAccu + 1.0) / (sumOfWeights + 1.0); /* Keep the record of which attributes have already been used*/ boolean[] used = new boolean[growData.numAttributes()]; for (int k = 0; k < used.length; k++) used[k] = false; int numUnused = used.length; // If there are already antecedents existing for (int j = 0; j < m_Antds.size(); j++) { Antd antdj = (Antd) m_Antds.elementAt(j); if (!antdj.getAttr().isNumeric()) { used[antdj.getAttr().index()] = true; numUnused--; } } double maxInfoGain; while (Utils.gr(growData.numInstances(), 0.0) && (numUnused > 0) && Utils.sm(defAcRt, 1.0)) { // We require that infoGain be positive /*if(numAntds == originalSize) maxInfoGain = 0.0; // At least one condition allowed else maxInfoGain = Utils.eq(defAcRt, 1.0) ? defAccu/(double)numAntds : 0.0; */ maxInfoGain = 0.0; /* Build a list of antecedents */ Antd oneAntd = null; Instances coverData = null; Enumeration enumAttr = growData.enumerateAttributes(); /* Build one condition based on all attributes not used yet*/ while (enumAttr.hasMoreElements()) { AttributeWeka att = (AttributeWeka) (enumAttr.nextElement()); if (m_Debug) System.err.println("\nOne condition: size = " + growData.sumOfWeights()); Antd antd = null; if (att.isNumeric()) antd = new NumericAntd(att); else antd = new NominalAntd(att); if (!used[att.index()]) { /* Compute the best information gain for each attribute, it's stored in the antecedent formed by this attribute. 
This procedure returns the data covered by the antecedent*/ Instances coveredData = computeInfoGain(growData, defAcRt, antd); if (coveredData != null) { double infoGain = antd.getMaxInfoGain(); if (m_Debug) System.err.println( "Test of \'" + antd.toString() + "\': infoGain = " + infoGain + " | Accuracy = " + antd.getAccuRate() + "=" + antd.getAccu() + "/" + antd.getCover() + " def. accuracy: " + defAcRt); if (infoGain > maxInfoGain) { oneAntd = antd; coverData = coveredData; maxInfoGain = infoGain; } } } } if (oneAntd == null) break; // Cannot find antds if (Utils.sm(oneAntd.getAccu(), m_MinNo)) break; // Too low coverage // Numeric attributes can be used more than once if (!oneAntd.getAttr().isNumeric()) { used[oneAntd.getAttr().index()] = true; numUnused--; } m_Antds.addElement(oneAntd); growData = coverData; // Grow data size is shrinking defAcRt = oneAntd.getAccuRate(); } }
/**
 * Returns the number of antecedents of this rule.
 *
 * @return the size of this rule, as a {@code double}
 */
public double size() {
  int antecedentCount = m_Antds.size();
  return antecedentCount;
}
/**
 * Tells whether this rule has antecedents, i.e. whether it is not a default rule.
 *
 * @return {@code true} if the antecedent list exists and holds at least one antecedent
 */
public boolean hasAntds() {
  return m_Antds != null && m_Antds.size() > 0;
}
/**
 * Returns the number of stored elements.
 *
 * @return the size of {@code fv} when it is set, otherwise the size of {@code v}
 */
protected final int size() {
  if (fv != null) {
    return fv.size();
  }
  return v.size();
}
/**
 * Main method for testing this class.
 *
 * <p>Builds a small dataset "race" with two numeric attributes and one nominal class attribute,
 * creates a binary sparse instance for it and prints the results of a series of operations on
 * the instance and on a copy of it. Any exception is printed as a stack trace.
 *
 * @param options ignored
 */
public static void main(String[] options) {
  try {
    // Numeric attributes "length" and "weight"
    Attribute length = new Attribute("length");
    Attribute weight = new Attribute("weight");

    // Nominal attribute "position" with the values "first" and "second"
    FastVector nominalValues = new FastVector(3);
    nominalValues.addElement("first");
    nominalValues.addElement("second");
    Attribute position = new Attribute("position", nominalValues);

    // Empty dataset "race" over the three attributes, with position as the class
    FastVector attributeList = new FastVector(3);
    attributeList.addElement(length);
    attributeList.addElement(weight);
    attributeList.addElement(position);
    Instances race = new Instances("race", attributeList, 0);
    race.setClassIndex(position.index());

    // Instance with three attribute values, attached to "race"
    BinarySparseInstance inst = new BinarySparseInstance(3);
    inst.setValue(length, (float) 5.3);
    inst.setValue(weight, 300);
    inst.setValue(position, "first");
    inst.setDataset(race);

    // Basic accessors of the instance
    System.out.println("The instance: " + inst);
    System.out.println("First attribute: " + inst.attribute(0));
    System.out.println("Class attribute: " + inst.classAttribute());
    System.out.println("Class index: " + inst.classIndex());
    System.out.println("Class is missing: " + inst.classIsMissing());
    System.out.println("Class value (internal format): " + inst.classValue());

    // Shallow copy, first without and then with a dataset
    SparseInstance copy = (SparseInstance) inst.copy();
    System.out.println("Shallow copy: " + copy);
    copy.setDataset(inst.dataset());
    System.out.println("Shallow copy with dataset set: " + copy);

    // Stored values as they are
    System.out.print("All stored values in internal format: ");
    printStoredValues(inst);

    // Stored values after setting everything to zero
    System.out.print("All values set to zero: ");
    while (inst.numValues() > 0) {
      inst.setValueSparse(0, 0);
    }
    printStoredValues(inst);

    // Stored values after setting everything to one
    System.out.print("All values set to one: ");
    for (int attIndex = 0; attIndex < inst.numAttributes(); attIndex++) {
      inst.setValue(attIndex, 1);
    }
    printStoredValues(inst);

    // Detach the copy, delete and re-insert the first attribute, attach again
    copy.setDataset(null);
    copy.deleteAttributeAt(0);
    copy.insertAttributeAt(0);
    copy.setDataset(inst.dataset());
    System.out.println("Copy with first attribute deleted and inserted: " + copy);

    // Same for the second attribute
    copy.setDataset(null);
    copy.deleteAttributeAt(1);
    copy.insertAttributeAt(1);
    copy.setDataset(inst.dataset());
    System.out.println("Copy with second attribute deleted and inserted: " + copy);

    // Same for the last attribute
    copy.setDataset(null);
    copy.deleteAttributeAt(2);
    copy.insertAttributeAt(2);
    copy.setDataset(inst.dataset());
    System.out.println("Copy with third attribute deleted and inserted: " + copy);

    // Enumerate attributes (leaving out the class attribute)
    System.out.println("Enumerating attributes (leaving out class):");
    for (Enumeration attrs = inst.enumerateAttributes(); attrs.hasMoreElements(); ) {
      Attribute current = (Attribute) attrs.nextElement();
      System.out.println(current);
    }

    // Headers are equivalent?
    System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

    // Test for missing values
    System.out.println("Length of copy missing: " + copy.isMissing(length));
    System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
    System.out.println(
        "Length of copy missing: " + Instance.isMissingValue(copy.value(length)));
    System.out.println("Missing value coded as: " + Instance.missingValue());

    // Number of attributes and classes
    System.out.println("Number of attributes: " + copy.numAttributes());
    System.out.println("Number of classes: " + copy.numClasses());

    // Replace missing values
    float[] replacements = {2, 3, 0};
    copy.replaceMissingValues(replacements);
    System.out.println("Copy with missing value replaced: " + copy);

    // Setting and getting class values
    copy.setClassMissing();
    System.out.println("Copy with missing class: " + copy);
    copy.setClassValue(0);
    System.out.println("Copy with class value set to first value: " + copy);
    copy.setClassValue("second");
    System.out.println("Copy with class value set to \"second\": " + copy);

    // Setting attribute values to missing and to concrete values
    copy.setMissing(1);
    System.out.println("Copy with second attribute set to be missing: " + copy);
    copy.setMissing(length);
    System.out.println("Copy with length set to be missing: " + copy);
    copy.setValue(0, 0);
    System.out.println("Copy with first attribute set to 0: " + copy);
    copy.setValue(weight, 1);
    System.out.println("Copy with weight attribute set to 1: " + copy);
    copy.setValue(position, "second");
    System.out.println("Copy with position set to \"second\": " + copy);
    copy.setValue(2, "first");
    System.out.println("Copy with last attribute set to \"first\": " + copy);

    // Instance weight
    System.out.println("Current weight of instance copy: " + copy.weight());
    copy.setWeight(2);
    System.out.println("Current weight of instance copy (set to 2): " + copy.weight());

    // Single values in external and internal format
    System.out.println("Last value of copy: " + copy.toString(2));
    System.out.println("Value of position for copy: " + copy.toString(position));
    System.out.println("Last value of copy (internal format): " + copy.value(2));
    System.out.println("Value of position for copy (internal format): " + copy.value(position));
  } catch (Exception e) {
    e.printStackTrace();
  }
}

/** Prints the stored (sparse) values of the instance, comma-separated, and ends the line. */
private static void printStoredValues(BinarySparseInstance instance) {
  for (int i = 0; i < instance.numValues(); i++) {
    if (i > 0) {
      System.out.print(",");
    }
    System.out.print(instance.valueSparse(i));
  }
  System.out.println();
}