public void findAndSetSupportBoundForKnownAntecedents( Instances thisClassifiersExtension, boolean allWeightsAreOne) { if (m_Antds == null) return; double maxPurity = Double.NEGATIVE_INFINITY; boolean[] finishedAntecedents = new boolean[m_Antds.size()]; int numFinishedAntecedents = 0; while (numFinishedAntecedents < m_Antds.size()) { double maxPurityOfAllAntecedents = Double.NEGATIVE_INFINITY; int bestAntecedentsIndex = -1; double bestSupportBoundForAllAntecedents = Double.NaN; Instances ext = new Instances(thisClassifiersExtension, 0); for (int j = 0; j < m_Antds.size(); j++) { if (finishedAntecedents[j]) continue; ext = new Instances(thisClassifiersExtension); /* * Remove instances which are not relevant, because they are not covered * by the _other_ antecedents. */ for (int k = 0; k < m_Antds.size(); k++) { if (k == j) continue; Antd exclusionAntd = ((Antd) m_Antds.elementAt(k)); for (int y = 0; y < ext.numInstances(); y++) { if (exclusionAntd.covers(ext.instance(y)) == 0) { ext.delete(y--); } } } if (ext.attribute(((Antd) m_Antds.elementAt(j)).att.index()).isNumeric() && ext.numInstances() > 0) { NumericAntd currentAntd = (NumericAntd) ((NumericAntd) m_Antds.elementAt(j)).copy(); currentAntd.fuzzyYet = true; ext.deleteWithMissing(currentAntd.att.index()); double sumOfWeights = ext.sumOfWeights(); if (!Utils.gr(sumOfWeights, 0.0)) return; ext.sort(currentAntd.att.index()); double maxPurityForThisAntecedent = 0; double bestFoundSupportBound = Double.NaN; double lastAccu = 0; double lastCover = 0; // Test all possible edge points if (currentAntd.value == 0) { for (int k = 1; k < ext.numInstances(); k++) { // break the loop if there is no gain (only works when all instances have weight 1) if ((lastAccu + (ext.numInstances() - k - 1)) / (lastCover + (ext.numInstances() - k - 1)) < maxPurityForThisAntecedent && allWeightsAreOne) { break; } // Bag 1 if (currentAntd.splitPoint < ext.instance(k).value(currentAntd.att.index()) && ext.instance(k).value(currentAntd.att.index()) != ext.instance(k - 1).value(currentAntd.att.index())) { currentAntd.supportBound = ext.instance(k).value(currentAntd.att.index()); double[] accuArray = new double[ext.numInstances()]; double[] coverArray = new double[ext.numInstances()]; for (int i = 0; i < ext.numInstances(); i++) { coverArray[i] = ext.instance(i).weight(); double coverValue = currentAntd.covers(ext.instance(i)); if (coverArray[i] >= coverValue * ext.instance(i).weight()) { coverArray[i] = coverValue * ext.instance(i).weight(); if (ext.instance(i).classValue() == m_Consequent) { accuArray[i] = coverValue * ext.instance(i).weight(); } } } double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray)); if (purity >= maxPurityForThisAntecedent) { maxPurityForThisAntecedent = purity; bestFoundSupportBound = currentAntd.supportBound; } lastAccu = Utils.sum(accuArray); lastCover = Utils.sum(coverArray); } } } else { for (int k = ext.numInstances() - 2; k >= 0; k--) { // break the loop if there is no gain (only works when all instances have weight 1) if ((lastAccu + (k)) / (lastCover + (k)) < maxPurityForThisAntecedent && allWeightsAreOne) { break; } // Bag 2 if (currentAntd.splitPoint > ext.instance(k).value(currentAntd.att.index()) && ext.instance(k).value(currentAntd.att.index()) != ext.instance(k + 1).value(currentAntd.att.index())) { currentAntd.supportBound = ext.instance(k).value(currentAntd.att.index()); double[] accuArray = new double[ext.numInstances()]; double[] coverArray = new double[ext.numInstances()]; for (int i = 0; i < ext.numInstances(); i++) { coverArray[i] = ext.instance(i).weight(); double coverValue = currentAntd.covers(ext.instance(i)); if (coverArray[i] >= coverValue * ext.instance(i).weight()) { coverArray[i] = coverValue * ext.instance(i).weight(); if (ext.instance(i).classValue() == m_Consequent) { accuArray[i] = coverValue * ext.instance(i).weight(); } } } double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray)); if (purity >= maxPurityForThisAntecedent) { maxPurityForThisAntecedent = purity; bestFoundSupportBound = currentAntd.supportBound; } lastAccu = Utils.sum(accuArray); lastCover = Utils.sum(coverArray); } } } if (maxPurityForThisAntecedent > maxPurityOfAllAntecedents) { bestAntecedentsIndex = j; bestSupportBoundForAllAntecedents = bestFoundSupportBound; maxPurityOfAllAntecedents = maxPurityForThisAntecedent; } } else { // Nominal Antd finishedAntecedents[j] = true; numFinishedAntecedents++; continue; } } if (bestAntecedentsIndex == -1) { return; } if (maxPurity <= maxPurityOfAllAntecedents) { if (Double.isNaN(bestSupportBoundForAllAntecedents)) { ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).supportBound = ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).splitPoint; } else { ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).supportBound = bestSupportBoundForAllAntecedents; ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).fuzzyYet = true; } maxPurity = maxPurityOfAllAntecedents; } finishedAntecedents[bestAntecedentsIndex] = true; numFinishedAntecedents++; } }
/** * This function fits the rule to the data which it overlaps. This way the rule can only * interpolate but not extrapolate. * * @param instances The data to which the rule shall be fitted */ public void fitAndSetCoreBound(Instances instances) { if (m_Antds == null) return; boolean[] antExistingForDimension = new boolean[instances.numAttributes() - 1]; for (int i = 0; i < m_Antds.size(); i++) { antExistingForDimension[((Antd) m_Antds.elementAt(i)).att.index()] = true; } FastVector newAntds = new FastVector(10); // for (int i=0; i < instances.numAttributes()-1; i++){ for (int iterator = 0; iterator < m_Antds.size(); iterator++) { int i = ((Antd) m_Antds.elementAt(iterator)).getAttr().index(); if (!antExistingForDimension[i]) continue; // Excluding non existant antecedents Instances instancesWithoutMissingValues = new Instances(instances); instancesWithoutMissingValues.deleteWithMissing(i); if (instancesWithoutMissingValues.attribute(i).isNumeric() && instancesWithoutMissingValues.numInstances() > 0) { boolean bag0AntdExists = false; boolean bag1AntdExists = false; for (int j = 0; j < m_Antds.size(); j++) { if (((Antd) m_Antds.elementAt(j)).att.index() == i) { if (((Antd) m_Antds.elementAt(j)).value == 0) { bag0AntdExists = true; } else { bag1AntdExists = true; } newAntds.addElement((Antd) m_Antds.elementAt(j)); } } double higherCore = Double.NaN; double lowerCore = Double.NaN; if (!bag0AntdExists) { if (Double.isNaN(higherCore)) higherCore = instancesWithoutMissingValues.kthSmallestValue( i, instancesWithoutMissingValues.numInstances()); NumericAntd antd; antd = new NumericAntd(instancesWithoutMissingValues.attribute(i)); antd.value = 0; antd.splitPoint = higherCore; newAntds.addElement(antd); } if (!bag1AntdExists) { if (Double.isNaN(lowerCore)) lowerCore = instancesWithoutMissingValues.kthSmallestValue(i, 1); NumericAntd antd; antd = new NumericAntd(instancesWithoutMissingValues.attribute(i)); antd.value = 1; antd.splitPoint = lowerCore; newAntds.addElement(antd); } } else { for (int j = 0; j < m_Antds.size(); j++) { if (((Antd) m_Antds.elementAt(j)).att.index() == i) { newAntds.addElement(m_Antds.elementAt(j)); } } } } m_Antds = newAntds; }