Esempio n. 1
0
  /**
   * Searches and sets the support bound of the antecedents that are already part of this rule.
   *
   * <p>The search runs in rounds. In every round each antecedent that is not finished yet is
   * evaluated on a copy of the data from which all instances were removed that any
   * <em>other</em> antecedent covers with degree 0. For a numeric antecedent every distinct
   * attribute value beyond its split point is tried as support bound, and the purity (covered
   * weight of the instances whose class equals the consequent, divided by the total covered
   * weight) of each candidate is computed. The antecedent that reaches the highest purity in the
   * round is marked as finished; its support bound is only written back if that purity is not
   * lower than the best purity of the previous rounds. If no candidate bound was found for it,
   * the support bound is set to the split point instead.
   *
   * <p>Antecedents on a non-numeric attribute, or for which no instance is left after the
   * removal step, are marked as finished without being modified. The method returns early when
   * the remaining data of an antecedent has no positive total weight, or when a round yields no
   * antecedent to fix.
   *
   * @param thisClassifiersExtension the data used to evaluate candidate support bounds; it is
   *     copied and never modified
   * @param allWeightsAreOne enables the early termination of the candidate scan; that shortcut
   *     is only valid when all instances have weight 1
   */
  public void findAndSetSupportBoundForKnownAntecedents(
      Instances thisClassifiersExtension, boolean allWeightsAreOne) {
    if (m_Antds == null) return;

    double maxPurity = Double.NEGATIVE_INFINITY;
    boolean[] finishedAntecedents = new boolean[m_Antds.size()];
    int numFinishedAntecedents = 0;

    while (numFinishedAntecedents < m_Antds.size()) {
      double maxPurityOfAllAntecedents = Double.NEGATIVE_INFINITY;
      int bestAntecedentsIndex = -1;
      double bestSupportBoundForAllAntecedents = Double.NaN;

      for (int j = 0; j < m_Antds.size(); j++) {
        if (finishedAntecedents[j]) continue;

        Instances ext = new Instances(thisClassifiersExtension);
        /*
         * Remove instances which are not relevant, because they are not covered
         * by the _other_ antecedents.
         */
        for (int k = 0; k < m_Antds.size(); k++) {
          if (k == j) continue;
          Antd exclusionAntd = (Antd) m_Antds.elementAt(k);
          for (int y = 0; y < ext.numInstances(); y++) {
            if (exclusionAntd.covers(ext.instance(y)) == 0) {
              // Step back so that the instance shifted into slot y is examined as well.
              ext.delete(y--);
            }
          }
        }

        boolean numericWithData =
            ext.attribute(((Antd) m_Antds.elementAt(j)).att.index()).isNumeric()
                && ext.numInstances() > 0;
        if (!numericWithData) {
          // Nominal antecedent (or nothing left to evaluate): there is no bound to search.
          finishedAntecedents[j] = true;
          numFinishedAntecedents++;
          continue;
        }

        // Work on a copy so that candidate bounds never leak into the rule itself.
        NumericAntd currentAntd = (NumericAntd) ((NumericAntd) m_Antds.elementAt(j)).copy();
        currentAntd.fuzzyYet = true;
        int attIndex = currentAntd.att.index();
        ext.deleteWithMissing(attIndex);

        if (!Utils.gr(ext.sumOfWeights(), 0.0)) return;

        ext.sort(attIndex);

        double maxPurityForThisAntecedent = 0;
        double bestFoundSupportBound = Double.NaN;
        double lastAccu = 0;
        double lastCover = 0;

        // Antecedents with value 0 are scanned upwards from the second instance (candidates
        // above the split point); all others downwards from the second-to-last instance
        // (candidates below the split point).
        boolean scanUpwards = currentAntd.value == 0;
        int numInst = ext.numInstances();
        int step = scanUpwards ? 1 : -1;

        // Test all possible edge points
        for (int k = scanUpwards ? 1 : numInst - 2; k >= 0 && k < numInst; k += step) {
          // Number of instances that the scan has not reached yet.
          int remaining = scanUpwards ? numInst - k - 1 : k;

          // break the loop if there is no gain (only works when all instances have weight 1)
          if (allWeightsAreOne
              && (lastAccu + remaining) / (lastCover + remaining) < maxPurityForThisAntecedent) {
            break;
          }

          double candidate = ext.instance(k).value(attIndex);
          boolean beyondSplitPoint =
              scanUpwards ? currentAntd.splitPoint < candidate : currentAntd.splitPoint > candidate;
          // Only values beyond the split point that differ from the previously visited
          // neighbour are new edge points.
          if (!beyondSplitPoint || candidate == ext.instance(k - step).value(attIndex)) {
            continue;
          }

          currentAntd.supportBound = candidate;
          double[] accuAndCover = weightedAccuAndCoverFor(ext, currentAntd);

          double purity = accuAndCover[0] / accuAndCover[1];
          if (purity >= maxPurityForThisAntecedent) {
            maxPurityForThisAntecedent = purity;
            bestFoundSupportBound = currentAntd.supportBound;
          }
          lastAccu = accuAndCover[0];
          lastCover = accuAndCover[1];
        }

        if (maxPurityForThisAntecedent > maxPurityOfAllAntecedents) {
          bestAntecedentsIndex = j;
          bestSupportBoundForAllAntecedents = bestFoundSupportBound;
          maxPurityOfAllAntecedents = maxPurityForThisAntecedent;
        }
      }

      if (bestAntecedentsIndex == -1) {
        return;
      }

      if (maxPurity <= maxPurityOfAllAntecedents) {
        NumericAntd bestAntd = (NumericAntd) m_Antds.elementAt(bestAntecedentsIndex);
        if (Double.isNaN(bestSupportBoundForAllAntecedents)) {
          // No candidate bound was found: fall back to the split point.
          bestAntd.supportBound = bestAntd.splitPoint;
        } else {
          bestAntd.supportBound = bestSupportBoundForAllAntecedents;
          bestAntd.fuzzyYet = true;
        }

        maxPurity = maxPurityOfAllAntecedents;
      }
      finishedAntecedents[bestAntecedentsIndex] = true;
      numFinishedAntecedents++;
    }
  }

  /**
   * Computes the weighted statistics of a candidate antecedent over the given data.
   *
   * @param ext the instances to evaluate
   * @param candidate the antecedent, with the support bound under test already set
   * @return a two-element array: index 0 holds the summed covered weight of the instances whose
   *     class value equals the consequent of this rule, index 1 the summed covered weight of all
   *     instances
   */
  private double[] weightedAccuAndCoverFor(Instances ext, NumericAntd candidate) {
    double[] accuArray = new double[ext.numInstances()];
    double[] coverArray = new double[ext.numInstances()];
    for (int i = 0; i < ext.numInstances(); i++) {
      double weight = ext.instance(i).weight();
      double weightedCover = candidate.covers(ext.instance(i)) * weight;
      coverArray[i] = weight;
      // The covered weight is never allowed to exceed the weight of the instance.
      if (weight >= weightedCover) {
        coverArray[i] = weightedCover;
        if (ext.instance(i).classValue() == m_Consequent) {
          accuArray[i] = weightedCover;
        }
      }
    }
    return new double[] {Utils.sum(accuArray), Utils.sum(coverArray)};
  }
Esempio n. 2
0
  /**
   * This function fits the rule to the data which it overlaps. This way the rule can only
   * interpolate but not extrapolate.
   *
   * <p>Every attribute used by at least one antecedent is processed exactly once, in the order
   * of its first occurrence in the rule. All existing antecedents on that attribute are kept. If
   * the attribute is numeric and has at least one non-missing value in the data, the rule is
   * additionally closed on both sides: when no antecedent with {@code value == 0} exists, one is
   * added whose split point is the largest observed value, and when no antecedent with
   * {@code value != 0} exists, one is added whose split point is the smallest observed value.
   *
   * @param instances The data to which the rule shall be fitted
   */
  public void fitAndSetCoreBound(Instances instances) {
    if (m_Antds == null) return;

    // Indexed by attribute index. It spans all attributes so that an antecedent on the last
    // attribute cannot overflow the array when the class attribute is not the last one.
    boolean[] dimensionPending = new boolean[instances.numAttributes()];
    for (int a = 0; a < m_Antds.size(); a++) {
      dimensionPending[((Antd) m_Antds.elementAt(a)).att.index()] = true;
    }

    FastVector newAntds = new FastVector(10);
    for (int iterator = 0; iterator < m_Antds.size(); iterator++) {
      int i = ((Antd) m_Antds.elementAt(iterator)).getAttr().index();

      // Several antecedents may share one attribute. The attribute is handled on its first
      // occurrence only; otherwise its antecedents would be appended once per occurrence.
      if (!dimensionPending[i]) continue;
      dimensionPending[i] = false;

      Instances instancesWithoutMissingValues = new Instances(instances);
      instancesWithoutMissingValues.deleteWithMissing(i);

      // Keep every existing antecedent on this attribute and note which sides are present.
      boolean bag0AntdExists = false;
      boolean bag1AntdExists = false;
      for (int j = 0; j < m_Antds.size(); j++) {
        Antd existing = (Antd) m_Antds.elementAt(j);
        if (existing.att.index() != i) continue;
        if (existing.value == 0) {
          bag0AntdExists = true;
        } else {
          bag1AntdExists = true;
        }
        newAntds.addElement(existing);
      }

      boolean numericWithData =
          instancesWithoutMissingValues.attribute(i).isNumeric()
              && instancesWithoutMissingValues.numInstances() > 0;
      if (!numericWithData) continue; // nothing to add for this attribute

      if (!bag0AntdExists) {
        // The k-th smallest value with k = number of instances is the largest observed value.
        double higherCore =
            instancesWithoutMissingValues.kthSmallestValue(
                i, instancesWithoutMissingValues.numInstances());
        NumericAntd antd = new NumericAntd(instancesWithoutMissingValues.attribute(i));
        antd.value = 0;
        antd.splitPoint = higherCore;
        newAntds.addElement(antd);
      }

      if (!bag1AntdExists) {
        // The 1st smallest value is the smallest observed value.
        double lowerCore = instancesWithoutMissingValues.kthSmallestValue(i, 1);
        NumericAntd antd = new NumericAntd(instancesWithoutMissingValues.attribute(i));
        antd.value = 1;
        antd.splitPoint = lowerCore;
        newAntds.addElement(antd);
      }
    }
    m_Antds = newAntds;
  }