예제 #1
0
  /**
   * Computes how strongly this rule covers an instance by multiplying together the coverage values
   * reported by each of its antecedents, in antecedent order. A rule without antecedents yields 1.
   *
   * @param datum the instance in question
   * @return the product of the antecedents' coverage values for the instance
   */
  public double coverageDegree(Instance datum) {
    double degree = 1;

    int index = 0;
    while (index < m_Antds.size()) {
      Antd current = (Antd) m_Antds.elementAt(index);
      degree = degree * current.covers(datum);
      index++;
    }

    return degree;
  }
예제 #2
0
  /**
   * Builds a textual form of this rule: every antecedent wrapped in parentheses and joined by
   * " and ", followed by " => ", the class attribute's name, "=" and the label of the consequent.
   *
   * @param classAttr the class attribute in the data
   * @return a textual description of this rule
   */
  public String toString(AttributeWeka classAttr) {
    StringBuffer description = new StringBuffer();

    final int antecedentCount = m_Antds.size();
    for (int idx = 0; idx < antecedentCount; idx++) {
      // The separator goes between antecedents only, never before the first one.
      if (idx > 0) {
        description.append(" and ");
      }
      description.append("(").append(((Antd) m_Antds.elementAt(idx)).toString()).append(")");
    }

    description
        .append(" => ")
        .append(classAttr.name())
        .append("=")
        .append(classAttr.value((int) m_Consequent));

    return description.toString();
  }
예제 #3
0
 /**
  * Exchanges the elements stored at positions {@code a} and {@code b}. The call does nothing when
  * either position lies outside {@code [0, size())}. The exchange is done on {@code fv} when it is
  * non-null and on {@code v} otherwise.
  *
  * @param a position of the first element
  * @param b position of the second element
  */
 protected void swap(int a, int b) {
   final int count = size();
   final boolean aInRange = 0 <= a && a < count;
   final boolean bInRange = 0 <= b && b < count;
   if (!(aInRange && bInRange)) {
     return;
   }

   if (fv == null) {
     Object first = v.elementAt(a);
     v.setElementAt(v.elementAt(b), a);
     v.setElementAt(first, b);
     return;
   }

   Object first = fv.elementAt(a);
   fv.setElementAt(fv.elementAt(b), a);
   fv.setElementAt(first, b);
 }
예제 #4
0
 /**
  * Reports the confidence stored on the last antecedent of this rule.
  *
  * @return the {@code m_confidence} of the last antecedent, or {@code Double.NaN} when the rule
  *     has no antecedents
  */
 public double getConfidence() {
   return hasAntds() ? ((Antd) m_Antds.lastElement()).m_confidence : Double.NaN;
 }
예제 #5
0
  /**
   * Chooses and stores a {@code supportBound} for the numeric antecedents of this rule.
   *
   * <p>The method works in rounds. In every round each antecedent that is not yet finished is
   * evaluated on the instances that all <em>other</em> antecedents cover; candidate support bounds
   * are taken from the attribute values found in that data, and each candidate is scored by its
   * purity (weighted, correctly classified coverage divided by weighted coverage). The antecedent
   * with the highest purity in the round gets its bound written back and is marked finished.
   * Antecedents on non-numeric attributes, and antecedents for which no instances remain, are
   * marked finished without being changed.
   *
   * @param thisClassifiersExtension the instances used to evaluate candidate support bounds
   * @param allWeightsAreOne whether the early-termination test inside the candidate scans may be
   *     applied; the original comment states that test is only valid when every instance has
   *     weight 1
   */
  public void findAndSetSupportBoundForKnownAntecedents(
      Instances thisClassifiersExtension, boolean allWeightsAreOne) {
    if (m_Antds == null) return;

    // Purity of the most recently accepted bound; a later round only writes a bound when its
    // purity is at least this high.
    double maxPurity = Double.NEGATIVE_INFINITY;
    boolean[] finishedAntecedents = new boolean[m_Antds.size()];
    int numFinishedAntecedents = 0;

    // One round per iteration: every round finishes at least one antecedent or returns.
    while (numFinishedAntecedents < m_Antds.size()) {
      double maxPurityOfAllAntecedents = Double.NEGATIVE_INFINITY;
      int bestAntecedentsIndex = -1;
      double bestSupportBoundForAllAntecedents = Double.NaN;

      // This empty copy is overwritten before it is read for every unfinished antecedent below.
      Instances ext = new Instances(thisClassifiersExtension, 0);
      for (int j = 0; j < m_Antds.size(); j++) {
        if (finishedAntecedents[j]) continue;

        ext = new Instances(thisClassifiersExtension);
        /*
         * Remove instances which are not relevant, because they are not covered
         * by the _other_ antecedents.
         */
        for (int k = 0; k < m_Antds.size(); k++) {
          if (k == j) continue;
          Antd exclusionAntd = ((Antd) m_Antds.elementAt(k));
          for (int y = 0; y < ext.numInstances(); y++) {
            if (exclusionAntd.covers(ext.instance(y)) == 0) {
              // Step the index back so the element that moves into slot y is examined as well.
              ext.delete(y--);
            }
          }
        }

        if (ext.attribute(((Antd) m_Antds.elementAt(j)).att.index()).isNumeric()
            && ext.numInstances() > 0) {
          // Work on a copy so that trying out candidate bounds does not alter the rule itself.
          NumericAntd currentAntd = (NumericAntd) ((NumericAntd) m_Antds.elementAt(j)).copy();
          currentAntd.fuzzyYet = true;
          ext.deleteWithMissing(currentAntd.att.index());

          double sumOfWeights = ext.sumOfWeights();
          // NOTE(review): this leaves the whole method, not only the evaluation of antecedent j;
          // antecedents that are still unfinished keep their current bounds. Confirm intended.
          if (!Utils.gr(sumOfWeights, 0.0)) return;

          ext.sort(currentAntd.att.index());

          double maxPurityForThisAntecedent = 0;
          double bestFoundSupportBound = Double.NaN;

          // Accuracy and coverage sums of the most recently evaluated candidate; they feed the
          // early-termination test of the scans below.
          double lastAccu = 0;
          double lastCover = 0;
          // Test all possible edge points
          if (currentAntd.value == 0) {
            // Scan the sorted data upwards; candidates are values above the split point.
            for (int k = 1; k < ext.numInstances(); k++) {
              // break the loop if there is no gain (only works when all instances have weight 1)
              if ((lastAccu + (ext.numInstances() - k - 1))
                          / (lastCover + (ext.numInstances() - k - 1))
                      < maxPurityForThisAntecedent
                  && allWeightsAreOne) {
                break;
              }

              // Bag 1: the value must lie above the split point and differ from its predecessor,
              // so that each distinct value is tried only once.
              if (currentAntd.splitPoint < ext.instance(k).value(currentAntd.att.index())
                  && ext.instance(k).value(currentAntd.att.index())
                      != ext.instance(k - 1).value(currentAntd.att.index())) {
                currentAntd.supportBound = ext.instance(k).value(currentAntd.att.index());

                // Per-instance weighted coverage, and the share of it that has the rule's class.
                double[] accuArray = new double[ext.numInstances()];
                double[] coverArray = new double[ext.numInstances()];
                for (int i = 0; i < ext.numInstances(); i++) {
                  coverArray[i] = ext.instance(i).weight();
                  double coverValue = currentAntd.covers(ext.instance(i));
                  // coverArray[i] holds the plain weight here, so this keeps the weighted
                  // coverage only when it does not exceed the instance weight.
                  if (coverArray[i] >= coverValue * ext.instance(i).weight()) {
                    coverArray[i] = coverValue * ext.instance(i).weight();
                    if (ext.instance(i).classValue() == m_Consequent) {
                      accuArray[i] = coverValue * ext.instance(i).weight();
                    }
                  }
                }

                double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
                // ">=" lets a later candidate with equal purity replace an earlier one.
                if (purity >= maxPurityForThisAntecedent) {
                  maxPurityForThisAntecedent = purity;
                  bestFoundSupportBound = currentAntd.supportBound;
                }
                lastAccu = Utils.sum(accuArray);
                lastCover = Utils.sum(coverArray);
              }
            }
          } else {
            // Scan the sorted data downwards; candidates are values below the split point.
            for (int k = ext.numInstances() - 2; k >= 0; k--) {
              // break the loop if there is no gain (only works when all instances have weight 1)
              if ((lastAccu + (k)) / (lastCover + (k)) < maxPurityForThisAntecedent
                  && allWeightsAreOne) {
                break;
              }
              // Bag 2: the value must lie below the split point and differ from its successor,
              // so that each distinct value is tried only once.
              if (currentAntd.splitPoint > ext.instance(k).value(currentAntd.att.index())
                  && ext.instance(k).value(currentAntd.att.index())
                      != ext.instance(k + 1).value(currentAntd.att.index())) {
                currentAntd.supportBound = ext.instance(k).value(currentAntd.att.index());

                // Same purity computation as in the upward scan above.
                double[] accuArray = new double[ext.numInstances()];
                double[] coverArray = new double[ext.numInstances()];
                for (int i = 0; i < ext.numInstances(); i++) {
                  coverArray[i] = ext.instance(i).weight();
                  double coverValue = currentAntd.covers(ext.instance(i));
                  if (coverArray[i] >= coverValue * ext.instance(i).weight()) {
                    coverArray[i] = coverValue * ext.instance(i).weight();
                    if (ext.instance(i).classValue() == m_Consequent) {
                      accuArray[i] = coverValue * ext.instance(i).weight();
                    }
                  }
                }

                double purity = (Utils.sum(accuArray)) / (Utils.sum(coverArray));
                if (purity >= maxPurityForThisAntecedent) {
                  maxPurityForThisAntecedent = purity;
                  bestFoundSupportBound = currentAntd.supportBound;
                }
                lastAccu = Utils.sum(accuArray);
                lastCover = Utils.sum(coverArray);
              }
            }
          }

          // Strict ">" keeps the earlier antecedent when two antecedents tie in this round.
          if (maxPurityForThisAntecedent > maxPurityOfAllAntecedents) {
            bestAntecedentsIndex = j;
            bestSupportBoundForAllAntecedents = bestFoundSupportBound;
            maxPurityOfAllAntecedents = maxPurityForThisAntecedent;
          }
        } else {
          // Non-numeric antecedent, or no instances left after filtering: nothing to search.
          finishedAntecedents[j] = true;
          numFinishedAntecedents++;
          continue;
        }
      }

      // No numeric antecedent was evaluated in this round, so there is nothing left to set.
      if (bestAntecedentsIndex == -1) {
        return;
      }

      // Write the bound back only if it is at least as pure as the previously accepted one.
      if (maxPurity <= maxPurityOfAllAntecedents) {
        if (Double.isNaN(bestSupportBoundForAllAntecedents)) {
          // No candidate bound was found: use the split point itself as the support bound.
          ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).supportBound =
              ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).splitPoint;
        } else {
          ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).supportBound =
              bestSupportBoundForAllAntecedents;
          ((NumericAntd) m_Antds.elementAt(bestAntecedentsIndex)).fuzzyYet = true;
        }

        maxPurity = maxPurityOfAllAntecedents;
      }
      // The winner of the round is finished whether or not its bound was written back.
      finishedAntecedents[bestAntecedentsIndex] = true;
      numFinishedAntecedents++;
    }
  }
예제 #6
0
  /**
   * Fits the rule to the data it overlaps, so that it can only interpolate but not extrapolate.
   *
   * <p>The antecedent list is rebuilt. For every numeric attribute that is used by the rule and has
   * at least one non-missing value in {@code instances}, the existing antecedents on that attribute
   * are kept; if none of them has {@code value == 0}, an antecedent with {@code value = 0} and the
   * largest observed attribute value as split point is added, and if none has a non-zero
   * {@code value}, an antecedent with {@code value = 1} and the smallest observed attribute value
   * as split point is added. Antecedents on other attributes are carried over unchanged.
   *
   * @param instances The data to which the rule shall be fitted
   */
  public void fitAndSetCoreBound(Instances instances) {
    if (m_Antds == null) {
      return;
    }

    // Flag every attribute index that is referenced by at least one antecedent.
    boolean[] dimensionHasAntd = new boolean[instances.numAttributes() - 1];
    for (int a = 0; a < m_Antds.size(); a++) {
      Antd existing = (Antd) m_Antds.elementAt(a);
      dimensionHasAntd[existing.att.index()] = true;
    }

    FastVector fitted = new FastVector(10);
    // NOTE(review): the walk is per antecedent, not per attribute, so an attribute that carries
    // several antecedents is visited once for each of them and its antecedents appear to be
    // appended again on every visit. Confirm whether that duplication is intended.
    for (int pos = 0; pos < m_Antds.size(); pos++) {
      final int attIndex = ((Antd) m_Antds.elementAt(pos)).getAttr().index();
      if (!dimensionHasAntd[attIndex]) {
        continue; // Excluding non-existent antecedents
      }

      Instances known = new Instances(instances);
      known.deleteWithMissing(attIndex);
      final boolean numericWithData =
          known.attribute(attIndex).isNumeric() && known.numInstances() > 0;

      // Carry over every antecedent on this attribute, noting which kinds are present.
      boolean zeroValuedSeen = false;
      boolean nonZeroValuedSeen = false;
      for (int j = 0; j < m_Antds.size(); j++) {
        Antd candidate = (Antd) m_Antds.elementAt(j);
        if (candidate.att.index() != attIndex) {
          continue;
        }
        if (candidate.value == 0) {
          zeroValuedSeen = true;
        } else {
          nonZeroValuedSeen = true;
        }
        fitted.addElement(candidate);
      }

      if (!numericWithData) {
        continue;
      }

      if (!zeroValuedSeen) {
        // The k-th smallest value with k = number of instances is the largest observed value.
        double higherCore = known.kthSmallestValue(attIndex, known.numInstances());
        NumericAntd upper = new NumericAntd(known.attribute(attIndex));
        upper.value = 0;
        upper.splitPoint = higherCore;
        fitted.addElement(upper);
      }

      if (!nonZeroValuedSeen) {
        double lowerCore = known.kthSmallestValue(attIndex, 1);
        NumericAntd lower = new NumericAntd(known.attribute(attIndex));
        lower.value = 1;
        lower.splitPoint = lowerCore;
        fitted.addElement(lower);
      }
    }

    m_Antds = fitted;
  }
예제 #7
0
  /**
   * Prunes this rule by cutting off a tail of its antecedent sequence, judged on the pruning data.
   *
   * <p>Each prefix of the antecedent list is given a worth rate. With {@code useWhole} it is
   * (correctly covered weight + weight of non-covered instances of other classes) / total weight;
   * otherwise it is (correctly covered weight + 1) / (covered weight + 2). The shortest prefix with
   * the highest rate is kept, provided that rate exceeds the default rule's rate
   * (default accurate weight + 1) / (total weight + 2); otherwise the rule is left untouched.
   *
   * @param pruneData the pruning data used to prune the rule
   * @param useWhole flag to indicate whether use the error rate of the whole pruning data instead
   *     of the data covered
   */
  public void prune(Instances pruneData, boolean useWhole) {
    Instances remaining = pruneData;

    final double totalWeight = remaining.sumOfWeights();
    if (!Utils.gr(totalWeight, 0.0)) {
      return;
    }

    /* The default accurate # on the pruning data */
    final double defaultAccurate = computeDefAccu(remaining);

    if (m_Debug) {
      System.err.println(
          "Pruning with "
              + defaultAccurate
              + " positive data out of "
              + totalWeight
              + " instances");
    }

    final int antdCount = m_Antds.size();
    if (antdCount == 0) {
      return; // Default rule before pruning
    }

    // Java zero-initialises new double arrays, so no explicit reset is needed.
    double[] worthRate = new double[antdCount];
    double[] covered = new double[antdCount];
    double[] worth = new double[antdCount];

    /* Calculate accuracy parameters for all the antecedents in this rule */
    double trueNegatives = 0.0; // Accumulates over the antecedents; only used if useWhole
    for (int a = 0; a < antdCount; a++) {
      Antd antd = (Antd) m_Antds.elementAt(a);
      Instances previous = remaining;
      remaining = new Instances(previous, 0); // Start the next stage empty

      for (int y = 0; y < previous.numInstances(); y++) {
        Instance inst = previous.instance(y);

        if (antd.covers(inst) > 0) {
          // Covered: the instance survives to the next antecedent.
          covered[a] += inst.weight();
          remaining.add(inst);
          if ((int) inst.classValue() == (int) m_Consequent) {
            worth[a] += inst.weight(); // Accurate prediction
          }
        } else if (useWhole && (int) inst.classValue() != (int) m_Consequent) {
          trueNegatives += inst.weight(); // Not covered and correctly so
        }
      }

      if (useWhole) {
        worth[a] += trueNegatives;
        worthRate[a] = worth[a] / totalWeight;
      } else {
        // Note if coverage is 0, accuracy is 0.5
        worthRate[a] = (worth[a] + 1.0) / (covered[a] + 2.0);
      }
    }

    double bestRate = (defaultAccurate + 1.0) / (totalWeight + 2.0);
    int bestIndex = -1;
    for (int i = 0; i < worth.length; i++) {
      if (m_Debug) {
        double denom = useWhole ? totalWeight : covered[i];
        System.err.println(
            i + "(useAccuray? " + !useWhole + "): " + worthRate[i] + "=" + worth[i] + "/" + denom);
      }
      // Strict comparison: on ties the shorter rule wins.
      if (worthRate[i] > bestRate) {
        bestRate = worthRate[i];
        bestIndex = i;
      }
    }

    if (bestIndex == -1) {
      return;
    }

    /* Drop every antecedent after the best one, from the end backwards */
    for (int last = antdCount - 1; last > bestIndex; last--) {
      m_Antds.removeElementAt(last);
    }
  }
예제 #8
0
  /**
   * Grows this rule on the given data by repeatedly appending the antecedent with the highest
   * positive information gain.
   *
   * <p>Growing stops when no instances remain, when no attribute is left unused, when the current
   * accuracy rate is no longer smaller than 1, when no antecedent with positive gain is found, or
   * when the best antecedent's accurate weight is smaller than {@code m_MinNo}. Non-numeric
   * attributes are used at most once; numeric attributes may be used repeatedly.
   *
   * @param data the growing data used to build the rule
   * @throws Exception if the consequent is not set yet
   */
  public void grow(Instances data) throws Exception {
    if (m_Consequent == -1) {
      throw new Exception(" Consequent not set yet.");
    }

    Instances remaining = data;
    final double totalWeight = remaining.sumOfWeights();
    if (!Utils.gr(totalWeight, 0.0)) {
      return;
    }

    /* Default accurate rate of the growing data */
    final double defaultAccurate = computeDefAccu(remaining);
    double currentAccuRate = (defaultAccurate + 1.0) / (totalWeight + 1.0);

    /* Record of which attributes have already been used (all false initially) */
    boolean[] used = new boolean[remaining.numAttributes()];
    int unusedCount = used.length;

    // Antecedents that already exist block their attribute unless it is numeric.
    for (int j = 0; j < m_Antds.size(); j++) {
      Antd existing = (Antd) m_Antds.elementAt(j);
      if (existing.getAttr().isNumeric()) {
        continue;
      }
      used[existing.getAttr().index()] = true;
      unusedCount--;
    }

    while (Utils.gr(remaining.numInstances(), 0.0)
        && (unusedCount > 0)
        && Utils.sm(currentAccuRate, 1.0)) {

      // Only antecedents with strictly positive information gain are accepted.
      double bestGain = 0.0;
      Antd bestAntd = null;
      Instances bestCovered = null;

      /* Try one condition per attribute that is not used yet */
      for (Enumeration attrs = remaining.enumerateAttributes(); attrs.hasMoreElements(); ) {
        AttributeWeka att = (AttributeWeka) (attrs.nextElement());

        if (m_Debug) {
          System.err.println("\nOne condition: size = " + remaining.sumOfWeights());
        }

        Antd candidate;
        if (att.isNumeric()) {
          candidate = new NumericAntd(att);
        } else {
          candidate = new NominalAntd(att);
        }

        if (used[att.index()]) {
          continue;
        }

        /* The best information gain for this attribute is stored in the candidate itself;
        the call returns the data covered by it. */
        Instances coveredByCandidate = computeInfoGain(remaining, currentAccuRate, candidate);
        if (coveredByCandidate == null) {
          continue;
        }

        double gain = candidate.getMaxInfoGain();
        if (m_Debug) {
          System.err.println(
              "Test of \'"
                  + candidate.toString()
                  + "\': infoGain = "
                  + gain
                  + " | Accuracy = "
                  + candidate.getAccuRate()
                  + "="
                  + candidate.getAccu()
                  + "/"
                  + candidate.getCover()
                  + " def. accuracy: "
                  + currentAccuRate);
        }

        if (gain > bestGain) {
          bestAntd = candidate;
          bestCovered = coveredByCandidate;
          bestGain = gain;
        }
      }

      // Stop when nothing was found or the best candidate's accurate weight is too low.
      if (bestAntd == null || Utils.sm(bestAntd.getAccu(), m_MinNo)) {
        break;
      }

      // Numeric attributes can be used more than once
      if (!bestAntd.getAttr().isNumeric()) {
        used[bestAntd.getAttr().index()] = true;
        unusedCount--;
      }

      m_Antds.addElement(bestAntd);

      remaining = bestCovered; // Grow data size is shrinking
      currentAccuRate = bestAntd.getAccuRate();
    }
  }
예제 #9
0
 /**
  * Reports how many antecedents this rule has.
  *
  * @return the number of antecedents, widened to {@code double}
  */
 public double size() {
   final int antecedentCount = m_Antds.size();
   return antecedentCount;
 }
예제 #10
0
 /**
  * Tells whether this rule has at least one antecedent; a rule without antecedents is a default
  * rule.
  *
  * @return {@code true} if the antecedent list exists and is non-empty, {@code false} otherwise
  */
 public boolean hasAntds() {
   return m_Antds != null && m_Antds.size() > 0;
 }
예제 #11
0
 /**
  * Reports the number of stored elements, read from {@code fv} when it is non-null and from
  * {@code v} otherwise.
  *
  * @return the element count of the active backing vector
  */
 protected final int size() {
   if (fv == null) {
     return v.size();
   }
   return fv.size();
 }
  /**
   * Main method for testing this class.
   *
   * <p>Builds a small dataset header named "race" with two numeric attributes and one nominal class
   * attribute, creates a {@code BinarySparseInstance} for it, and prints the outcome of a sequence
   * of accessor and mutator calls on that instance and on a copy of it. Any exception is caught and
   * its stack trace is printed.
   *
   * @param options command-line arguments; not read by this method
   */
  public static void main(String[] options) {

    try {

      // Create numeric attributes "length" and "weight"
      Attribute length = new Attribute("length");
      Attribute weight = new Attribute("weight");

      // Create vector to hold the nominal values; the capacity is 3, but only "first" and
      // "second" are added
      FastVector my_nominal_values = new FastVector(3);
      my_nominal_values.addElement("first");
      my_nominal_values.addElement("second");

      // Create nominal attribute "position"
      Attribute position = new Attribute("position", my_nominal_values);

      // Create vector of the above attributes
      FastVector attributes = new FastVector(3);
      attributes.addElement(length);
      attributes.addElement(weight);
      attributes.addElement(position);

      // Create the empty dataset "race" with above attributes
      Instances race = new Instances("race", attributes, 0);

      // Make position the class attribute
      race.setClassIndex(position.index());

      // Create empty instance with three attribute values
      BinarySparseInstance inst = new BinarySparseInstance(3);

      // Set instance's values for the attributes "length", "weight", and "position"
      inst.setValue(length, (float) 5.3);
      inst.setValue(weight, 300);
      inst.setValue(position, "first");

      // Set instance's dataset to be the dataset "race"
      inst.setDataset(race);

      // Print the instance
      System.out.println("The instance: " + inst);

      // Print the first attribute
      System.out.println("First attribute: " + inst.attribute(0));

      // Print the class attribute
      System.out.println("Class attribute: " + inst.classAttribute());

      // Print the class index
      System.out.println("Class index: " + inst.classIndex());

      // Say if class is missing
      System.out.println("Class is missing: " + inst.classIsMissing());

      // Print the instance's class value in internal format
      System.out.println("Class value (internal format): " + inst.classValue());

      // Print a shallow copy of this instance
      SparseInstance copy = (SparseInstance) inst.copy();
      System.out.println("Shallow copy: " + copy);

      // Set dataset for shallow copy
      copy.setDataset(inst.dataset());
      System.out.println("Shallow copy with dataset set: " + copy);

      // Print out all values in internal format
      System.out.print("All stored values in internal format: ");
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Set all values to zero
      System.out.print("All values set to zero: ");
      // The loop relies on numValues() shrinking after each call; presumably zeroing a sparse
      // entry removes it from storage (TODO confirm against setValueSparse).
      while (inst.numValues() > 0) {
        inst.setValueSparse(0, 0);
      }
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Set all values to one
      System.out.print("All values set to one: ");
      for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, 1);
      }
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Unset dataset for copy, delete first attribute, and insert it again
      copy.setDataset(null);
      copy.deleteAttributeAt(0);
      copy.insertAttributeAt(0);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with first attribute deleted and inserted: " + copy);

      // Same for second attribute
      copy.setDataset(null);
      copy.deleteAttributeAt(1);
      copy.insertAttributeAt(1);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with second attribute deleted and inserted: " + copy);

      // Same for last attribute
      copy.setDataset(null);
      copy.deleteAttributeAt(2);
      copy.insertAttributeAt(2);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with third attribute deleted and inserted: " + copy);

      // Enumerate attributes (leaving out the class attribute)
      System.out.println("Enumerating attributes (leaving out class):");
      Enumeration enu = inst.enumerateAttributes();
      while (enu.hasMoreElements()) {
        Attribute att = (Attribute) enu.nextElement();
        System.out.println(att);
      }

      // Headers are equivalent?
      System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

      // Test for missing values (by attribute object, by index, and via the raw value)
      System.out.println("Length of copy missing: " + copy.isMissing(length));
      System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
      System.out.println("Length of copy missing: " + Instance.isMissingValue(copy.value(length)));
      System.out.println("Missing value coded as: " + Instance.missingValue());

      // Prints number of attributes and classes
      System.out.println("Number of attributes: " + copy.numAttributes());
      System.out.println("Number of classes: " + copy.numClasses());

      // Replace missing values, passing one replacement value per attribute
      float[] meansAndModes = {2, 3, 0};
      copy.replaceMissingValues(meansAndModes);
      System.out.println("Copy with missing value replaced: " + copy);

      // Setting and getting values and weights
      copy.setClassMissing();
      System.out.println("Copy with missing class: " + copy);
      copy.setClassValue(0);
      System.out.println("Copy with class value set to first value: " + copy);
      copy.setClassValue("second");
      System.out.println("Copy with class value set to \"second\": " + copy);
      copy.setMissing(1);
      System.out.println("Copy with second attribute set to be missing: " + copy);
      copy.setMissing(length);
      System.out.println("Copy with length set to be missing: " + copy);
      copy.setValue(0, 0);
      System.out.println("Copy with first attribute set to 0: " + copy);
      copy.setValue(weight, 1);
      System.out.println("Copy with weight attribute set to 1: " + copy);
      copy.setValue(position, "second");
      System.out.println("Copy with position set to \"second\": " + copy);
      copy.setValue(2, "first");
      System.out.println("Copy with last attribute set to \"first\": " + copy);
      System.out.println("Current weight of instance copy: " + copy.weight());
      copy.setWeight(2);
      System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
      System.out.println("Last value of copy: " + copy.toString(2));
      System.out.println("Value of position for copy: " + copy.toString(position));
      System.out.println("Last value of copy (internal format): " + copy.value(2));
      System.out.println("Value of position for copy (internal format): " + copy.value(position));
    } catch (Exception e) {
      // Demo code: any failure is reported on stderr and the method then ends normally.
      e.printStackTrace();
    }
  }