Ejemplo n.º 1
0
 /**
  * Removes the last rule of the ruleset together with its statistics: the entry at the same
  * index is also dropped from m_Filtered, m_SimpleStats and, when it is non-null,
  * m_Distributions. This is useful when the last rule was only added for testing purposes and
  * the test failed.
  */
 public void removeLast() {
   final int lastIndex = m_Ruleset.size() - 1;

   // The same index is removed from the ruleset and from each per-rule vector.
   m_Ruleset.removeElementAt(lastIndex);
   m_Filtered.removeElementAt(lastIndex);
   m_SimpleStats.removeElementAt(lastIndex);

   // Distributions are optional and only trimmed when they exist.
   if (m_Distributions != null) {
     m_Distributions.removeElementAt(lastIndex);
   }
 }
Ejemplo n.º 2
0
  /**
   * Attempts to reduce the description length (DL) of the ruleset by testing the removal of each
   * rule in turn, from the last rule to the first, and then refreshes all the statistics if
   * needed.
   *
   * @param expFPRate expected FP/(FP+FN), used in the data DL calculation
   * @param checkErr whether to check if the error rate is >= 0.5
   */
  public void reduceDL(double expFPRate, boolean checkErr) {

    final int ruleCount = m_SimpleStats.size();

    // Build the ruleset-level stats: slots 0, 2 and 4 are summed over every rule (covered stats
    // are cumulative), whereas slots 1, 3 and 5 are taken from the last rule only.
    double[] totals = new double[6];
    for (int i = 0; i < ruleCount; i++) {
      double[] stats = (double[]) m_SimpleStats.elementAt(i);
      totals[0] += stats[0];
      totals[2] += stats[2];
      totals[4] += stats[4];
    }
    if (ruleCount > 0) {
      double[] tail = (double[]) m_SimpleStats.elementAt(ruleCount - 1);
      totals[1] = tail[1];
      totals[3] = tail[3];
      totals[5] = tail[5];
    }

    // Walk the rules backwards and ask potential() about deleting each one.
    boolean statsStale = false;
    for (int k = ruleCount - 1; k >= 0; k--) {
      double[] candidateStats = (double[]) m_SimpleStats.elementAt(k);

      // NOTE(review): potential() is expected to update `totals` in place - confirm.
      double ifDeleted = potential(k, expFPRate, totals, candidateStats, checkErr);
      if (Double.isNaN(ifDeleted)) {
        continue; // NaN result: rule k stays in the ruleset
      }

      // m_SimpleStats shrinks whenever removeLast() runs, so its size is re-read every time.
      boolean isLastRule = (k == m_SimpleStats.size() - 1);
      if (isLastRule) {
        removeLast();
      } else {
        // Only the ruleset is changed here; the per-rule data is rebuilt after the loop.
        m_Ruleset.removeElementAt(k);
        statsStale = true;
      }
    }

    // A rule removed from the middle leaves the cached per-rule data out of step with the
    // ruleset, so it is discarded and counted again.
    if (statsStale) {
      m_Filtered = null;
      m_SimpleStats = null;
      countData();
    }
  }
Ejemplo n.º 3
0
  /**
   * Generates all large itemsets with a minimum support and, from these, all association rules.
   * The best rules found are stored in m_allTheRules as three parallel vectors (premises,
   * consequences, accuracies).
   *
   * <p>The given instances are kept by reference and their class index is set to the last
   * attribute.
   *
   * @param instances the instances to be used for generating the associations
   * @exception Exception if rules can't be built successfully, e.g. because the data contains
   *     string attributes
   */
  public void buildAssociations(Instances instances) throws Exception {

    int previousPremiseCount = m_premiseCount;
    // NOTE(review): the "- 5" offset presumably undoes padding applied where m_numRules is set;
    // confirm against the setter.
    int exactNumber = m_numRules - 5;

    if (instances.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }
    m_instances = instances;
    m_instances.setClassIndex(m_instances.numAttributes() - 1);

    // prior estimation
    m_priorEstimator = new PriorEstimation(m_instances, m_numRandRules, m_numIntervals, false);
    m_priors = m_priorEstimator.estimatePrior();
    m_midPoints = m_priorEstimator.getMidPoints();

    m_Ls = new FastVector();
    m_hashtables = new FastVector();

    for (int i = 1; i < m_instances.numAttributes(); i++) {
      m_bestChanged = false;

      // find large item sets
      findLargeItemSets(i);

      // find association rules (rule generation procedure)
      findRulesQuickly();

      if (m_bestChanged) {
        // Remember the old premise count, then raise it until the expectation exceeds the
        // current threshold or the count passes the number of instances.
        previousPremiseCount = m_premiseCount;
        while (RuleGeneration.expectation(m_premiseCount, m_premiseCount, m_midPoints, m_priors)
            <= m_expectation) {
          m_premiseCount++;
          if (m_premiseCount > m_instances.numInstances()) {
            break;
          }
        }
      }

      // The required premise count exceeds the data size: stop searching and report.
      if (m_premiseCount > m_instances.numInstances()) {
        collectBestRules(exactNumber);
        return;
      }

      // The premise count changed: re-filter the most recent item sets with the new minimum.
      if (previousPremiseCount != m_premiseCount && m_Ls.size() > 0) {
        FastVector kSets = (FastVector) m_Ls.lastElement();
        m_Ls.removeElementAt(m_Ls.size() - 1);
        kSets = ItemSet.deleteItemSets(kSets, m_premiseCount, Integer.MAX_VALUE);
        m_Ls.addElement(kSets);
      }
    }

    collectBestRules(exactNumber);
  }

  /**
   * Resets m_allTheRules and fills it by repeatedly taking the last element of m_best, removing
   * each taken rule from m_best, until m_best is empty or maxRules rules have been taken.
   *
   * @param maxRules the maximum number of rules to take; nothing is taken if it is not positive
   */
  private void collectBestRules(int maxRules) {
    // Index 0 holds the premises, 1 the consequences and 2 the accuracies.
    m_allTheRules = new FastVector[3];
    m_allTheRules[0] = new FastVector();
    m_allTheRules[1] = new FastVector();
    m_allTheRules[2] = new FastVector();

    int k = 0;
    while (m_best.size() > 0 && maxRules > 0) {
      RuleItem best = (RuleItem) m_best.last();
      m_allTheRules[0].insertElementAt((ItemSet) best.premise(), k);
      m_allTheRules[1].insertElementAt((ItemSet) best.consequence(), k);
      m_allTheRules[2].insertElementAt(Double.valueOf(best.accuracy()), k);
      m_best.remove(best);
      k++;
      maxRules--;
    }
  }