Exemplo n.º 1
0
  /**
   * Computes the difference between this item set and another one.
   *
   * <p>For every position of this set's item array, the result keeps this set's value when
   * {@code toSubtract} holds {@code -1} at that position and stores {@code -1} otherwise. The
   * counter of the returned item set is 0.
   *
   * @param toSubtract the item set to be subtracted from this one; its item array is read at
   *     every index of this set's item array
   * @return a new item set that only contains the items from this item set that are not contained
   *     in toSubtract
   */
  public final AprioriItemSet subtract(AprioriItemSet toSubtract) {
    AprioriItemSet difference = new AprioriItemSet(m_totalTransactions);
    final int numSlots = m_items.length;
    difference.m_items = new int[numSlots];

    for (int pos = 0; pos < numSlots; pos++) {
      // A slot survives only when the subtracted set has nothing (-1) at that position.
      difference.m_items[pos] = (toSubtract.m_items[pos] == -1) ? m_items[pos] : -1;
    }

    difference.m_counter = 0;
    return difference;
  }
Exemplo n.º 2
0
  /**
   * Generates rules with more than one item in the consequence, recursing on the consequence size.
   *
   * <p>New consequences come from {@code mergeAllItemSets} applied to {@code rules[1]}. For each
   * of them the premise is this item set minus the consequence, and the premise counter is read
   * from the hashtable stored at index {@code numItemsInSet - numItemsInConsequence - 2}. The
   * resulting vectors are handed to {@code pruneRules} and then extended with whatever the next
   * recursion level returns.
   *
   * @param rules all the rules having (k-1)-item sets as consequences; only {@code rules[1]} is
   *     read here
   * @param numItemsInSet the size of the item set for which the rules are to be generated
   * @param numItemsInConsequence the value of (k-1)
   * @param minConfidence the minimum confidence a rule has to have
   * @param hashtables the hashtables containing all(!) previously generated item sets
   * @return an array of three vectors (premises, consequences, confidences) holding the rules
   *     built at this level and at all deeper levels, or {@code null} when {@code numItemsInSet}
   *     is not greater than {@code numItemsInConsequence + 1}
   */
  private final FastVector[] moreComplexRules(
      FastVector[] rules,
      int numItemsInSet,
      int numItemsInConsequence,
      double minConfidence,
      FastVector hashtables) {

    FastVector premises = new FastVector();
    FastVector confidences = new FastVector();

    // Nothing to build once the consequence cannot grow any further.
    if (numItemsInSet <= numItemsInConsequence + 1) {
      return null;
    }

    Hashtable premiseCounts =
        (Hashtable) hashtables.elementAt(numItemsInSet - numItemsInConsequence - 2);
    FastVector consequences =
        mergeAllItemSets(rules[1], numItemsInConsequence - 1, m_totalTransactions);

    for (Enumeration e = consequences.elements(); e.hasMoreElements(); ) {
      AprioriItemSet consequence = (AprioriItemSet) e.nextElement();
      // The consequence carries the counter of the complete item set.
      consequence.m_counter = m_counter;

      AprioriItemSet premise = subtract(consequence);
      Integer premiseCount = (Integer) premiseCounts.get(premise);
      premise.m_counter = premiseCount.intValue();

      premises.addElement(premise);
      confidences.addElement(new Double(confidenceForRule(premise, consequence)));
    }

    FastVector[] result = new FastVector[] {premises, consequences, confidences};
    pruneRules(result, minConfidence);

    // Recurse with a consequence that is one item larger and append its rules.
    FastVector[] deeper =
        moreComplexRules(
            result, numItemsInSet, numItemsInConsequence + 1, minConfidence, hashtables);
    if (deeper != null) {
      for (int r = 0; r < deeper[0].size(); r++) {
        for (int col = 0; col < 3; col++) {
          result[col].addElement(deeper[col].elementAt(r));
        }
      }
    }
    return result;
  }
Exemplo n.º 3
0
  /**
   * Generates all rules for this item set.
   *
   * <p>First every rule with a single item in the consequence is built: for each position that is
   * not {@code -1}, the consequence holds just that item and the premise holds every other item.
   * Premise counters are read from the hashtable at index {@code numItemsInSet - 2}. The rules are
   * passed to {@code pruneRules}, and then the rules returned by {@code moreComplexRules} (if any)
   * are appended.
   *
   * @param minConfidence the minimum confidence the rules have to have
   * @param hashtables containing all(!) previously generated item sets
   * @param numItemsInSet the size of the item set for which the rules are to be generated
   * @return an array of three vectors: premises, consequences and confidences of the rules for
   *     the given item set
   */
  public FastVector[] generateRules(
      double minConfidence, FastVector hashtables, int numItemsInSet) {

    FastVector premises = new FastVector();
    FastVector consequences = new FastVector();
    FastVector conf = new FastVector();
    Hashtable premiseCounts = (Hashtable) hashtables.elementAt(numItemsInSet - 2);

    // Rules with exactly one item in the consequence.
    for (int i = 0; i < m_items.length; i++) {
      if (m_items[i] == -1) {
        continue;
      }
      AprioriItemSet premise = new AprioriItemSet(m_totalTransactions);
      AprioriItemSet consequence = new AprioriItemSet(m_totalTransactions);
      premise.m_items = new int[m_items.length];
      consequence.m_items = new int[m_items.length];
      consequence.m_counter = m_counter;

      // Item i moves to the consequence; every other slot stays with the premise.
      for (int k = 0; k < m_items.length; k++) {
        if (k == i) {
          premise.m_items[k] = -1;
          consequence.m_items[k] = m_items[k];
        } else {
          premise.m_items[k] = m_items[k];
          consequence.m_items[k] = -1;
        }
      }

      Integer premiseCount = (Integer) premiseCounts.get(premise);
      premise.m_counter = premiseCount.intValue();
      premises.addElement(premise);
      consequences.addElement(consequence);
      conf.addElement(new Double(confidenceForRule(premise, consequence)));
    }

    FastVector[] rules = {premises, consequences, conf};
    pruneRules(rules, minConfidence);

    // Rules with larger consequences.
    FastVector[] larger = moreComplexRules(rules, numItemsInSet, 1, minConfidence, hashtables);
    if (larger != null) {
      for (int r = 0; r < larger[0].size(); r++) {
        for (int col = 0; col < 3; col++) {
          rules[col].addElement(larger[col].elementAt(r));
        }
      }
    }
    return rules;
  }
Exemplo n.º 4
0
  /**
   * Generates all significant rules for this item set by enumerating every split of its items
   * into a non-empty premise and a non-empty consequence.
   *
   * <p>Each value of a bit mask from 1 up to (but excluding) 2^numItemsInSet selects the items
   * that go into the premise; masks that would select every item are skipped. Counters for the
   * premise and for the consequence on its own are read from the hashtables at index
   * {@code size - 1}. A rule is kept when its selected metric is not below {@code minMetric} and,
   * if significance testing is enabled, the chi-squared value computed from the 2x2 contingency
   * table is not greater than {@code significanceLevel}.
   *
   * @param minMetric the minimum value of the selected metric the rules have to have
   * @param metricType the metric to threshold on: 0 = confidence, 1 = lift, 2 = leverage, 3 =
   *     conviction
   * @param hashtables containing all(!) previously generated item sets
   * @param numItemsInSet the size of the item set for which the rules are to be generated
   * @param numTransactions the transaction total used for the fourth cell of the contingency
   *     table
   * @param significanceLevel the significance level for testing the rules; {@code -1} disables
   *     the test
   * @return an array of six vectors: premises, consequences, confidence, lift, leverage and
   *     conviction of the rules that were kept
   * @exception Exception if a candidate rule is evaluated and {@code metricType} is not one of 0,
   *     1, 2 or 3
   */
  public final FastVector[] generateRulesBruteForce(
      double minMetric,
      int metricType,
      FastVector hashtables,
      int numItemsInSet,
      int numTransactions,
      double significanceLevel)
      throws Exception {

    FastVector premises = new FastVector();
    FastVector consequences = new FastVector();
    FastVector conf = new FastVector();
    FastVector lift = new FastVector();
    FastVector lev = new FastVector();
    FastVector conv = new FastVector();
    double[][] contingencyTable = new double[2][2];
    double chiSquared = 0;
    final boolean testSignificance = significanceLevel != -1;

    // Every mask value is one candidate premise/consequence split of the item set.
    final int subsetCount = (int) Math.pow(2, numItemsInSet);
    for (int mask = 1; mask < subsetCount; mask++) {
      final int premiseSize = Integer.bitCount(mask);
      if (premiseSize >= numItemsInSet) {
        // The consequence would be empty.
        continue;
      }

      Hashtable premiseCounts = (Hashtable) hashtables.elementAt(premiseSize - 1);
      Hashtable consequenceCounts =
          (Hashtable) hashtables.elementAt(numItemsInSet - premiseSize - 1);

      AprioriItemSet premise = new AprioriItemSet(m_totalTransactions);
      AprioriItemSet consequence = new AprioriItemSet(m_totalTransactions);
      premise.m_items = new int[m_items.length];
      consequence.m_items = new int[m_items.length];
      consequence.m_counter = m_counter;

      // Consume one mask bit per occupied slot: a set bit sends the item to the premise,
      // a clear bit sends it to the consequence. Empty slots stay empty on both sides.
      int bits = mask;
      for (int i = 0; i < m_items.length; i++) {
        final boolean occupied = m_items[i] != -1;
        final boolean toPremise = occupied && (bits & 1) == 1;
        premise.m_items[i] = toPremise ? m_items[i] : -1;
        consequence.m_items[i] = (occupied && !toPremise) ? m_items[i] : -1;
        if (occupied) {
          bits >>= 1;
        }
      }

      premise.m_counter = ((Integer) premiseCounts.get(premise)).intValue();
      final int consequenceUnconditionedCounter =
          ((Integer) consequenceCounts.get(consequence)).intValue();

      if (testSignificance) {
        contingencyTable[0][0] = (consequence.m_counter);
        contingencyTable[0][1] = (premise.m_counter - consequence.m_counter);
        contingencyTable[1][0] = (consequenceUnconditionedCounter - consequence.m_counter);
        contingencyTable[1][1] =
            (numTransactions
                - premise.m_counter
                - consequenceUnconditionedCounter
                + consequence.m_counter);
        chiSquared = ContingencyTables.chiSquared(contingencyTable, false);
      }
      final boolean passesSignificance = !testSignificance || !(chiSquared > significanceLevel);

      final double ruleConf = confidenceForRule(premise, consequence);

      if (metricType == 0) {
        // Confidence is the threshold metric; the other metrics are only computed for kept rules.
        if (!(ruleConf < minMetric) && passesSignificance) {
          premises.addElement(premise);
          consequences.addElement(consequence);
          conf.addElement(new Double(ruleConf));
          lift.addElement(
              new Double(liftForRule(premise, consequence, consequenceUnconditionedCounter)));
          lev.addElement(
              new Double(
                  leverageForRule(
                      premise, consequence, premise.m_counter, consequenceUnconditionedCounter)));
          conv.addElement(
              new Double(
                  convictionForRule(
                      premise, consequence, premise.m_counter, consequenceUnconditionedCounter)));
        }
        continue;
      }

      final double ruleLift = liftForRule(premise, consequence, consequenceUnconditionedCounter);
      final double ruleLev =
          leverageForRule(premise, consequence, premise.m_counter, consequenceUnconditionedCounter);
      final double ruleConv =
          convictionForRule(
              premise, consequence, premise.m_counter, consequenceUnconditionedCounter);

      final double metric;
      if (metricType == 1) {
        metric = ruleLift;
      } else if (metricType == 2) {
        metric = ruleLev;
      } else if (metricType == 3) {
        metric = ruleConv;
      } else {
        throw new Exception("ItemSet: Unknown metric type!");
      }

      if (!(metric < minMetric) && passesSignificance) {
        premises.addElement(premise);
        consequences.addElement(consequence);
        conf.addElement(new Double(ruleConf));
        lift.addElement(new Double(ruleLift));
        lev.addElement(new Double(ruleLev));
        conv.addElement(new Double(ruleConv));
      }
    }

    return new FastVector[] {premises, consequences, conf, lift, lev, conv};
  }