Пример #1
0
 /**
  * Files the itemset under the level whose index equals its number of items, creating any
  * missing levels first, and bumps the running itemset counter.
  *
  * @param itemset the itemset to store
  */
 public void addItemset(Itemset itemset) {
   final int level = itemset.getItems().size();
   // Make sure index `level` exists in the level list.
   while (levels.size() <= level) {
     levels.add(new ArrayList<Itemset>());
   }
   levels.get(level).add(itemset);
   itemsetsCount++;
 }
Пример #2
0
  // Generate C(k+1) by joining pairs of itemsets from F(k).
  // The input list is sorted in place; it is then split into consecutive runs in which the
  // run's first itemset can be joined with every other member, and all pairs inside each run
  // are joined. Returns an empty list when fewer than two itemsets are given.
  private static List<Itemset> generateCandidates(List<Itemset> frequentItemsets) {
    final int total = frequentItemsets.size();
    if (total < 2) {
      return new ArrayList<>();
    }

    Collections.sort(frequentItemsets);

    List<Itemset> result = new ArrayList<>();
    int runStart = 0;
    int runEnd = 1;
    while (runStart != total) {
      // Extend the run while the run's head can still be joined with the next itemset.
      while (runEnd != total) {
        Itemset head = frequentItemsets.get(runStart);
        Itemset next = frequentItemsets.get(runEnd);
        if (Itemset.generateCandidate(head, next) == null) {
          break;
        }
        ++runEnd;
      }

      // Join every pair inside [runStart, runEnd).
      for (int left = runStart; left != runEnd; ++left) {
        Itemset first = frequentItemsets.get(left);
        for (int right = left + 1; right != runEnd; ++right) {
          Itemset joined = Itemset.generateCandidate(first, frequentItemsets.get(right));
          assert joined != null;
          result.add(joined);
        }
      }

      // The next run starts where this one stopped.
      runStart = runEnd;
      runEnd++;
    }

    return result;
  }
Пример #3
0
  /**
   * Walks Lk from its last element to its first. For each itemset the confidence is the class
   * support divided by the support (0 when the first support component is not positive).
   * Itemsets whose confidence exceeds 0.4 are added to ruleBaseClase and counted in ruleStage1;
   * itemsets whose confidence exceeds minconf are removed from Lk. Afterwards, if the rule base
   * holds more than 500000 entries it is reduced for the given class.
   *
   * @param Lk the itemsets to examine; entries may be removed
   * @param clas the class passed on to reduceRules
   */
  private void generateRules(ArrayList<Itemset> Lk, int clas) {
    double[] ratio = new double[2];

    int pos = Lk.size();
    while (--pos >= 0) {
      Itemset current = Lk.get(pos);
      double[] support = current.getSupport();

      if (support[0] > 0.0) {
        double[] supportClass = current.getSupportClass();
        ratio[0] = supportClass[0] / support[0];
        ratio[1] = supportClass[1] / support[1];
      } else {
        ratio[0] = 0.0;
        ratio[1] = 0.0;
      }

      if (ratio[0] > 0.4) {
        this.ruleBaseClase.add(current);
        ruleStage1++;
      }
      if (ratio[0] > this.minconf) {
        // Index-based removal; safe because the scan runs backwards.
        Lk.remove(pos);
      }
    }

    if (this.ruleBaseClase.size() > 500000) {
      this.ruleBaseClase.reduceRules(clas);
      System.gc();
    }
  }
Пример #4
0
  /**
   * Adds every itemset of the source whose value for the given attribute is missing, spreading
   * its weight over all values. The share given to each value is fixed before the scan starts:
   * perValue[v] / total, or a uniform share when total is 0.
   *
   * @param source The dataset that contains all the itemsets.
   * @param attIndex The index of the attribute with possible unknown values.
   */
  public final void addWithUnknownValue(MyDataset source, int attIndex) {
    final int numValues = perValue.length;

    // Shares are computed once, from the counts as they are before any itemset is added.
    double[] share = new double[numValues];
    for (int v = 0; v < numValues; v++) {
      share[v] = (total == 0) ? 1.0 / share.length : perValue[v] / total;
    }

    Enumeration all = source.enumerateItemsets();
    while (all.hasMoreElements()) {
      Itemset current = (Itemset) all.nextElement();
      if (!current.isMissing(attIndex)) {
        continue;
      }

      int cls = (int) current.getClassValue();
      double w = current.getWeight();
      perClass[cls] += w;
      total += w;

      for (int v = 0; v < numValues; v++) {
        double part = share[v] * w;
        perClassPerValue[v][cls] += part;
        perValue[v] += part;
      }
    }
  }
  /**
   * Records a sequential pattern. The pattern counter is always incremented. When a writer is
   * set, one line is written: the items of each itemset separated by spaces, each itemset
   * terminated by "-1 ", followed by " #SUP: " and the size of the pattern's sequence-ID
   * collection. Without a writer the pattern is kept in memory instead.
   *
   * @param prefix the pattern to be saved.
   * @throws IOException exception if error while writing the output file.
   */
  private void savePattern(SequentialPattern prefix) throws IOException {
    // statistics
    patternCount++;

    // no output file: keep the pattern in memory
    if (writer == null) {
      patterns.addSequence(prefix, prefix.size());
      return;
    }

    StringBuilder line = new StringBuilder();
    for (Itemset itemset : prefix.getItemsets()) {
      for (String item : itemset.getItems()) {
        line.append(item.toString()).append(' ');
      }
      line.append("-1 ");
    }
    line.append(" #SUP: ").append(prefix.getSequencesID().size());

    writer.write(line.toString());
    writer.newLine();
  }
Пример #6
0
  // apriori_gen: builds a collection of (k+1)-itemsets from a collection of k-itemsets.
  // Each itemset is combined with the ones that follow it until the first one it cannot be
  // combined with; a combination is kept only when the hash tree reports as many subsets
  // as the combination has items.
  private Vector apriori_gen(Vector itemsets) {
    final int count = itemsets.size();
    if (count == 0) {
      return new Vector(0);
    }

    // Index all input itemsets in a hash tree so subset counting is cheap.
    HashTree tree = new HashTree(itemsets);
    for (int idx = 0; idx < count; idx++) {
      tree.add(idx);
    }
    tree.prepareForDescent();

    Vector joined = new Vector();
    for (int i = 0; i < count - 1; i++) {
      Itemset left = (Itemset) itemsets.get(i);
      for (int j = i + 1; j < count; j++) {
        Itemset right = (Itemset) itemsets.get(j);

        // The collection is kept ordered, so once a partner fails no later one can succeed.
        if (!left.canCombineWith(right)) {
          break;
        }

        Itemset merged = left.combineWith(right);
        // a real k-itemset has k (k-1)-subsets
        if (tree.countSubsets(merged) == merged.size()) {
          joined.add(merged);
        }
      }
    }

    return joined;
  }
Пример #7
0
  // ap-genrules: recursively generates rules from a frequent itemset.
  // Larger consequents are produced with apriori_gen; each one whose rule reaches
  // min_confidence yields a rule, the others are dropped before recursing.
  private void ap_genrules(Itemset is_frequent, Vector consequents) {
    if (consequents.size() == 0) {
      return;
    }

    // The frequent itemset must be at least 2 items larger than the current consequents,
    // otherwise no rule can be produced in this call.
    Itemset firstConsequent = (Itemset) consequents.get(0);
    if (is_frequent.size() <= firstConsequent.size() + 1) {
      return;
    }

    Vector new_consequents = apriori_gen(consequents);

    int pos = 0;
    while (pos < new_consequents.size()) {
      Itemset is_consequent = (Itemset) new_consequents.get(pos);
      Itemset is_antecedent = is_frequent.subtract(is_consequent);

      // Fallback support used when the lookup fails.
      float antecedent_support = (float) 0.00001;
      try {
        antecedent_support = supports.getSupport(is_antecedent);
      } catch (SETException e) {
        System.err.println("Error geting support from SET!!!\n" + e);
      }
      float confidence = is_frequent.getSupport() / antecedent_support;

      if (confidence >= min_confidence) {
        rules.add(
            new AssociationRule(
                is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        pos++;
      } else {
        // Drop the consequent; the next one slides into this position.
        new_consequents.remove(pos);
      }
    }

    ap_genrules(is_frequent, new_consequents);
  }
Пример #8
0
  /**
   * Finds association rules given the frequent itemsets read from the cache. For every frequent
   * itemset with more than one item, each single item is tried as a consequent; those whose rule
   * reaches the minimum confidence produce a rule and are handed to ap_genrules to grow larger
   * consequents.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) {
    min_support = minSupport;
    min_confidence = minConfidence;

    // fresh rule collection for this run
    rules = new Vector();

    // load the supports of the frequent itemsets from the cache
    initializeSupports(cacheReader);
    Vector frequent = supports.getItemsets();

    for (int idx = 0; idx < frequent.size(); idx++) {
      Itemset is_frequent = (Itemset) frequent.get(idx);

      // an itemset needs at least two items to form a rule
      if (is_frequent.size() > 1) {
        Vector consequents = new Vector(is_frequent.size());

        for (int k = 0; k < is_frequent.size(); k++) {
          // candidate consequent made of the k-th item only
          int item = is_frequent.getItem(k);
          Itemset is_consequent = new Itemset(1);
          is_consequent.addItem(item);

          Itemset is_antecedent = is_frequent.subtract(is_consequent);

          // Fallback support used when the lookup fails.
          float antecedent_support = (float) 0.00001;
          try {
            antecedent_support = supports.getSupport(is_antecedent);
          } catch (SETException e) {
            System.err.println("Error geting support from SET!!!\n" + e);
          }
          float confidence = is_frequent.getSupport() / antecedent_support;

          if (confidence >= min_confidence) {
            consequents.add(is_consequent);
            rules.add(
                new AssociationRule(
                    is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
          }
        }

        // grow larger consequents from the accepted single-item ones
        ap_genrules(is_frequent, consequents);
      }
    }

    return rules;
  }
Пример #9
0
 // Prune C(k+1): keep only the candidates whose whole downward closure is present in the
 // given frequent itemsets.
 private static List<Itemset> prune(List<Itemset> candidates, List<Itemset> frequentItemsets) {
   List<Itemset> survivors = new ArrayList<>();
   for (Itemset candidate : candidates) {
     if (!frequentItemsets.containsAll(candidate.downwardClosure())) {
       continue;
     }
     survivors.add(candidate);
   }
   return survivors;
 }
Пример #10
0
  /**
   * Recursively extends the itemsets of Lk by one item. Nothing happens unless Lk has at least
   * two itemsets and the size of its first itemset is below both nVariables and depth. For each
   * itemset, the combinable itemsets that follow it contribute their last item; extensions whose
   * class support reaches minsup are collected, turned into rules, and extended further.
   *
   * @param Lk the itemsets of the current level
   * @param clas the class passed on to generateRules and to the recursive call
   */
  private void generateLarge(ArrayList<Itemset> Lk, int clas) {
    final int size = Lk.size();
    if (size <= 1) {
      return;
    }

    final int width = Lk.get(0).size();
    if (width >= this.nVariables || width >= this.depth) {
      return;
    }

    ArrayList<Itemset> Lnew = new ArrayList<Itemset>();
    for (int i = 0; i < size - 1; i++) {
      Itemset base = Lk.get(i);

      for (int j = i + 1; j < size; j++) {
        Itemset other = Lk.get(j);
        if (!this.isCombinable(base, other)) {
          continue;
        }
        Itemset extended = base.clone();
        extended.add((other.get(other.size() - 1)).clone());
        extended.calculateSupports(this.dataBase, this.train);
        if (extended.getSupportClass()[0] >= this.minsup) {
          Lnew.add(extended);
        }
      }

      // Process the extensions of this base itemset, then reuse the list for the next one.
      this.generateRules(Lnew, clas);
      this.generateLarge(Lnew, clas);
      Lnew.clear();
      System.gc();
    }
  }
Пример #11
0
  /**
   * Tells whether the two itemsets can be combined: the variable of the last item of itemseti
   * must be strictly smaller than the variable of the item of itemsetj found at that same
   * position.
   *
   * @param itemseti the first itemset
   * @param itemsetj the second itemset
   * @return true when itemseti's variable is the smaller one
   */
  private boolean isCombinable(Itemset itemseti, Itemset itemsetj) {
    // NOTE(review): the position is derived from itemseti for both lookups; presumably both
    // itemsets always have the same size here — confirm against the callers.
    final int last = itemseti.size() - 1;
    Item tailI = itemseti.get(last);
    Item tailJ = itemsetj.get(last);
    return tailI.getVariable() < tailJ.getVariable();
  }
Пример #12
0
  // ap-genrules with constraints: recursively generates rules from a frequent itemset.
  // A consequent that fails the confidence test is dropped before recursing; one that passes
  // is kept, but its rule is only recorded when the antecedent/consequent constraints hold.
  private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) {
    if (consequents.size() == 0) {
      return;
    }

    // The frequent itemset must be at least 2 items larger than the current consequents,
    // otherwise no rule can be produced in this call.
    Itemset firstConsequent = (Itemset) consequents.get(0);
    if (is_frequent.size() <= firstConsequent.size() + 1) {
      return;
    }

    Vector new_consequents = apriori_gen(consequents);

    int pos = 0;
    while (pos < new_consequents.size()) {
      Itemset is_consequent = (Itemset) new_consequents.get(pos);
      Itemset is_antecedent = is_frequent.subtract(is_consequent);

      // Fallback support used when the lookup fails.
      float antecedent_support = (float) 0.00001;
      try {
        antecedent_support = supports.getSupport(is_antecedent);
      } catch (SETException e) {
        System.err.println("Error geting support from SET!!!\n" + e);
      }
      float confidence = is_frequent.getSupport() / antecedent_support;

      if (confidence >= min_confidence) {
        // Constraints are checked in order and evaluation stops at the first failure.
        boolean approved =
            (is_in_antecedent == null || is_in_antecedent.isIncludedIn(is_antecedent))
                && (is_in_consequent == null || is_in_consequent.isIncludedIn(is_consequent))
                && !(max_antecedent > 0 && is_antecedent.size() > max_antecedent)
                && !(min_consequent > 0 && is_consequent.size() < min_consequent);

        if (approved) {
          rules.add(
              new AssociationRule(
                  is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        }
        pos++;
      } else {
        // Drop the consequent; the next one slides into this position.
        new_consequents.remove(pos);
      }
    }

    ap_genrules_constraint(is_frequent, new_consequents);
  }
Пример #13
0
  /**
   * Adds the weight of the given itemset to the counters of the given value, of the itemset's
   * class, and to the overall total.
   *
   * @param valueIndex The index of the value.
   * @param itemset The itemset to add.
   */
  public final void add(int valueIndex, Itemset itemset) {
    final int cls = (int) itemset.getClassValue();
    final double w = itemset.getWeight();

    perClassPerValue[valueIndex][cls] += w;
    perValue[valueIndex] += w;
    perClass[cls] += w;
    total += w;
  }
Пример #14
0
  /**
   * Converts every instance of IS into an array of doubles and appends it to the dataset as an
   * itemset. Missing values become Itemset.getMissingValue(), nominal values become the index of
   * the matching nominal label, and other values are copied as they are. The first output
   * attribute is stored right after the input attributes.
   *
   * @return always true.
   */
  private boolean getItemsetFull() {
    for (int row = 0; row < IS.getNumInstances(); row++) {
      double[] values = new double[Attributes.getNumAttributes()];

      // input attributes
      for (int col = 0; col < Attributes.getInputNumAttributes(); col++) {
        if (IS.getInstance(row).getInputMissingValues(col)) {
          values[col] = Itemset.getMissingValue();
          continue;
        }

        if (Attributes.getInputAttribute(col).getType() != 0) {
          // real and integer
          values[col] = IS.getInstance(row).getInputRealValues(col);
          continue;
        }

        // nominal: store the index of the label equal to the instance's value
        for (int label = 0;
            label < Attributes.getInputAttribute(col).getNumNominalValues();
            label++) {
          if (Attributes.getInputAttribute(col)
              .getNominalValue(label)
              .equals(IS.getInstance(row).getInputNominalValues(col))) {
            values[col] = (double) label;
          }
        }
      }

      // output attribute, stored in the slot after the last input attribute
      int out = Attributes.getInputNumAttributes();
      if (IS.getInstance(row).getOutputMissingValues(0)) {
        values[out] = Itemset.getMissingValue();
      } else if (Attributes.getOutputAttribute(0).getType() != 0) {
        // real and integer
        values[out] = IS.getInstance(row).getOutputRealValues(0);
      } else {
        // nominal
        for (int label = 0;
            label < Attributes.getOutputAttribute(0).getNumNominalValues();
            label++) {
          if (Attributes.getOutputAttribute(0)
              .getNominalValue(label)
              .equals(IS.getInstance(row).getOutputNominalValues(0))) {
            values[out] = (double) label;
          }
        }
      }

      addItemset(new Itemset(1, values));
    }

    return true;
  }
Пример #15
0
  /**
   * Adds the given itemset to every value, scaling its weight by the weight supplied for that
   * value. Each scaled weight is also added to the itemset's class counter and to the total.
   *
   * @param itemset The itemset to add.
   * @param weights The weights of the itemset for every value.
   */
  public final void addWeights(Itemset itemset, double[] weights) {
    final int cls = (int) itemset.getClassValue();

    for (int v = 0; v < perValue.length; v++) {
      double scaled = itemset.getWeight() * weights[v];
      perClassPerValue[v][cls] += scaled;
      perValue[v] += scaled;
      perClass[cls] += scaled;
      total += scaled;
    }
  }
Пример #16
0
  /**
   * Moves the weight of every itemset in the given index range from one value to another. Only
   * the per-value and per-class-per-value counters change.
   *
   * @param from The value the weight is taken from.
   * @param to The value the weight is given to.
   * @param source The dataset.
   * @param start The index of the first itemset to shift.
   * @param end The index of the first itemset that will not be shifted.
   */
  public final void shiftRange(int from, int to, MyDataset source, int start, int end) {
    for (int pos = start; pos < end; pos++) {
      Itemset current = (Itemset) source.itemset(pos);
      int cls = (int) current.getClassValue();
      double w = current.getWeight();

      perClassPerValue[from][cls] -= w;
      perClassPerValue[to][cls] += w;
      perValue[from] -= w;
      perValue[to] += w;
    }
  }
Пример #17
0
  /**
   * Adds all itemsets in the given index range to the given value. The per-class counters are
   * updated per itemset; the value counter and the total receive the summed weight once, after
   * the scan.
   *
   * @param valueIndex The index of the value.
   * @param source The source of the data.
   * @param start The index of the first itemset to add.
   * @param end The index of the first itemset that will not be added.
   */
  public final void addRange(int valueIndex, MyDataset source, int start, int end) {
    double added = 0;

    for (int pos = start; pos < end; pos++) {
      Itemset current = (Itemset) source.itemset(pos);
      int cls = (int) current.getClassValue();
      double w = current.getWeight();

      added += w;
      perClassPerValue[valueIndex][cls] += w;
      perClass[cls] += w;
    }

    perValue[valueIndex] += added;
    total += added;
  }
Пример #18
0
  /**
   * Counts the training examples of the given class for which no itemset in L2 yields a degree
   * whose first component is positive.
   *
   * @param clas the class whose examples are checked
   * @return the number of such examples
   */
  public int hasUncoverClass(int clas) {
    int uncovered = 0;

    for (int ex = 0; ex < train.size(); ex++) {
      if (this.train.getOutputAsInteger(ex) != clas) {
        continue;
      }

      // Stop at the first itemset that matches the example.
      boolean covered = false;
      for (int pos = 0; pos < L2.size(); pos++) {
        double[] degree = L2.get(pos).degree(this.dataBase, this.train.getExample(ex));
        if (degree[0] > 0.0) {
          covered = true;
          break;
        }
      }

      if (!covered) {
        uncovered++;
      }
    }

    return uncovered;
  }
Пример #19
0
  // Reads itemsets from the cache until the reader signals end of file and stores, in a new
  // SET, those whose support is at least min_support. Other read errors are reported on
  // stderr and end the scan.
  private void initializeSupports(DBCacheReader cacheReader) {
    supports = new SET();

    try {
      // The loop is left only through an exception; EOFException is the normal exit.
      for (;;) {
        Itemset cached = cacheReader.getNextItemset();
        if (cached.getSupport() >= min_support) {
          supports.insert(cached);
        }
      }
    } catch (EOFException e) {
      // end of cache reached, nothing to report
    } catch (IOException e) {
      System.err.println("Error scanning cache!!!\n" + e);
    } catch (ClassNotFoundException e) {
      System.err.println("Error scanning cache!!!\n" + e);
    }
  }
Пример #20
0
  /**
   * Builds a sequence from an array of string tokens. A token starting with '<' carries a
   * timestamp for the current itemset (its first and last characters are stripped before
   * parsing), "-1" closes the current itemset, "-2" stores the sequence, and any other token
   * is parsed as an integer item. The first time an item is seen in this call, the sequence's
   * id is set in that item's BitSet in frequentItems.
   *
   * @param integers the tokens describing the sequence
   * @param sequenceID the identifier assigned to the new sequence
   */
  public void addSequence(String[] integers, int sequenceID) {
    Sequence sequence = new Sequence(sequences.size());
    sequence.setID(sequenceID);

    Itemset itemset = new Itemset();
    // Items already registered for this sequence.
    Map<Item, Boolean> counted = new HashMap<Item, Boolean>();

    for (String token : integers) {
      if (token.charAt(0) == '<') { // Timestamp
        long timestamp = Long.parseLong(token.substring(1, token.length() - 1));
        itemset.setTimestamp(timestamp);
        continue;
      }

      if (token.equals("-1")) { // end of an itemset
        long time = itemset.getTimestamp() + 1;
        sequence.addItemset(itemset);
        // The next itemset starts one time unit later unless a timestamp token overrides it.
        itemset = new Itemset();
        itemset.setTimestamp(time);
        continue;
      }

      if (token.equals("-2")) { // end of a sequence
        sequences.add(sequence);
        continue;
      }

      // a regular item
      Item item = itemFactory.getItem(Integer.parseInt(token));
      if (!counted.containsKey(item)) {
        counted.put(item, Boolean.TRUE);
        BitSet appearances = frequentItems.get(item);
        if (appearances == null) {
          appearances = new BitSet();
          frequentItems.put(item, appearances);
        }
        appearances.set(sequence.getId());
      }
      itemset.addItem(item);
    }
  }
Пример #21
0
  /**
   * Appends a row to the database: the number of items followed by each item, all written as
   * ints and folded into the running CRC. All items are validated before anything is written.
   * The column names must have been set beforehand.
   *
   * @param itemset the new row to be added to the data file
   * @exception IOException from library call
   * @exception DBException column names have not been set or an item greater than the number of
   *     columns was contained in the itemset
   */
  public void addRow(Itemset itemset) throws IOException, DBException {
    if (!wroteColumnNames) {
      throw new DBException("Column names must be set first");
    }

    final int size = itemset.size();
    for (int pos = 0; pos < size; pos++) {
      if (itemset.get(pos) > numColumns) {
        throw new DBException("Attempt to write invalid item");
      }
    }

    // Go back to the remembered write position if one is pending.
    if (needReposition) {
      outStream.seek(lastPosition);
      needReposition = false;
    }

    outStream.writeInt(size);
    CRC = updateCRC(CRC, size);

    for (int pos = 0; pos < size; pos++) {
      int item = itemset.get(pos);
      outStream.writeInt(item);
      CRC = updateCRC(CRC, item);
    }

    numRows++;
  }
Пример #22
0
  /**
   * Finds the leaf an itemset falls into. A node whose type flag is false answers with its own
   * lm; otherwise the search continues in the left child when the itemset's value for the split
   * attribute is at most the split value, and in the right child otherwise.
   *
   * @param itemset the itemset
   * @return the leaf no.
   */
  public final int leafNum(Itemset itemset) {
    if (!type) {
      return lm;
    }
    if (itemset.getValue(splitAttr) <= splitValue) {
      return leftNode.leafNum(itemset);
    }
    return rightNode.leafNum(itemset);
  }
Пример #23
0
  /**
   * Predicts the class value of an itemset by the tree. Interior nodes delegate to the left
   * child when the itemset's value for the split attribute is at most the split value, and to
   * the right child otherwise. Leaves answer with the smoothed model, or with the unsmoothed
   * one; for a value node the unsmoothed answer is the model's first coefficient.
   *
   * @param itemset the itemset
   * @param smooth =true, uses the smoothed model; otherwise uses the unsmoothed
   * @return the predicted value
   */
  public final double predict(Itemset itemset, boolean smooth) {
    if (type) { // NODE
      if (itemset.getValue(splitAttr) <= splitValue) {
        return leftNode.predict(itemset, smooth);
      }
      return rightNode.predict(itemset, smooth);
    }

    // LEAF
    if (smooth) {
      return smoothed.predict(itemset);
    }
    if (valueNode) {
      return unsmoothed.coeffs[0];
    }
    return unsmoothed.predict(itemset);
  }
Пример #24
0
  /**
   * Builds a rule from the itemset and appends it to the rule base. Variables not present in
   * the itemset are marked -1 (don't care) in the antecedent. The rule's confidence is the
   * itemset's class support divided by its support.
   *
   * @param itemset itemset to be added
   * @param time Time of the rule
   */
  public void add(Itemset itemset, long time) {
    // Start with every variable set to "don't care".
    int[] antecedent = new int[n_variables];
    int slot = 0;
    while (slot < n_variables) {
      antecedent[slot++] = -1;
    }

    for (int pos = 0; pos < itemset.size(); pos++) {
      Item item = itemset.get(pos);
      antecedent[item.getVariable()] = item.getValue();
    }

    Rule rule = new Rule(this.dataBase);
    rule.asignaAntecedente(antecedent);
    rule.setConsequent(itemset.getClas());
    rule.setConfidence(itemset.getSupportClass() / itemset.getSupport());
    rule.setSupport(itemset.getSupportClass());
    rule.setTime(time);
    this.ruleBase.add(rule);
  }
Пример #25
0
  /**
   * Rebuilds L2 for the given class. For every variable with more than one label, each
   * (variable, label) item is tried alone in a scratch itemset; a clone of the itemset is kept
   * in L2 when its class support reaches minsup. Rules are then generated from L2.
   *
   * @param clas the class the itemsets are built for
   */
  private void generateL2(int clas) {
    this.L2.clear();

    // One scratch itemset is reused: each item is added, evaluated, and removed again.
    Itemset scratch = new Itemset(clas);

    for (int variable = 0; variable < this.nVariables; variable++) {
      if (this.dataBase.numLabels(variable) <= 1) {
        continue;
      }
      for (int label = 0; label < this.dataBase.numLabels(variable); label++) {
        scratch.add(new Item(variable, label));
        scratch.calculateSupports(this.dataBase, this.train);
        if (scratch.getSupportClass()[0] >= this.minsup) {
          this.L2.add(scratch.clone());
        }
        scratch.remove(0);
      }
    }

    this.generateRules(this.L2, clas);
  }
Пример #26
0
  /**
   * Sample usage and testing. Creates several database files in the working directory:
   * "invalid.db" (text written directly, not through DBWriter), "corrupted.db" (written through
   * DBWriter and then overwritten at a fixed offset), "empty.db" (description and column names
   * only), "correct.db" (written in two DBWriter sessions) and "bummer.db" (opened and closed
   * immediately). Unexpected exceptions are printed to standard output, not propagated.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    // Four two-item itemsets used as rows below.
    Itemset is1 = new Itemset();
    is1.add(1);
    is1.add(2);
    Itemset is2 = new Itemset();
    is2.add(3);
    is2.add(2);
    Itemset is3 = new Itemset();
    is3.add(3);
    is3.add(1);
    // is4 holds item 33 while only three column names are defined below; the addRow call
    // using it is wrapped in a DBException handler.
    Itemset is4 = new Itemset();
    is4.add(33);
    is4.add(3);

    // Three column names shared by all databases written through DBWriter.
    ArrayList colNames = new ArrayList(3);
    colNames.add("cheese");
    colNames.add("pizza");
    colNames.add("beer");

    System.out.println("\n\nCreating invalid database:");
    try {
      // Written with RandomAccessFile directly: only ID plus some text.
      RandomAccessFile invalid = new RandomAccessFile("invalid.db", "rw");
      invalid.writeChars(ID + " - a bogus file that looks like a valid one");
      invalid.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nCreating corrupted database:");
    try {
      DBWriter corrupted = new DBWriter("corrupted.db");

      // addRow is attempted before setColumnNames; a DBException is caught and printed.
      try {
        corrupted.addRow(is1);
      } catch (DBException e) {
        System.out.println(e);
      }

      corrupted.setDescription("a corrupted database");
      corrupted.setColumnNames(colNames);

      // Rows interleaved with repeated description and column-name updates.
      corrupted.addRow(is1);
      corrupted.setDescription("a corrupted database - 2");
      corrupted.setColumnNames(colNames);
      corrupted.addRow(is2);
      corrupted.setDescription("a corrupted database - 3");
      corrupted.addRow(is3);

      // is4 contains item 33; a DBException is caught and printed.
      try {
        corrupted.addRow(is4);
      } catch (DBException e) {
        System.out.println(e);
      }

      corrupted.close();

      System.out.println("corrupting file");

      // Overwrite one int at a hard-coded byte offset of the finished file.
      // NOTE(review): offset 770 presumably depends on the header layout DBWriter produces —
      // confirm if the file format changes.
      RandomAccessFile raf = new RandomAccessFile("corrupted.db", "rw");
      raf.seek(770);
      // replace the 2 in the second itemset with a 3
      raf.writeInt(3);
      raf.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nCreating empty database:");
    try {
      // Description and column names only, no rows.
      DBWriter empty = new DBWriter("empty.db");

      empty.setDescription("an empty database");
      empty.setColumnNames(colNames);
      empty.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nCreating correct database:");
    try {
      DBWriter correct = new DBWriter("correct.db");

      correct.setDescription("a correct database");
      correct.setColumnNames(colNames);

      correct.addRow(is1);
      correct.setDescription("a correct database - 2");
      correct.setColumnNames(colNames);
      correct.addRow(is2);
      correct.setDescription("a correct database - 3");
      correct.addRow(is3);

      correct.close();

      // Second session on the same file name.
      correct = new DBWriter("correct.db");

      correct.setColumnNames(colNames);

      correct.addRow(is1);
      correct.setDescription("a correct database - 4");
      correct.setColumnNames(colNames);
      correct.addRow(is2);
      correct.setDescription("a correct database - 5");
      correct.addRow(is3);

      correct.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nOpening and closing DBWriter:");
    try {
      // A writer that is closed without any description, column names or rows.
      DBWriter bummer = new DBWriter("bummer.db");
      bummer.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }
  }
Пример #27
0
  /**
   * Find association rules in a database, given the set of frequent itemsets and a set of
   * restrictions.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @param inAntecedent the items that must appear in the antecedent of each rule, if null then
   *     this constraint is ignored
   * @param inConsequent the items that must appear in the consequent of each rule, if null then
   *     this constraint is ignored
   * @param ignored the items that should be ignored, if null then this constraint is ignored
   * @param maxAntecedent the maximum number of items that can appear in the antecedent of each
   *     rule, if 0 then this constraint is ignored
   * @param minConsequent the minimum number of items that should appear in the consequent of each
   *     rule, if 0 then this constraint is ignored
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(
      DBCacheReader cacheReader,
      float minSupport,
      float minConfidence,
      Itemset inAntecedent,
      Itemset inConsequent,
      Itemset ignored,
      int maxAntecedent,
      int minConsequent) {
    // remember the thresholds and constraints of this run in the instance fields
    min_support = minSupport;
    min_confidence = minConfidence;

    is_in_antecedent = inAntecedent;
    is_in_consequent = inConsequent;
    is_ignored = ignored;
    max_antecedent = maxAntecedent;
    min_consequent = minConsequent;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read from cache supports of frequent itemsets
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();
    if (frequent.isEmpty()) {
      return rules;
    }

    // if we need to ignore some items
    if (ignored != null) {
      // remove all frequent itemsets that contain items to be ignored;
      // their subsets that do not contain those items will remain
      for (int i = 0; i < frequent.size(); i++) {
        Itemset is = (Itemset) frequent.get(i);
        if (is.doesIntersect(ignored)) {
          // overwrite this slot with the last element, drop the last slot,
          // and re-examine the same index on the next iteration
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        }
      }

      if (frequent.isEmpty()) {
        return rules;
      }
    }

    // if we need to have some items in the antecedent or consequent
    if (inAntecedent != null || inConsequent != null) {
      // remove frequent itemsets that don't have the required items
      for (int i = 0; i < frequent.size(); i++) {
        Itemset is = (Itemset) frequent.get(i);
        boolean lacksRequiredItems =
            (inAntecedent != null && !inAntecedent.isIncludedIn(is))
                || (inConsequent != null && !inConsequent.isIncludedIn(is));
        if (lacksRequiredItems) {
          // same swap-with-last removal as above
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        }
      }

      if (frequent.isEmpty()) {
        return rules;
      }
    }

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
      Itemset is_frequent = (Itemset) frequent.get(i);

      // skip it if it's too small to be split into antecedent and consequent
      if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) {
        continue;
      }

      // collect all 1-item consequents that reach the confidence threshold
      Vector consequents = new Vector(is_frequent.size());
      for (int k = 0; k < is_frequent.size(); k++) {
        int item = is_frequent.getItem(k);
        Itemset is_consequent = new Itemset(1);
        is_consequent.addItem(item);

        Itemset is_antecedent = is_frequent.subtract(is_consequent);

        float antecedent_support;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
          // Without the antecedent's support no meaningful confidence can be
          // computed. Dividing by a tiny placeholder would yield an enormous
          // confidence and let a bogus rule through, so skip this consequent.
          continue;
        }
        float confidence = is_frequent.getSupport() / antecedent_support;

        if (confidence < min_confidence) {
          continue;
        }

        // every sufficiently confident consequent is handed to ap-genrules,
        // whether or not the 1-item rule itself passes the constraints below
        consequents.add(is_consequent);

        // check whether the rule also satisfies the user constraints
        boolean approved =
            (is_in_antecedent == null || is_in_antecedent.isIncludedIn(is_antecedent))
                && (is_in_consequent == null || is_in_consequent.isIncludedIn(is_consequent))
                && (max_antecedent <= 0 || is_antecedent.size() <= max_antecedent)
                && (min_consequent <= 0 || is_consequent.size() >= min_consequent);

        // if the rule satisfies all requirements then
        // we add it to the rules collection
        if (approved) {
          rules.add(
              new AssociationRule(
                  is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        }
      }

      // call the ap-genrules procedure for generating all rules
      // out of this frequent itemset
      ap_genrules_constraint(is_frequent, consequents);
    }

    return rules;
  }
Пример #28
0
  /**
   * Launches the algorithm and writes its results to disk.
   *
   * <p>When {@code somethingWrong} is set, only an error message is printed and nothing is run.
   * Otherwise the process is created and run, its report is printed, and the fuzzy attributes,
   * the genetic learning log, the rules file and the values file are written. A file that cannot
   * be opened is reported by printing the stack trace; the exception is not propagated.
   */
  public void execute() {
    if (somethingWrong) { // We do not execute the program
      System.err.println("An error was found");
      System.err.println("Aborting the program");
      // We should not use the statement: System.exit(-1);
      return;
    }

    this.proc =
        new AlcalaetalProcess(
            this.trans,
            this.nEvaluations,
            this.popSize,
            this.nBitsGene,
            this.phi,
            this.d,
            this.nFuzzyRegionsForNumericAttributes,
            this.useMaxForOneFrequentItemsets,
            this.minSupport,
            this.minConfidence);
    this.proc.run();
    this.associationRulesSet = this.proc.getRulesSet();
    this.proc.printReport(this.associationRulesSet);

    try {
      this.saveFuzzyAttributes(
          this.uniformFuzzyAttributesFilename, this.proc.getUniformFuzzyAttributes());
      this.saveFuzzyAttributes(
          this.adjustedFuzzyAttributesFilename, this.proc.getAdjustedFuzzyAttributes());
      this.saveGeneticLearningLog(
          this.geneticLearningLogFilename, this.proc.getGeneticLearningLog());

      // try-with-resources closes both writers even when a later step throws;
      // the files are written as UTF-8 to match the encoding declared in the XML prolog
      try (PrintWriter rules_writer = new PrintWriter(this.rulesFilename, "UTF-8");
          PrintWriter values_writer = new PrintWriter(this.valuesFilename, "UTF-8")) {

        rules_writer.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
        rules_writer.println("<rules>");

        values_writer.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
        values_writer.print("<values ");
        values_writer.println(
            "n_one_frequent_itemsets=\""
                + this.proc.getNumberOfOneFrequentItemsets()
                + "\" n_rules=\""
                + this.associationRulesSet.size()
                + "\">");

        for (int r = 0; r < this.associationRulesSet.size(); r++) {
          AssociationRule ar = this.associationRulesSet.get(r);

          // the element is opened here and closed by "</rule>" below, so it must
          // not be self-closing or the document is not well-formed
          rules_writer.println("<rule id = \"" + r + "\">");
          values_writer.println(
              "<rule id=\""
                  + r
                  + "\" rule_support=\""
                  + ar.getRuleSupport()
                  + "\" antecedent_support=\""
                  + ar.getAntecedentSupport()
                  + "\" confidence=\""
                  + ar.getConfidence()
                  + "\"/>");

          rules_writer.println("<antecedents>");
          Itemset antecedent = ar.getAntecedent();
          for (int i = 0; i < antecedent.size(); i++) {
            this.createRule(
                antecedent.get(i), this.proc.getAdjustedFuzzyAttributes(), rules_writer);
          }
          rules_writer.println("</antecedents>");

          rules_writer.println("<consequents>");
          Itemset consequent = ar.getConsequent();
          for (int i = 0; i < consequent.size(); i++) {
            this.createRule(
                consequent.get(i), this.proc.getAdjustedFuzzyAttributes(), rules_writer);
          }
          rules_writer.println("</consequents>");

          rules_writer.println("</rule>");
        }

        rules_writer.println("</rules>");
        values_writer.println("</values>");
      }

      System.out.println("\nAlgorithm Finished");
    } catch (FileNotFoundException | java.io.UnsupportedEncodingException e) {
      e.printStackTrace();
    }
  }
 /**
  * Builds a new sequence from a prefix by adding one item to its last itemset.
  *
  * <p>The prefix is first duplicated with {@code cloneSequence()}; the item is then added to the
  * itemset at the last position of that duplicate. The prefix must contain at least one itemset.
  *
  * @param prefix the sequence to extend
  * @param item the item to add to the last itemset
  * @return the duplicated sequence with the item added
  */
 private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, String item) {
   final SequentialPattern extended = prefix.cloneSequence();
   final int lastPosition = extended.size() - 1;
   final Itemset lastItemset = extended.get(lastPosition);
   lastItemset.addItem(item);
   return extended;
 }
Пример #30
0
  /**
   * Adds a copy of the given itemset to this dataset.
   *
   * <p>The argument itself is not stored: the result of its {@code copy()} is attached to this
   * dataset via {@code setDataset(this)} and appended to {@code itemsets}.
   *
   * @param itemset The itemset whose copy is added to the dataset.
   */
  public final void addItemset(Itemset itemset) {
    final Itemset ownedCopy = (Itemset) itemset.copy();
    ownedCopy.setDataset(this);

    itemsets.addElement(ownedCopy);
  }