コード例 #1
0
 /**
  * Runs the algorithm with a minimum support expressed as a fraction of the database size.
  *
  * <p>The input file is loaded into a new {@code SequenceDatabase}, the fractional support is
  * converted to an integer with {@code Math.ceil(minSupport * database.size())}, and the work is
  * then delegated to the overload that takes an integer minimum support.
  *
  * @param minSupport Minsup as a percentage (ex: 0.05 = 5 %)
  * @param minConfidence minimum confidence (a value between 0 and 1).
  * @param input the input file path
  * @param output the output file path
  * @param windowSize a window size
  * @throws IOException if the input file cannot be loaded or there is an error writing the output
  */
 public void runAlgorithm(
     double minSupport, double minConfidence, String input, String output, int windowSize)
     throws IOException {
   // Load into a local first so that the field is only replaced by a fully loaded database.
   SequenceDatabase loaded = new SequenceDatabase();
   try {
     loaded.loadFile(input);
   } catch (Exception e) {
     // Do not continue with an empty database: that would silently yield a support of 0 and an
     // empty result file. Report the failure through the declared exception type instead.
     if (e instanceof IOException) {
       throw (IOException) e;
     }
     throw new IOException("Unable to load the sequence database from: " + input, e);
   }
   this.database = loaded;

   // convert minimum support to a relative minimum support (integer)
   this.minsuppRelative = (int) Math.ceil(minSupport * database.size());

   // run the algorithm; the database loaded above is reused by the delegate
   runAlgorithm(input, output, minsuppRelative, minConfidence, windowSize);
 }
コード例 #2
0
  /**
   * Runs the algorithm with a minimum support expressed as an integer.
   *
   * <p>Steps performed, in order: load the database from {@code input} if none is already held,
   * open {@code output} for writing, remove infrequent items, then for every pair of frequent
   * items (I, J) try to build the rules I ==> J and J ==> I, save those reaching
   * {@code minConfidence}, and recursively expand every rule that reaches the minimum support.
   *
   * <p>The output writer is always closed and the database reference is always cleared when this
   * method returns, whether normally or with an exception.
   *
   * @param input the input file path
   * @param output the output file path
   * @param relativeMinSupport minsup as a relative value (integer); 0 is treated as 1
   * @param minConfidence minimum confidence (a value between 0 and 1).
   * @param windowSize a window size
   * @throws IOException if the input file cannot be loaded or there is an error writing the output
   */
  public void runAlgorithm(
      String input, String output, int relativeMinSupport, double minConfidence, int windowSize)
      throws IOException {
    this.minconf = minConfidence;

    // read the database into memory, unless one has already been loaded
    if (database == null) {
      // Load into a local first so that the field never holds a partially loaded database.
      SequenceDatabase loaded = new SequenceDatabase();
      try {
        loaded.loadFile(input);
      } catch (Exception e) {
        // Do not mine an empty database and write an empty result file: report the failure.
        if (e instanceof IOException) {
          throw (IOException) e;
        }
        throw new IOException("Unable to load the sequence database from: " + input, e);
      }
      this.database = loaded;
    }

    try {
      // IMPORTANT : THIS IS A FIX SO THAT THE DEFINITION IS THE SAME AS IN THE ARTICLE!!
      this.windowSize = windowSize + 1;

      // if minsup is 0, set it to 1
      this.minsuppRelative = relativeMinSupport;
      if (this.minsuppRelative == 0) { // protection
        this.minsuppRelative = 1;
      }

      // reset the stats for memory usage
      MemoryLogger.getInstance().reset();
      // prepare the object for writing the output file
      writer = new BufferedWriter(new FileWriter(output));
      try {
        // save the start time
        timeStart = System.currentTimeMillis(); // for stats

        // remove infrequent items from the database
        removeItemsThatAreNotFrequent(database);

        // note frequent items in a list "listFrequents"
        List<String> listFrequents = new ArrayList<String>();
        // for each item
        for (Entry<String, Map<Integer, Occurence>> entry : mapItemCount.entrySet()) {
          // if it is frequent
          if (entry.getValue().size() >= minsuppRelative) {
            // add the item to the list
            listFrequents.add(entry.getKey());
          }
        }

        // FOR EACH FREQUENT ITEM WE COMPARE WITH EACH OTHER FREQUENT ITEM TO
        // TRY TO GENERATE A RULE 1-1.
        for (int i = 0; i < listFrequents.size(); i++) {
          String intI = listFrequents.get(i);
          Map<Integer, Occurence> occurencesI = mapItemCount.get(intI);
          for (int j = i + 1; j < listFrequents.size(); j++) {
            String intJ = listFrequents.get(j);
            Map<Integer, Occurence> occurencesJ = mapItemCount.get(intJ);

            // (1) Calculate tidsI, tidsI->J and tidsJ->I (tidsJ is computed lazily below)
            Set<Integer> tidsI = new HashSet<Integer>();
            Set<Integer> tidsJ = null;
            Set<Integer> tidsIJ = new HashSet<Integer>();
            Set<Integer> tidsJI = new HashSet<Integer>();

            // for each occurence of I
            for (Occurence occI : occurencesI.values()) {
              // add the sequenceID to tidsI
              tidsI.add(occI.sequenceID);

              // if J does not appear in that sequence, move on to the next sequence
              Occurence occJ = occurencesJ.get(occI.sequenceID);
              if (occJ == null) {
                continue;
              }

              // make a big loop to compare if I appears before
              // J in that sequence and
              // if J appears before I
              boolean addedIJ = false;
              boolean addedJI = false;
              // for each occurence of I in that sequence
              loopIJ:
              for (Short posI : occI.occurences) {
                // for each occurence of J in that sequence
                for (Short posJ : occJ.occurences) {
                  // NOTE: "windowSize" here is the method parameter, which shadows the field
                  // that was set to windowSize + 1 above.
                  if (!posI.equals(posJ) && Math.abs(posI - posJ) <= windowSize) {
                    if (posI <= posJ) {
                      // if I is before J
                      tidsIJ.add(occI.sequenceID);
                      addedIJ = true;
                    } else {
                      // if J is before I
                      tidsJI.add(occI.sequenceID);
                      addedJI = true;
                    }
                    // if we have found that I is before J and J is before I
                    // we don't need to continue.
                    if (addedIJ && addedJI) {
                      break loopIJ;
                    }
                  }
                }
              }
            }
            // END

            // (2) check if the two itemsets have enough common tids
            // if not, we don't need to generate a rule for them.
            // create rule IJ
            if (tidsIJ.size() >= minsuppRelative) {
              // calculate the confidence of I ==> J
              double confIJ = ((double) tidsIJ.size()) / occurencesI.size();

              // create itemset of the rule I ==> J
              String[] itemset1 = new String[] {intI};
              String[] itemset2 = new String[] {intJ};

              // if the confidence is high enough, save the rule
              if (confIJ >= minConfidence) {
                saveRule(tidsIJ, confIJ, itemset1, itemset2);
              }
              // Calculate tidsJ.
              tidsJ = new HashSet<Integer>();
              for (Occurence occJ : occurencesJ.values()) {
                tidsJ.add(occJ.sequenceID);
              }

              // recursive call to try to expand the rule
              expandLeft(itemset1, itemset2, tidsI, tidsIJ);
              expandRight(itemset1, itemset2, tidsI, tidsJ, tidsIJ);
            }

            // create rule JI
            if (tidsJI.size() >= minsuppRelative) {
              // calculate the confidence of J ==> I
              double confJI = ((double) tidsJI.size()) / occurencesJ.size();

              // create itemsets for that rule
              String[] itemset1 = new String[] {intI};
              String[] itemset2 = new String[] {intJ};

              // if the rule has enough confidence, save it!
              if (confJI >= minConfidence) {
                saveRule(tidsJI, confJI, itemset2, itemset1);
              }

              // Calculate tidsJ, unless it was already built for rule IJ.
              if (tidsJ == null) {
                tidsJ = new HashSet<Integer>();
                for (Occurence occJ : occurencesJ.values()) {
                  tidsJ.add(occJ.sequenceID);
                }
              }
              // recursive call to try to expand the rule
              expandRight(itemset2, itemset1, tidsJ, tidsI, tidsJI);
              expandLeft(itemset2, itemset1, tidsJ, tidsJI);
            }
          }
        }
        // save the end time for the execution of the algorithm
        timeEnd = System.currentTimeMillis(); // for stats
      } finally {
        // close the file even when rule generation fails, so the handle is not leaked
        writer.close();
      }
    } finally {
      // always drop the database so that a later run never reuses a stale one
      database = null;
    }
  }