Пример #1
0
  /**
   * Builds the utility list of the itemset pXY by joining the utility lists of pX and pY.
   *
   * <p>For every element of {@code px}, the element of {@code py} with the same tid is looked up.
   * When both exist, a joined element is appended to the result; its item utility is the sum of
   * both item utilities (minus the prefix's item utility when {@code P} is given) and its
   * remaining utility is taken from the {@code py} element.
   *
   * <p>When {@code ENABLE_LA_PRUNE} is set, an upper bound starting at
   * {@code px.sumIutils + px.sumRutils} is lowered by the utilities of each {@code px} element
   * that has no match in {@code py}; the join is abandoned as soon as that bound falls below
   * {@code minUtility}.
   *
   * @param P the utility list of prefix P, or {@code null} when there is no prefix
   * @param px the utility list of pX
   * @param py the utility list of pY
   * @param minUtility the minimum utility threshold used by the LA-prune check
   * @return the utility list of pXY, or {@code null} if the LA-prune check abandoned the join
   */
  private UtilityList construct(UtilityList P, UtilityList px, UtilityList py, int minUtility) {
    // The result is labelled with the item of pY.
    UtilityList joined = new UtilityList(py.item);

    // LA-prune: upper bound on the utility still reachable through pX.
    long upperBound = px.sumIutils + px.sumRutils;

    for (Element xElem : px.elements) {
      // Look up the element of pY belonging to the same transaction.
      Element yElem = findElementWithTID(py, xElem.tid);

      if (yElem == null) {
        // The transaction does not contain pY, so it contributes nothing to pXY.
        if (!ENABLE_LA_PRUNE) {
          continue;
        }
        upperBound -= (xElem.iutils + xElem.rutils);
        if (upperBound < minUtility) {
          return null;
        }
        continue;
      }

      if (P == null) {
        // No prefix: the joined item utility is simply the sum of both sides.
        joined.addElement(new Element(xElem.tid, xElem.iutils + yElem.iutils, yElem.rutils));
        continue;
      }

      // With a prefix, its utility is contained in both sides and is subtracted once.
      Element prefixElem = findElementWithTID(P, xElem.tid);
      if (prefixElem != null) {
        joined.addElement(
            new Element(
                xElem.tid, xElem.iutils + yElem.iutils - prefixElem.iutils, yElem.rutils));
      }
    }

    return joined;
  }
Пример #2
0
  /**
   * Runs the algorithm on a transaction file and writes the result to an output file.
   *
   * <p>The input is read twice. The first pass accumulates the transaction-weighted utility (TWU)
   * of every item. The second pass builds the utility list of each item whose TWU is at least
   * {@code minUtility} and fills {@code mapFMAP} with pairwise TWU sums. The utility lists are
   * then handed to {@code fhm} for the recursive search.
   *
   * <p>Each data line is expected to look like {@code items:transactionUtility:itemUtilities},
   * where the entries of the item list and of the utility list are separated by single spaces.
   * Empty lines and lines starting with {@code #}, {@code %} or {@code @} are skipped.
   *
   * <p>Any exception raised while reading or parsing the input is printed and swallowed; the
   * algorithm then continues with whatever was read up to that point.
   *
   * <p>The output writer is always closed before this method returns or throws.
   *
   * @param input the input file path
   * @param output the output file path
   * @param minUtility the minimum utility threshold
   * @throws IOException if the output file cannot be opened or closed, if closing the input
   *     reader fails, or if an I/O error propagates out of the mining step
   */
  public void runAlgorithm(String input, String output, int minUtility) throws IOException {
    // reset the recorded maximum memory usage
    MemoryLogger.getInstance().reset();

    // initialize the buffer for storing the current itemset
    itemsetBuffer = new int[BUFFERS_SIZE];

    mapFMAP = new HashMap<Integer, Map<Integer, Long>>();

    startTimestamp = System.currentTimeMillis();

    writer = new BufferedWriter(new FileWriter(output));

    // Everything after the writer is opened runs inside try/finally so that the output file
    // is released even when reading, closing the reader, or mining throws.
    try {
      // map storing the TWU of each item
      mapItemToTWU = new HashMap<Integer, Long>();

      // FIRST DATABASE PASS: calculate the TWU of each item.
      BufferedReader myInput = null;
      String thisLine;
      try {
        // prepare the object for reading the file
        myInput = new BufferedReader(new InputStreamReader(new FileInputStream(new File(input))));
        // for each line (transaction) until the end of file
        while ((thisLine = myInput.readLine()) != null) {
          // skip empty lines, comments and metadata lines
          if (thisLine.isEmpty()
              || thisLine.charAt(0) == '#'
              || thisLine.charAt(0) == '%'
              || thisLine.charAt(0) == '@') {
            continue;
          }

          // split the transaction according to the : separator
          String[] split = thisLine.split(":");
          // the first part is the list of items
          String[] items = split[0].split(" ");
          // the second part is the transaction utility
          int transactionUtility = Integer.parseInt(split[1]);
          // for each item, add the transaction utility to its TWU
          for (int i = 0; i < items.length; i++) {
            Integer item = Integer.parseInt(items[i]);
            Long twu = mapItemToTWU.get(item);
            twu = (twu == null) ? transactionUtility : twu + transactionUtility;
            mapItemToTWU.put(item, twu);
          }
        }
      } catch (Exception e) {
        // Best effort: report the problem and continue with the TWU values collected so far.
        e.printStackTrace();
      } finally {
        if (myInput != null) {
          myInput.close();
        }
      }

      // List of the utility lists of items with TWU >= minUtility.
      List<UtilityList> listOfUtilityLists = new ArrayList<UtilityList>();
      // Key: item, value: utility list associated with that item.
      Map<Integer, UtilityList> mapItemToUtilityList = new HashMap<Integer, UtilityList>();

      for (Integer item : mapItemToTWU.keySet()) {
        // keep only promising items (TWU >= minUtility)
        if (mapItemToTWU.get(item) >= minUtility) {
          // create an empty utility list that is filled during the second pass
          UtilityList uList = new UtilityList(item);
          mapItemToUtilityList.put(item, uList);
          listOfUtilityLists.add(uList);
        }
      }

      // Sort the promising items according to the order defined by compareItems.
      Collections.sort(
          listOfUtilityLists,
          new Comparator<UtilityList>() {
            public int compare(UtilityList o1, UtilityList o2) {
              return compareItems(o1.item, o2.item);
            }
          });

      // SECOND DATABASE PASS: construct the utility lists of the promising items.
      try {
        // prepare object for reading the file
        myInput = new BufferedReader(new InputStreamReader(new FileInputStream(new File(input))));
        // transaction counter; only data lines are counted
        int tid = 0;
        // for each line (transaction) until the end of file
        while ((thisLine = myInput.readLine()) != null) {
          // skip empty lines, comments and metadata lines
          if (thisLine.isEmpty()
              || thisLine.charAt(0) == '#'
              || thisLine.charAt(0) == '%'
              || thisLine.charAt(0) == '@') {
            continue;
          }

          // split the line according to the separator
          String[] split = thisLine.split(":");
          // the first part is the list of items
          String[] items = split[0].split(" ");
          // the third part holds the utility of each item in this transaction
          String[] utilityValues = split[2].split(" ");

          // Copy the transaction, dropping items with TWU < minUtility.
          int remainingUtility = 0;

          // sum of the utilities of the items kept in this transaction
          long newTWU = 0;

          List<Pair> revisedTransaction = new ArrayList<Pair>();
          for (int i = 0; i < items.length; i++) {
            Pair pair = new Pair();
            pair.item = Integer.parseInt(items[i]);
            pair.utility = Integer.parseInt(utilityValues[i]);
            // keep the item only if it is promising
            if (mapItemToTWU.get(pair.item) >= minUtility) {
              revisedTransaction.add(pair);
              remainingUtility += pair.utility;
              newTWU += pair.utility;
            }
          }

          // sort the transaction with the same ordering as the list of utility lists
          Collections.sort(
              revisedTransaction,
              new Comparator<Pair>() {
                public int compare(Pair o1, Pair o2) {
                  return compareItems(o1.item, o2.item);
                }
              });

          // for each item left in the transaction
          for (int i = 0; i < revisedTransaction.size(); i++) {
            Pair pair = revisedTransaction.get(i);

            // after this subtraction, remainingUtility covers only the items that follow
            remainingUtility = remainingUtility - pair.utility;

            // get the utility list of this item
            UtilityList utilityListOfItem = mapItemToUtilityList.get(pair.item);

            // add an element for this transaction to the utility list of this item
            Element element = new Element(tid, pair.utility, remainingUtility);
            utilityListOfItem.addElement(element);

            // For every later item of the transaction, accumulate newTWU under the key
            // (this item, later item) in mapFMAP.
            Map<Integer, Long> mapFMAPItem = mapFMAP.get(pair.item);
            if (mapFMAPItem == null) {
              mapFMAPItem = new HashMap<Integer, Long>();
              mapFMAP.put(pair.item, mapFMAPItem);
            }

            for (int j = i + 1; j < revisedTransaction.size(); j++) {
              Pair pairAfter = revisedTransaction.get(j);
              Long twuSum = mapFMAPItem.get(pairAfter.item);
              if (twuSum == null) {
                mapFMAPItem.put(pairAfter.item, newTWU);
              } else {
                mapFMAPItem.put(pairAfter.item, twuSum + newTWU);
              }
            }
          }
          tid++; // increase tid number for next transaction
        }
      } catch (Exception e) {
        // Best effort: report the problem and continue with the utility lists built so far.
        e.printStackTrace();
      } finally {
        if (myInput != null) {
          myInput.close();
        }
      }

      // check the memory usage
      MemoryLogger.getInstance().checkMemory();

      // mine the database recursively
      fhm(itemsetBuffer, 0, null, listOfUtilityLists, minUtility);

      // check the memory usage again
      MemoryLogger.getInstance().checkMemory();
    } finally {
      // close the output file on both the normal and the exceptional path
      writer.close();
    }

    // record end time
    endTimestamp = System.currentTimeMillis();
  }