/** * This method constructs the utility list of pXY * * @param P : the utility list of prefix P. * @param px : the utility list of pX * @param py : the utility list of pY * @return the utility list of pXY */ private UtilityList construct(UtilityList P, UtilityList px, UtilityList py, int minUtility) { // create an empy utility list for pXY UtilityList pxyUL = new UtilityList(py.item); // == new optimization - LA-prune == / // Initialize the sum of total utility long totalUtility = px.sumIutils + px.sumRutils; // ================================================ // for each element in the utility list of pX for (Element ex : px.elements) { // do a binary search to find element ey in py with tid = ex.tid Element ey = findElementWithTID(py, ex.tid); if (ey == null) { // == new optimization - LA-prune == / if (ENABLE_LA_PRUNE) { totalUtility -= (ex.iutils + ex.rutils); if (totalUtility < minUtility) { return null; } } // =============================================== / continue; } // if the prefix p is null if (P == null) { // Create the new element Element eXY = new Element(ex.tid, ex.iutils + ey.iutils, ey.rutils); // add the new element to the utility list of pXY pxyUL.addElement(eXY); } else { // find the element in the utility list of p wih the same tid Element e = findElementWithTID(P, ex.tid); if (e != null) { // Create new element Element eXY = new Element(ex.tid, ex.iutils + ey.iutils - e.iutils, ey.rutils); // add the new element to the utility list of pXY pxyUL.addElement(eXY); } } } // return the utility list of pXY. return pxyUL; }
/** * Run the algorithm * * @param input the input file path * @param output the output file path * @param minUtility the minimum utility threshold * @throws IOException exception if error while writing the file */ public void runAlgorithm(String input, String output, int minUtility) throws IOException { // reset maximum MemoryLogger.getInstance().reset(); // initialize the buffer for storing the current itemset itemsetBuffer = new int[BUFFERS_SIZE]; mapFMAP = new HashMap<Integer, Map<Integer, Long>>(); startTimestamp = System.currentTimeMillis(); writer = new BufferedWriter(new FileWriter(output)); // We create a map to store the TWU of each item mapItemToTWU = new HashMap<Integer, Long>(); // We scan the database a first time to calculate the TWU of each item. BufferedReader myInput = null; String thisLine; try { // prepare the object for reading the file myInput = new BufferedReader(new InputStreamReader(new FileInputStream(new File(input)))); // for each line (transaction) until the end of file while ((thisLine = myInput.readLine()) != null) { // if the line is a comment, is empty or is a // kind of metadata if (thisLine.isEmpty() == true || thisLine.charAt(0) == '#' || thisLine.charAt(0) == '%' || thisLine.charAt(0) == '@') { continue; } // split the transaction according to the : separator String split[] = thisLine.split(":"); // the first part is the list of items String items[] = split[0].split(" "); // the second part is the transaction utility int transactionUtility = Integer.parseInt(split[1]); // for each item, we add the transaction utility to its TWU for (int i = 0; i < items.length; i++) { // convert item to integer Integer item = Integer.parseInt(items[i]); // get the current TWU of that item Long twu = mapItemToTWU.get(item); // add the utility of the item in the current transaction to its twu twu = (twu == null) ? transactionUtility : twu + transactionUtility; mapItemToTWU.put(item, twu); } } } catch (Exception e) { // catches exception if error while reading the input file e.printStackTrace(); } finally { if (myInput != null) { myInput.close(); } } // CREATE A LIST TO STORE THE UTILITY LIST OF ITEMS WITH TWU >= MIN_UTILITY. List<UtilityList> listOfUtilityLists = new ArrayList<UtilityList>(); // CREATE A MAP TO STORE THE UTILITY LIST FOR EACH ITEM. // Key : item Value : utility list associated to that item Map<Integer, UtilityList> mapItemToUtilityList = new HashMap<Integer, UtilityList>(); // For each item for (Integer item : mapItemToTWU.keySet()) { // if the item is promising (TWU >= minutility) if (mapItemToTWU.get(item) >= minUtility) { // create an empty Utility List that we will fill later. UtilityList uList = new UtilityList(item); mapItemToUtilityList.put(item, uList); // add the item to the list of high TWU items listOfUtilityLists.add(uList); } } // SORT THE LIST OF HIGH TWU ITEMS IN ASCENDING ORDER Collections.sort( listOfUtilityLists, new Comparator<UtilityList>() { public int compare(UtilityList o1, UtilityList o2) { // compare the TWU of the items return compareItems(o1.item, o2.item); } }); // SECOND DATABASE PASS TO CONSTRUCT THE UTILITY LISTS // OF 1-ITEMSETS HAVING TWU >= minutil (promising items) try { // prepare object for reading the file myInput = new BufferedReader(new InputStreamReader(new FileInputStream(new File(input)))); // variable to count the number of transaction int tid = 0; // for each line (transaction) until the end of file while ((thisLine = myInput.readLine()) != null) { // if the line is a comment, is empty or is a // kind of metadata if (thisLine.isEmpty() == true || thisLine.charAt(0) == '#' || thisLine.charAt(0) == '%' || thisLine.charAt(0) == '@') { continue; } // split the line according to the separator String split[] = thisLine.split(":"); // get the list of items String items[] = split[0].split(" "); // get the list of utility values corresponding to each item // for that transaction String utilityValues[] = split[2].split(" "); // Copy the transaction into lists but // without items with TWU < minutility int remainingUtility = 0; long newTWU = 0; // NEW OPTIMIZATION // Create a list to store items List<Pair> revisedTransaction = new ArrayList<Pair>(); // for each item for (int i = 0; i < items.length; i++) { /// convert values to integers Pair pair = new Pair(); pair.item = Integer.parseInt(items[i]); pair.utility = Integer.parseInt(utilityValues[i]); // if the item has enough utility if (mapItemToTWU.get(pair.item) >= minUtility) { // add it revisedTransaction.add(pair); remainingUtility += pair.utility; newTWU += pair.utility; // NEW OPTIMIZATION } } // sort the transaction Collections.sort( revisedTransaction, new Comparator<Pair>() { public int compare(Pair o1, Pair o2) { return compareItems(o1.item, o2.item); } }); // for each item left in the transaction for (int i = 0; i < revisedTransaction.size(); i++) { Pair pair = revisedTransaction.get(i); // int remain = remainingUtility; // FOR OPTIMIZATION // subtract the utility of this item from the remaining utility remainingUtility = remainingUtility - pair.utility; // get the utility list of this item UtilityList utilityListOfItem = mapItemToUtilityList.get(pair.item); // Add a new Element to the utility list of this item corresponding to this transaction Element element = new Element(tid, pair.utility, remainingUtility); utilityListOfItem.addElement(element); // BEGIN NEW OPTIMIZATION for FHM Map<Integer, Long> mapFMAPItem = mapFMAP.get(pair.item); if (mapFMAPItem == null) { mapFMAPItem = new HashMap<Integer, Long>(); mapFMAP.put(pair.item, mapFMAPItem); } for (int j = i + 1; j < revisedTransaction.size(); j++) { Pair pairAfter = revisedTransaction.get(j); Long twuSum = mapFMAPItem.get(pairAfter.item); if (twuSum == null) { mapFMAPItem.put(pairAfter.item, newTWU); } else { mapFMAPItem.put(pairAfter.item, twuSum + newTWU); } } // END OPTIMIZATION of FHM } tid++; // increase tid number for next transaction } } catch (Exception e) { // to catch error while reading the input file e.printStackTrace(); } finally { if (myInput != null) { myInput.close(); } } // check the memory usage MemoryLogger.getInstance().checkMemory(); // Mine the database recursively fhm(itemsetBuffer, 0, null, listOfUtilityLists, minUtility); // check the memory usage again and close the file. MemoryLogger.getInstance().checkMemory(); // close output file writer.close(); // record end time endTimestamp = System.currentTimeMillis(); }