/**
 * Recursively expands the given file names into the complete set of files and directories
 * reachable from them, recording each one in {@code entries}.
 *
 * <p>A name that resolves to neither a regular file nor a directory is reported through
 * {@code error(...)} and clears the {@code ok} flag; processing then continues with the next
 * name.
 *
 * @param dir parent directory the names are resolved against, or {@code null} to use each name
 *     as given
 * @param files names to expand; {@code null} is a no-op (this is also what ends the recursion
 *     when {@link File#list()} returns {@code null})
 * @param isUpdate when {@code true}, every newly added entry is also registered in
 *     {@code entryMap} under its {@code entryName(...)}
 */
void expand(File dir, String[] files, boolean isUpdate) {
    if (files == null) {
        return;
    }
    for (String name : files) {
        File candidate = (dir == null) ? new File(name) : new File(dir, name);

        if (candidate.isFile()) {
            // Only register in the map when the file was not already in entries.
            if (entries.add(candidate) && isUpdate) {
                entryMap.put(entryName(candidate.getPath()), candidate);
            }
            continue;
        }

        if (!candidate.isDirectory()) {
            error(formatMsg("error.nosuch.fileordir", String.valueOf(candidate)));
            ok = false;
            continue;
        }

        // Directory: descend only if entries.add reports it as newly added.
        if (!entries.add(candidate)) {
            continue;
        }
        if (isUpdate) {
            // The path passed to entryName for a directory always ends with the separator.
            String path = candidate.getPath();
            if (!path.endsWith(File.separator)) {
                path = path + File.separator;
            }
            entryMap.put(entryName(path), candidate);
        }
        expand(candidate, candidate.list(), isUpdate);
    }
}
/* - This method is used has a helper method to run the algo for the archive dataset - Note the archive set has multiple directories ( one for each url ) - So Read all of the directories in first and for each directory run the code */ private static void runArchiveSet() throws Exception { System.out.println("Running TDDD archive"); directory = "../../thesis-datasets/datasets2/"; File file = new File(directory); String[] directory_list = file.list( new FilenameFilter() { @Override public boolean accept(File current, String name) { return new File(current, name).isDirectory(); // make sure its a directory } }); int totalRuns = 0; // used to avg the runs in the end int total_iter_count = 0; // this is used check how many times we will iterate through the data so we can make an // array of that size for (int i = startBoundary; i <= endBoundary; i += increment) total_iter_count++; // System.out.println(Arrays.toString(directory_list)); int sets = 0; // make the arrays to hold the respecitve info for the different verions\ // run it simulateounsly to speed the from the program! double[] block_size_list_last_year = new double[total_iter_count]; double[] ratio_size_list_last_year = new double[total_iter_count]; double[] block_size_list_six_month = new double[total_iter_count]; double[] ratio_size_list__six_month = new double[total_iter_count]; double[] block_size_list_two_year = new double[total_iter_count]; double[] ratio_size_list_two_year = new double[total_iter_count]; int current = 0; int six_month = 2; int last_year = 1; int two_year = 3; // loop through and run the cdc for each directory for (String dir : directory_list) { ReadFile.readFile(directory + dir, fileList); // read all the files in this directory preliminaryStep(directory + dir + "/"); // call the preliminaryStep on all the files totalRuns++; totalSize = fileArray.get(current) .length; // get the length of the file we will be running it against! 
// run it against six month startCDC( block_size_list_six_month, ratio_size_list__six_month, fileArray.get(current), fileArray.get(six_month), hashed_File_List.get(current), hashed_File_List.get(six_month)); // run it against last year startCDC( block_size_list_last_year, ratio_size_list_last_year, fileArray.get(current), fileArray.get(last_year), hashed_File_List.get(current), hashed_File_List.get(last_year)); // run it against 2 startCDC( block_size_list_two_year, ratio_size_list_two_year, fileArray.get(current), fileArray.get(two_year), hashed_File_List.get(current), hashed_File_List.get(two_year)); // // clear the fileList and hashed_file_list array fileArray.clear(); hashed_File_List.clear(); fileList.clear(); // if (Double.isNaN(ratio_size_list[0])){ // System.out.println(sets+" "+Arrays.toString(ratio_size_list)); // test = true; // break; // } if (sets % 200 == 0) System.out.println(sets); ++sets; } // end of directory list for loop // now output the avged value for all the runs // System.out.println(Arrays.toString(ratio_size_list)); System.out.println("Printing six_month"); int index = 0; for (int i = startBoundary; i <= endBoundary; i += increment) { // avg out the outputs double blockSize = block_size_list_six_month[index] / (double) totalRuns; double ratio = ratio_size_list__six_month[index] / (double) totalRuns; System.out.println(i + " " + i / 2 + 1 + " " + i / 4 + 1 + " " + blockSize + " " + ratio); index++; } System.out.println("Printing last year"); index = 0; for (int i = startBoundary; i <= endBoundary; i += increment) { double blockSize = block_size_list_last_year[index] / (double) totalRuns; double ratio = ratio_size_list_last_year[index] / (double) totalRuns; System.out.println(i + " " + blockSize + " " + ratio); index++; } System.out.println("Printing two year"); index = 0; for (int i = startBoundary; i <= endBoundary; i += increment) { double blockSize = block_size_list_two_year[index] / (double) totalRuns; double ratio = 
ratio_size_list_two_year[index] / (double) totalRuns; System.out.println(i + " " + blockSize + " " + ratio); index++; } }