/*
 * Reads each file and hashes the document; the results are stored in our ArrayLists.
 * We do this up front so we don't have to hash again later (which is time consuming).
 */
private static void preliminaryStep(String dir) throws Exception {
    int start = 0; // start of the sliding window
    int end = start + window - 1; // ending boundary of the window

    // Preprocessing step: hash each document once so we never need to hash it again.
    for (int i = 0; i < fileList.size(); ++i) {
        Path p = Paths.get(dir + fileList.get(i)); // path to this file
        byte[] array = Files.readAllBytes(p); // read the file into bytes

        // Hash the entire document using the sliding window and store the hashes.
        ArrayList<Long> md5Hashes = new ArrayList<Long>();
        HashDocument.hashDocument(array, md5Hashes, start, end);

        // Cache the raw bytes and the hashes so the algorithms can reuse them later
        // without re-reading or re-hashing.
        fileArray.add(array);
        hashed_File_List.add(md5Hashes);
    }
}
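/*
 * A minimal sketch of what HashDocument.hashDocument is assumed to do above:
 * slide a fixed-size window across the bytes and record one MD5-derived long
 * per position. WindowHasherSketch and slidingMd5 are hypothetical names for
 * illustration only; the real HashDocument may well use a cheaper rolling hash
 * instead of re-digesting every window.
 */
private static class WindowHasherSketch {
    static void slidingMd5(byte[] doc, ArrayList<Long> out, int start, int end)
            throws Exception {
        java.security.MessageDigest md5 =
                java.security.MessageDigest.getInstance("MD5");
        int window = end - start + 1; // window size implied by the boundaries
        // One hash per window position; stop once the window would pass the end.
        for (int i = 0; i + window <= doc.length; ++i) {
            md5.reset();
            md5.update(doc, i, window);
            // Fold the 16-byte digest into a long by taking its first 8 bytes.
            out.add(java.nio.ByteBuffer.wrap(md5.digest()).getLong());
        }
    }
}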
/*
 * Helper method that runs the algorithm over the periodic dataset.
 */
private static void runPeriodic() throws Exception {
    System.out.println("Running TDDD Periodic");

    // These are the input sizes we will run on; each corresponds to a pair of directories.
    int arr[] = {10, 15, 20, 25, 30};

    // Directory bases; we concatenate these with the numbers above to get the full directory names.
    String base_old_file = "../../thesis-datasets/input_";
    String base_new_file = "../../thesis-datasets/periodic_";

    // Count how many boundary settings we will iterate through, so we can size
    // our result arrays accordingly.
    int total_iter_count = 0;
    for (int i = startBoundary; i <= endBoundary; i += increment)
        total_iter_count++;

    for (int dir_num : arr) {
        // Set up our directories.
        String old_file_dir = base_old_file + dir_num + "/";
        String new_file_dir = base_new_file + dir_num + "/";
        System.out.println(old_file_dir);

        // Read all the files in these two directories in sorted order.
        ArrayList<String> old_file_list = new ArrayList<String>();
        ArrayList<String> new_file_list = new ArrayList<String>();
        ReadFile.readFile(old_file_dir, old_file_list);
        ReadFile.readFile(new_file_dir, new_file_list);

        // Accumulators for all the runs over the periodic data.
        double[] block_size_list = new double[total_iter_count];
        double[] ratio_size_list = new double[total_iter_count];
        int totalRuns = 0;

        for (int i = 0; i < old_file_list.size(); ++i) {
            // Each input file should correspond to its output file: the numeric
            // components of the two file names must match.
            String[] s1 = old_file_list.get(i).split("_");
            String[] s2 = new_file_list.get(i).split("_");
            if (!s1[1].equals(s2[1]) || !s1[2].equals(s2[2]))
                System.out.println("We got a huge problem");

            // Essentially the same code as in the preliminaryStep method, modified
            // for the periodic files: we cannot call preliminaryStep here, so we
            // hash the two paired files individually.
            int start = 0; // start of the sliding window
            int end = start + window - 1; // ending boundary of the window

            // The old file is the base of the comparison; the new file is compared against it.
            Path p = Paths.get(old_file_dir + old_file_list.get(i));
            Path p2 = Paths.get(new_file_dir + new_file_list.get(i));
            byte[] old_file = Files.readAllBytes(p); // read both files into bytes
            byte[] new_file = Files.readAllBytes(p2);

            // Hash each document using the sliding window and store the hashes.
            ArrayList<Long> old_file_hashes = new ArrayList<Long>();
            ArrayList<Long> new_file_hashes = new ArrayList<Long>();
            HashDocument.hashDocument(old_file, old_file_hashes, start, end);
            HashDocument.hashDocument(new_file, new_file_hashes, start, end);

            // Now run the CDC algorithm on this pair.
            totalSize = new_file.length; // length of the new file
            startCDC(block_size_list, ratio_size_list, new_file, old_file,
                    new_file_hashes, old_file_hashes);

            if (totalRuns % 10 == 0)
                System.out.println(totalRuns);
            totalRuns++;
        }

        // Output the averaged results; each index corresponds to one boundary setting.
        System.out.println("File dir = " + dir_num + " totalRuns = " + totalRuns);
        int index = 0;
        for (int i = startBoundary; i <= endBoundary; i += increment) {
            double blockSize = block_size_list[index] / (double) totalRuns;
            double ratio = ratio_size_list[index] / (double) totalRuns;
            System.out.println(i + " " + blockSize + " " + ratio);
            index++;
        }
    }
} // end of method
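/*
 * A minimal sketch of what ReadFile.readFile is assumed to do above: list the
 * file names in a directory and append them to the list in sorted order, so
 * that the i-th old file lines up with the i-th new file (the pairing check on
 * the split name components relies on this). readSortedSketch is a hypothetical
 * name for illustration; the real ReadFile may filter or order entries
 * differently (e.g. numerically on the name components).
 */
private static void readSortedSketch(String dir, ArrayList<String> out) {
    java.io.File[] entries = new java.io.File(dir).listFiles();
    if (entries == null)
        return; // directory missing or unreadable
    java.util.Arrays.sort(entries); // File sorts lexicographically by path name
    for (java.io.File f : entries)
        if (f.isFile())
            out.add(f.getName()); // callers concatenate dir + name themselves
}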