private static void startCDC() throws IOException, Exception { long remainder = 7; for (int i = startBoundary; i <= endBoundary; i += increment) { long minBoundary = min_multiplier * i; // we will set the mod value as the minimum boundary long maxBoundary = max_multiplier * i; // we will set this as the maximum boundary long divisor1 = i; // this will be used to mod the results long divisor2 = i / 2 + 1; // the backup divisor is half the original divisor long divisor3 = i / 4 + 1; totalSize = fileArray.get(1) .length; // note we only care about the size of the second file since that's the file // we are measuring System.out.print(divisor1 + " " + divisor2 + " " + divisor3 + " "); runBytes( window, divisor1, divisor2, divisor3, remainder, minBoundary, maxBoundary); // run the karb rabin algorithm // this is the block size per boundary double blockSize = (double) totalSize / (double) numOfPieces; double ratio = (double) coverage / (double) totalSize; System.out.println(blockSize + " " + ratio); // clear the hashTable, and counters so we can reset the values for the next round of // boundaries coverage = 0; numOfPieces = 0; table.clear(); HashClass.duplicate_counter = 0; HashClass.max_list_length = 0; } }
/* - Overloaded method just for the internet archive dataset - The first two params hold the block size and ratioSize respectively (for all the runnings) - The last set of params are the actual file in byte and the hashed versions of the file we will be running the code against - current_ -- are the lists that contain the most recent file version - previous_ -- are the listrs that contain the previous versions */ private static void startCDC( double[] block_size_list, double[] ratio_size_list, byte[] current_array, byte[] previous_array, ArrayList<Long> current_md5Hashes, ArrayList<Long> previous_md5Hashes) throws Exception { long remainder = 7; // this is the remainder that we will be comparing with int index = 0; // used to traverse the two lists for (int i = startBoundary; i <= endBoundary; i += increment) { long minBoundary = min_multiplier * i; // we will set the mod value as the minimum boundary long maxBoundary = max_multiplier * i; // we will set this as the maximum boundary long divisor1 = i; // this will be used to mod the results long divisor2 = i / 2 + 1; // the backup divisor is half the original divisor long divisor3 = i / 4 + 1; // System.out.print( i+" "); storeChunks( previous_array, previous_md5Hashes, divisor1, divisor2, divisor3, remainder, minBoundary, maxBoundary); // cut up the first file and store it runTddd( current_array, current_md5Hashes, divisor1, divisor2, divisor3, remainder, minBoundary, maxBoundary); // call the method again, but on the second file only // this is the block size per boundary double blockSize = (double) totalSize / (double) numOfPieces; double ratio = (double) coverage / (double) totalSize; // extra step, add the data back into the list block_size_list[index] += blockSize; ratio_size_list[index] += ratio; ++index; // clear the hashTable, and counters so we can reset the values for the next round of // boundaries coverage = 0; numOfPieces = 0; table.clear(); HashClass.duplicate_counter = 0; HashClass.max_list_length = 0; } }