Example #1
  private static void startCDC() throws Exception {
    long remainder = 7; // this is the remainder that we will be comparing with
    for (int i = startBoundary; i <= endBoundary; i += increment) {
      long minBoundary = min_multiplier * i; // the smallest allowed chunk size for this boundary
      long maxBoundary = max_multiplier * i; // the largest allowed chunk size for this boundary
      long divisor1 = i; // primary divisor used to mod the rolling-hash results
      long divisor2 = i / 2 + 1; // backup divisor, roughly half of the primary
      long divisor3 = i / 4 + 1; // second backup divisor, roughly a quarter of the primary
      // only the size of the second file matters, since that is the file being measured
      totalSize = fileArray.get(1).length;
      System.out.print(divisor1 + " " + divisor2 + " " + divisor3 + " ");
      runBytes(
          window,
          divisor1,
          divisor2,
          divisor3,
          remainder,
          minBoundary,
          maxBoundary); // run the Rabin-Karp chunking algorithm
      // average block size and coverage ratio for this boundary setting
      double blockSize = (double) totalSize / (double) numOfPieces;
      double ratio = (double) coverage / (double) totalSize;
      System.out.println(blockSize + " " + ratio);

      // clear the hash table and counters so the values reset for the next round of boundaries
      coverage = 0;
      numOfPieces = 0;
      table.clear();
      HashClass.duplicate_counter = 0;
      HashClass.max_list_length = 0;
    }
  }
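
The runBytes call above drives a Rabin-Karp style content-defined chunker with these parameters. The helper below is a minimal illustrative sketch of that idea, not the project's runBytes: it declares a chunk boundary whenever the rolling hash of the current window satisfies hash % divisor == remainder, while enforcing the minimum and maximum chunk sizes. The method name chunkSizes and the hash base 31 are assumptions made for this sketch.

  // Illustrative sketch only -- not the project's runBytes(). A chunk boundary is
  // declared when the rolling hash of the current window satisfies
  // hash % divisor == remainder, but never before minBoundary bytes and never
  // after maxBoundary bytes. Returns the resulting chunk sizes.
  private static java.util.List<Integer> chunkSizes(
      byte[] data, int window, long divisor, long remainder,
      long minBoundary, long maxBoundary) {
    java.util.List<Integer> sizes = new java.util.ArrayList<>();
    long highPow = 1; // 31^(window-1), used to drop the byte that slides out of the window
    for (int k = 1; k < window; k++) {
      highPow *= 31;
    }
    int chunkStart = 0;
    long hash = 0;
    for (int pos = 0; pos < data.length; pos++) {
      if (pos - chunkStart >= window) {
        hash -= highPow * (data[pos - window] & 0xFF); // remove the outgoing byte
      }
      hash = hash * 31 + (data[pos] & 0xFF); // add the incoming byte
      int chunkLen = pos - chunkStart + 1;
      boolean hit = Math.floorMod(hash, divisor) == remainder;
      if ((chunkLen >= minBoundary && hit) || chunkLen >= maxBoundary) {
        sizes.add(chunkLen); // boundary found: record the chunk and start a new one
        chunkStart = pos + 1;
        hash = 0;
      }
    }
    if (chunkStart < data.length) {
      sizes.add(data.length - chunkStart); // trailing partial chunk
    }
    return sizes;
  }
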
Example #2
  /*
  	- Overloaded method just for the Internet Archive dataset
  	- The first two params hold the block size and ratio results respectively (accumulated over all the runs)
  	- The last set of params are the actual files in bytes and the hashed versions of the files we will be running the code against
  	- current_ -- the lists that contain the most recent file version
  	- previous_ -- the lists that contain the previous versions
  */
  private static void startCDC(
      double[] block_size_list,
      double[] ratio_size_list,
      byte[] current_array,
      byte[] previous_array,
      ArrayList<Long> current_md5Hashes,
      ArrayList<Long> previous_md5Hashes)
      throws Exception {
    long remainder = 7; // this is the remainder that we will be comparing with
    int index = 0; // used to traverse the two lists
    for (int i = startBoundary; i <= endBoundary; i += increment) {
      long minBoundary = min_multiplier * i; // the smallest allowed chunk size for this boundary
      long maxBoundary = max_multiplier * i; // the largest allowed chunk size for this boundary
      long divisor1 = i; // primary divisor used to mod the rolling-hash results
      long divisor2 = i / 2 + 1; // backup divisor, roughly half of the primary
      long divisor3 = i / 4 + 1; // second backup divisor, roughly a quarter of the primary
      // System.out.print( i+" ");
      storeChunks(
          previous_array,
          previous_md5Hashes,
          divisor1,
          divisor2,
          divisor3,
          remainder,
          minBoundary,
          maxBoundary); // cut up the first file and store it
      runTddd(
          current_array,
          current_md5Hashes,
          divisor1,
          divisor2,
          divisor3,
          remainder,
          minBoundary,
          maxBoundary); // run the chunking again, but on the second (current) file only
      // average block size and coverage ratio for this boundary setting
      double blockSize = (double) totalSize / (double) numOfPieces;
      double ratio = (double) coverage / (double) totalSize;

      // extra step: accumulate the results into the output arrays
      block_size_list[index] += blockSize;
      ratio_size_list[index] += ratio;
      ++index;
      // clear the hash table and counters so the values reset for the next round of boundaries
      coverage = 0;
      numOfPieces = 0;
      table.clear();
      HashClass.duplicate_counter = 0;
      HashClass.max_list_length = 0;
    }
  }
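
Since block_size_list and ratio_size_list are accumulated with +=, a caller would normally divide by the number of runs afterwards. The helper below is a hypothetical sketch of that step; printAverages and numOfRuns are not part of the original class.

  // Hypothetical helper, not part of the original class: after calling the
  // overloaded startCDC once per file-version pair, turn the accumulated sums
  // into averages and print one line per boundary setting.
  private static void printAverages(
      double[] block_size_list, double[] ratio_size_list, int numOfRuns) {
    for (int index = 0; index < block_size_list.length; index++) {
      double avgBlockSize = block_size_list[index] / numOfRuns;
      double avgRatio = ratio_size_list[index] / numOfRuns;
      System.out.println(avgBlockSize + " " + avgRatio);
    }
  }
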