Ejemplo n.º 1
1
  /**
   * Concatenates the string form of every array element, separated by {@code ", "}, and passes
   * the joined text through {@code U.compact}.
   *
   * @param arr Array whose elements are joined; may be {@code null}.
   * @return Result of {@code U.compact} for the joined text, or {@code null} when {@code arr} is
   *     {@code null} or has no elements.
   */
  @Nullable
  public static String compactArray(Object[] arr) {
    if (arr == null || arr.length == 0) {
      return null;
    }

    final String delimiter = ", ";
    StringBuilder joined = new StringBuilder();

    for (int idx = 0; idx < arr.length; idx++) {
      // The delimiter goes between elements only, so nothing has to be trimmed afterwards.
      if (idx > 0) {
        joined.append(delimiter);
      }
      joined.append(arr[idx]);
    }

    return U.compact(joined.toString());
  }
Ejemplo n.º 2
0
  /**
   * Splits the document into variable-sized pieces and records each piece.
   *
   * <p>For every candidate index {@code i} the sum of the three adjacent entries
   * {@code md5Hashes[i - 1] + md5Hashes[i] + md5Hashes[i + 1]} is reduced modulo the divisors:
   *
   * <ul>
   *   <li>If {@code sum % divisor1 == remainder}, index {@code i} ends the current piece.
   *   <li>Otherwise, if {@code sum % divisor2 == remainder}, {@code i} is remembered as the first
   *       backup breakpoint.
   *   <li>Otherwise, if {@code sum % divisor3 == remainder}, {@code i} is remembered as the second
   *       backup breakpoint.
   * </ul>
   *
   * <p>When the current piece reaches {@code maxBoundary} positions without a main boundary, it is
   * cut at the first backup breakpoint if one exists, else at the second backup breakpoint, else
   * at the current index. After each cut the scan skips ahead so the next piece spans at least
   * {@code minBoundary} positions. Whatever remains after the loop is recorded as the last piece.
   *
   * <p>Each piece is built from the original bytes in {@code array} (not from the hash values),
   * checked with {@code HashClass.is_string_match(piece, table)}, and counted in
   * {@code numOfPieces}; on a match its length is added to {@code coverage}.
   *
   * <p>NOTE(review): the loop reads {@code md5Hashes.get(i - 1)} starting at
   * {@code i = minBoundary - 1}, so {@code minBoundary} presumably must be at least 2 -- confirm
   * against callers.
   *
   * @param array byte array holding the document contents
   * @param md5Hashes hash values of the document, indexed by the same positions as {@code array}
   *     (assumed from how both are indexed here -- TODO confirm)
   * @param divisor1 main divisor
   * @param divisor2 first backup divisor
   * @param divisor3 second backup divisor
   * @param remainder the modulo result that marks a boundary
   * @param minBoundary minimum piece size, in positions
   * @param maxBoundary maximum piece size, in positions
   */
  private static void runTddd(
      byte[] array,
      ArrayList<Long> md5Hashes,
      long divisor1,
      long divisor2,
      long divisor3,
      long remainder,
      long minBoundary,
      long maxBoundary) {
    int documentStart = 0; // first index of the piece currently being scanned
    int backUpBreakPoint = -1; // last index that matched divisor2 in this piece, or -1
    int secondBackUpBreakPoint = -1; // last index that matched divisor3 in this piece, or -1
    int i = documentStart + (int) minBoundary - 1; // start at the minimum piece size

    for (; i < md5Hashes.size() - 1; ++i) {
      // Sum of the three neighbouring hash values; computed once and shared by all divisors.
      long window = md5Hashes.get(i - 1) + md5Hashes.get(i) + md5Hashes.get(i + 1);

      if (window % divisor1 == remainder) {
        // Main boundary: the piece is [documentStart, i].
        recordTdddPiece(array, documentStart, i);
        documentStart = i + 1;
        backUpBreakPoint = -1;
        secondBackUpBreakPoint = -1;
        i = i + (int) minBoundary - 1; // skip ahead so the next piece meets the minimum size
      } else if (window % divisor2 == remainder) {
        backUpBreakPoint = i;
      } else if (window % divisor3 == remainder) {
        // Previously this branch re-tested divisor2, so it could never run and divisor3 was
        // ignored.
        secondBackUpBreakPoint = i;
      }

      if ((i - documentStart + 1) >= maxBoundary) {
        // Maximum size reached: prefer the first backup, then the second, else cut right here.
        int point;
        if (backUpBreakPoint != -1) {
          point = backUpBreakPoint;
        } else if (secondBackUpBreakPoint != -1) {
          point = secondBackUpBreakPoint;
        } else {
          point = i;
        }

        recordTdddPiece(array, documentStart, point);
        documentStart = point + 1;
        backUpBreakPoint = -1;
        secondBackUpBreakPoint = -1;
        i = point + (int) minBoundary - 1; // skip ahead so the next piece meets the minimum size
      }
    }

    // The loop never emits the tail of the document, so record it as the final piece.
    // NOTE(review): the original comment said to do this only when the buffer is non-empty, but
    // the code has always processed (and counted) the tail unconditionally; that is kept as is.
    recordTdddPiece(array, documentStart, array.length - 1);
  }

  /**
   * Records one piece: concatenates {@code array[from..to]} (inclusive) into a string, adds the
   * piece length to {@code coverage} when {@code HashClass.is_string_match} reports a match, and
   * increments {@code numOfPieces}.
   */
  private static void recordTdddPiece(byte[] array, int from, int to) {
    StringBuilder builder = new StringBuilder();
    for (int j = from; j <= to; ++j) {
      builder.append(array[j]); // each byte is appended as its decimal value
    }
    if (HashClass.is_string_match(builder.toString(), table)) {
      coverage += to - from + 1; // number of bytes covered by an already-seen piece
    }
    numOfPieces++;
  }