/**
 * Concatenates the string forms of all array elements, separated by {@code ", "}, and passes
 * the joined text through {@code U.compact}.
 *
 * @param arr Array whose elements are joined; elements are rendered the same way
 *     {@link StringBuilder#append(Object)} renders them.
 * @return Result of {@code U.compact} on the joined text, or {@code null} if the array is
 *     {@code null} or empty.
 */
@Nullable public static String compactArray(Object[] arr) {
    if (arr == null || arr.length == 0)
        return null;

    StringBuilder joined = new StringBuilder();

    for (int idx = 0; idx < arr.length; idx++) {
        // Separator goes between elements only, so nothing has to be trimmed afterwards.
        if (idx > 0)
            joined.append(", ");

        joined.append(arr[idx]);
    }

    return U.compact(joined.toString());
}
/* ------------------------------------------------------------------------------------------------------- This method: This method: -- Takes in three paramters: 1. array - this is the byte array that actually holds the document contents 2. md5Hases - holds the entire hash values of the document 3. Divisor1/Divisor2/divisor3... - main and back up divisors 5. The remainder we are looking for 6/7. min/max boundaries -- We will start running the karb rabin algorithm -- We will find the boundaries using mod values and once they equal the mod value we have stored -- we also have the divsor2/3 .. which are backup divisors. If we don't find a boundary by the divisor1 once we hit the maxBoundary -- we will see if we have one with divisor2, if not, then we will see if we have one with divisor3 and so on -- We will hash everything in that hash boundary and store it -------------------------------------------------------------------------------------------------------- */ private static void runTddd( byte[] array, ArrayList<Long> md5Hashes, long divisor1, long divisor2, long divisor3, long remainder, long minBoundary, long maxBoundary) { int documentStart = 0; // used to keep track of where the boundaries are boolean match = false; // used to ck if we encountered a match int backUpBreakPoint = -1; // used to store the backup breakpoint int secondBackUpBreakPoint = -1; // used with the divisor3 StringBuilder builder = new StringBuilder(); int i = documentStart + (int) minBoundary - 1; // so we start at the minimum // loop through all the values in the document for (; i < md5Hashes.size() - 1; ++i) { // if ((i - documentStart + 1) < minBoundary) // if the size of this boundary is less than // the min, continue looping // continue; /*----------------------------------------------------------------- - If the mod of this equals the modvalue we defined, then - this is a boundary ------------------------------------------------------------------*/ if ((md5Hashes.get(i - 1) + 
md5Hashes.get(i) + md5Hashes.get(i + 1)) % divisor1 == remainder) // ck if this equals the mod value { // Hash all the values in the range (documentStart,current(i)) // Remember we only want to hash the original VALUES from the array that contains the // original // content of the file. Not the hash values in the md5Hash Array for (int j = documentStart; j <= i; ++j) { builder.append(array[j]); // store everything upto the current value } String original = builder.toString(); // if the string is a perfect match ( hash and original string) if (HashClass.is_string_match(original, table)) // iinsert the hash in the table) coverage += i - documentStart + 1; // this is the amount of bytes we saved documentStart = i + 1; // set this as the beginning of the new boundary backUpBreakPoint = -1; // reset this secondBackUpBreakPoint = -1; numOfPieces++; // increment the num of pieces i = i + (int) minBoundary - 1; // skip all the way here builder.setLength(0); // reset the stringBuilder for the next round } else if ((md5Hashes.get(i - 1) + md5Hashes.get(i) + md5Hashes.get(i + 1)) % divisor2 == remainder) { // check if this is the backup point backUpBreakPoint = i; // this is the backup breakpoint } else if ((md5Hashes.get(i - 1) + md5Hashes.get(i) + md5Hashes.get(i + 1)) % divisor2 == remainder) { secondBackUpBreakPoint = i; // set the second backup point } if ((i - documentStart + 1) >= maxBoundary) { // we have reached the maximum // ck if we have a backUpbreakpoint int point; if (backUpBreakPoint != -1) // if we do, set this as the boundary point = backUpBreakPoint; else if (secondBackUpBreakPoint != -1) { point = secondBackUpBreakPoint; // if we don't have a first break point, find the second } else point = i; // else this current value of i is the breakpoint // Hash all the values in the range (documentStart,current(i)) // Remember we only want to hash the original VALUES from the array that contains the // original // content of the file. 
Not the hash values in the md5Hash Array for (int j = documentStart; j <= point; ++j) { builder.append(array[j]); // store everything upto the current value } String original = builder.toString(); if (HashClass.is_string_match(original, table)) coverage += point - documentStart + 1; // this is the amount of bytes we saved numOfPieces++; // increment the num of pieces documentStart = point + 1; // set this as the beginning of the new boundary backUpBreakPoint = -1; // reset this secondBackUpBreakPoint = -1; // reset the secondBackUp point i = point + (int) minBoundary - 1; // skip all the way here ; builder.setLength(0); // reset the stringBuilder for the next round } } // end of the for loop // ------------------------------------------------------------------------------------------- // we are missing the last boundary, so hash that last value // We will also check against our values of the strings we already have, and if we encountered // this // already, then we will simply increment the counter, otherwise we will insert it in the // hashtable // and increase our miss counter // ---------------------------------------------------------------------------------------------- for (int j = documentStart; j < array.length; ++j) { builder.append(array[j]); } // only compute hash and insert into our hashtable only if the string buffer isn't empty String original = builder.toString(); if (HashClass.is_string_match(original, table)) coverage += array.length - documentStart; // this is the amount of bytes we saved numOfPieces++; // increment the num of pieces } // end of the method