/** * Generate a Similarity Matrix with archive submissions. * * <p>The result is not a square matrix. Only the input submissions are on the X axis, but the Y * axis contains both input and archive submissions. * * @param inputSubmissions Submissions used to generate matrix * @param archiveSubmissions Archive submissions - only compared to input submissions, not to each * other * @param results Results used to build matrix * @return Similarity matrix built from given results * @throws InternalAlgorithmError Thrown on missing results, or results containing a submission * not in the input */ public static SimilarityMatrix generateMatrix( Set<Submission> inputSubmissions, Set<Submission> archiveSubmissions, Set<AlgorithmResults> results) throws InternalAlgorithmError { checkNotNull(inputSubmissions); checkNotNull(archiveSubmissions); checkNotNull(results); checkArgument( !inputSubmissions.isEmpty(), "Must provide at least 1 submission to build matrix from"); checkArgument( !results.isEmpty(), "Must provide at least 1 AlgorithmResults to build matrix from!"); Set<Submission> setOfBoth = new HashSet<>(); setOfBoth.addAll(inputSubmissions); setOfBoth.addAll(archiveSubmissions); checkArgument( setOfBoth.size() == (archiveSubmissions.size() + inputSubmissions.size()), "Some submissions were found in both archive and input submissions!"); // If there are no archive submissions, just generate using the other function if (archiveSubmissions.isEmpty()) { return generateMatrix(inputSubmissions, results); } List<Submission> xSubmissions = Ordering.natural().immutableSortedCopy(inputSubmissions); List<Submission> ySubmissions = new ArrayList<>(); ySubmissions.addAll(Ordering.natural().immutableSortedCopy(inputSubmissions)); ySubmissions.addAll(Ordering.natural().immutableSortedCopy(archiveSubmissions)); AlgorithmResults[][] matrix = new AlgorithmResults[xSubmissions.size()][ySubmissions.size()]; // Generate the matrix // First, handle identical submissions for (Submission xSub : xSubmissions) { // Get the X index int xIndex = xSubmissions.indexOf(xSub); int yIndex = ySubmissions.indexOf(xSub); matrix[xIndex][yIndex] = new AlgorithmResults(Pair.of(xSub, xSub), Real.ONE, Real.ONE); } // Now iterate through all given algorithm results for (AlgorithmResults result : results) { int aXCoord = xSubmissions.indexOf(result.a); int bXCoord = xSubmissions.indexOf(result.b); if (aXCoord == -1 && bXCoord == -1) { throw new InternalAlgorithmError( "Neither submission \"" + result.a.getName() + "\" nor \"" + result.b.getName() + "\" were found in input submissions!"); } if (aXCoord != -1) { int bYCoord = ySubmissions.indexOf(result.b); matrix[aXCoord][bYCoord] = result.inverse(); } if (bXCoord != -1) { int aYCoord = ySubmissions.indexOf(result.a); matrix[bXCoord][aYCoord] = result; } } // Verification pass - ensure we built a matrix with no nulls for (int x = 0; x < xSubmissions.size(); x++) { for (int y = 0; y < ySubmissions.size(); y++) { if (matrix[x][y] == null) { throw new InternalAlgorithmError( "Missing Algorithm Results for comparison of submissions \"" + xSubmissions.get(x).getName() + "\" and \"" + ySubmissions.get(y).getName() + "\""); } } } return new SimilarityMatrix(matrix, xSubmissions, ySubmissions, results); }
/** * Generate a similarity matrix from a given set of submissions. * * @param inputSubmissions Submissions to generate from * @param results Results to build from. Must contain results for every possible unordered pair of * input submissions * @return Similarity Matrix built from given results * @throws InternalAlgorithmError Thrown on missing results, or results containing a submission * not in the input */ public static SimilarityMatrix generateMatrix( Set<Submission> inputSubmissions, Set<AlgorithmResults> results) throws InternalAlgorithmError { checkNotNull(inputSubmissions); checkNotNull(results); checkArgument( !inputSubmissions.isEmpty(), "Must provide at least 1 submission to build matrix from"); checkArgument( !results.isEmpty(), "Must provide at least 1 AlgorithmResults to build matrix from!"); // Generate the matrix we'll use AlgorithmResults[][] matrix = new AlgorithmResults[inputSubmissions.size()][inputSubmissions.size()]; // Ordering sortBy = Ordering.natural(); Ordering<Submission> sortBy = Ordering.from( new Comparator<Submission>() { public int compare(Submission a, Submission b) { return ((Double) b.getTotalCopyScore()).compareTo(a.getTotalCopyScore()); } }); // Order the submissions List<Submission> orderedSubmissions = sortBy.immutableSortedCopy(inputSubmissions); // Generate the matrix // Start with the diagonal, filling with 100% similarity for (int i = 0; i < orderedSubmissions.size(); i++) { Submission s = orderedSubmissions.get(i); matrix[i][i] = new AlgorithmResults(Pair.of(s, s), Real.ONE, Real.ONE); } // Now go through all the results, and build appropriate two MatrixEntry objects for each for (AlgorithmResults result : results) { int aIndex = orderedSubmissions.indexOf(result.a); int bIndex = orderedSubmissions.indexOf(result.b); if (aIndex == -1) { if (!result.a.testFlag("invalid")) { throw new InternalAlgorithmError( "Processed Algorithm Result with submission not in given input submissions with name \"" + result.a.getName() + "\""); } } else if (bIndex == -1) { if (!result.b.testFlag("invalid")) { throw new InternalAlgorithmError( "Processed Algorithm Result with submission not in given input submissions with name \"" + result.b.getName() + "\""); } } else { matrix[aIndex][bIndex] = result.inverse(); matrix[bIndex][aIndex] = result; } } // Verification pass: Go through and ensure that the entire array was populated for (int x = 0; x < orderedSubmissions.size(); x++) { for (int y = 0; y < orderedSubmissions.size(); y++) { if (matrix[x][y] == null) { throw new InternalAlgorithmError( "Missing Algorithm Results for comparison of submissions \"" + orderedSubmissions.get(x).getName() + "\" and \"" + orderedSubmissions.get(y).getName() + "\""); } } } return new SimilarityMatrix(matrix, orderedSubmissions, orderedSubmissions, results); }