Exemplo n.º 1
0
  /**
   * Generate a Similarity Matrix with archive submissions.
   *
   * <p>The result is not a square matrix. Only the input submissions are on the X axis, but the Y
   * axis contains both input and archive submissions. The Y axis lists the input submissions first
   * (naturally ordered), followed by the archive submissions (naturally ordered), so an input
   * submission has the same index on both axes.
   *
   * <p>If {@code archiveSubmissions} is empty, this delegates to {@link #generateMatrix(Set, Set)}.
   *
   * @param inputSubmissions Submissions used to generate matrix. Must be non-null and non-empty,
   *     and must not share any element with {@code archiveSubmissions}
   * @param archiveSubmissions Archive submissions - only compared to input submissions, not to each
   *     other. Must be non-null, may be empty
   * @param results Results used to build matrix. Must be non-null and non-empty
   * @return Similarity matrix built from given results
   * @throws InternalAlgorithmError Thrown on missing results, or results containing a submission
   *     that is in neither the input nor the archive submissions, or results in which neither
   *     submission is an input submission
   */
  public static SimilarityMatrix generateMatrix(
      Set<Submission> inputSubmissions,
      Set<Submission> archiveSubmissions,
      Set<AlgorithmResults> results)
      throws InternalAlgorithmError {
    checkNotNull(inputSubmissions);
    checkNotNull(archiveSubmissions);
    checkNotNull(results);
    checkArgument(
        !inputSubmissions.isEmpty(), "Must provide at least 1 submission to build matrix from");
    checkArgument(
        !results.isEmpty(), "Must provide at least 1 AlgorithmResults to build matrix from!");

    // The union is smaller than the sum of both sizes exactly when the two sets overlap
    Set<Submission> setOfBoth = new HashSet<>();
    setOfBoth.addAll(inputSubmissions);
    setOfBoth.addAll(archiveSubmissions);

    checkArgument(
        setOfBoth.size() == (archiveSubmissions.size() + inputSubmissions.size()),
        "Some submissions were found in both archive and input submissions!");

    // If there are no archive submissions, just generate using the other function
    if (archiveSubmissions.isEmpty()) {
      return generateMatrix(inputSubmissions, results);
    }

    // X axis: input submissions only. Y axis: input submissions followed by archive submissions.
    List<Submission> xSubmissions = Ordering.natural().immutableSortedCopy(inputSubmissions);
    List<Submission> ySubmissions = new ArrayList<>();
    ySubmissions.addAll(xSubmissions);
    ySubmissions.addAll(Ordering.natural().immutableSortedCopy(archiveSubmissions));

    AlgorithmResults[][] matrix = new AlgorithmResults[xSubmissions.size()][ySubmissions.size()];

    // First, handle identical submissions. The Y axis starts with the same sorted input
    // submissions as the X axis, so a submission compared with itself sits at [i][i].
    for (int i = 0; i < xSubmissions.size(); i++) {
      Submission sub = xSubmissions.get(i);

      matrix[i][i] = new AlgorithmResults(Pair.of(sub, sub), Real.ONE, Real.ONE);
    }

    // Now iterate through all given algorithm results
    for (AlgorithmResults result : results) {
      int aXCoord = xSubmissions.indexOf(result.a);
      int bXCoord = xSubmissions.indexOf(result.b);

      if (aXCoord == -1 && bXCoord == -1) {
        throw new InternalAlgorithmError(
            "Neither submission \""
                + result.a.getName()
                + "\" nor \""
                + result.b.getName()
                + "\" were found in input submissions!");
      }

      if (aXCoord != -1) {
        int bYCoord = ySubmissions.indexOf(result.b);

        // Fail with the documented error instead of an ArrayIndexOutOfBoundsException
        if (bYCoord == -1) {
          throw new InternalAlgorithmError(
              "Submission \""
                  + result.b.getName()
                  + "\" was not found in input or archive submissions!");
        }

        matrix[aXCoord][bYCoord] = result.inverse();
      }

      if (bXCoord != -1) {
        int aYCoord = ySubmissions.indexOf(result.a);

        // Fail with the documented error instead of an ArrayIndexOutOfBoundsException
        if (aYCoord == -1) {
          throw new InternalAlgorithmError(
              "Submission \""
                  + result.a.getName()
                  + "\" was not found in input or archive submissions!");
        }

        matrix[bXCoord][aYCoord] = result;
      }
    }

    // Verification pass - ensure we built a matrix with no nulls
    for (int x = 0; x < xSubmissions.size(); x++) {
      for (int y = 0; y < ySubmissions.size(); y++) {
        if (matrix[x][y] == null) {
          throw new InternalAlgorithmError(
              "Missing Algorithm Results for comparison of submissions \""
                  + xSubmissions.get(x).getName()
                  + "\" and \""
                  + ySubmissions.get(y).getName()
                  + "\"");
        }
      }
    }

    return new SimilarityMatrix(matrix, xSubmissions, ySubmissions, results);
  }
Exemplo n.º 2
0
  /**
   * Generate a similarity matrix from a given set of submissions.
   *
   * <p>The resulting matrix is square; both axes list the input submissions ordered by descending
   * total copy score. The diagonal is filled with 100% similarity entries.
   *
   * <p>A result that refers to a submission missing from {@code inputSubmissions} is skipped if
   * every missing submission in it carries the "invalid" flag; otherwise it is rejected.
   *
   * @param inputSubmissions Submissions to generate from. Must be non-null and non-empty
   * @param results Results to build from. Must be non-null and non-empty, and must contain results
   *     for every possible unordered pair of input submissions
   * @return Similarity Matrix built from given results
   * @throws InternalAlgorithmError Thrown on missing results, or results containing a submission
   *     not in the input that is not flagged "invalid"
   */
  public static SimilarityMatrix generateMatrix(
      Set<Submission> inputSubmissions, Set<AlgorithmResults> results)
      throws InternalAlgorithmError {
    checkNotNull(inputSubmissions);
    checkNotNull(results);
    checkArgument(
        !inputSubmissions.isEmpty(), "Must provide at least 1 submission to build matrix from");
    checkArgument(
        !results.isEmpty(), "Must provide at least 1 AlgorithmResults to build matrix from!");

    // Generate the matrix we'll use
    AlgorithmResults[][] matrix =
        new AlgorithmResults[inputSubmissions.size()][inputSubmissions.size()];

    // Highest total copy score first (arguments are deliberately compared in reverse)
    Ordering<Submission> sortBy =
        Ordering.from(
            new Comparator<Submission>() {
              @Override
              public int compare(Submission a, Submission b) {
                return Double.compare(b.getTotalCopyScore(), a.getTotalCopyScore());
              }
            });

    // Order the submissions
    List<Submission> orderedSubmissions = sortBy.immutableSortedCopy(inputSubmissions);

    // Start with the diagonal, filling with 100% similarity
    for (int i = 0; i < orderedSubmissions.size(); i++) {
      Submission s = orderedSubmissions.get(i);

      matrix[i][i] = new AlgorithmResults(Pair.of(s, s), Real.ONE, Real.ONE);
    }

    // Now go through all the results, and fill in the two mirrored entries for each
    for (AlgorithmResults result : results) {
      int aIndex = orderedSubmissions.indexOf(result.a);
      int bIndex = orderedSubmissions.indexOf(result.b);

      if (aIndex != -1 && bIndex != -1) {
        matrix[aIndex][bIndex] = result.inverse();
        matrix[bIndex][aIndex] = result;
        continue;
      }

      // At least one side is unknown. Check each side independently so the outcome does not
      // depend on which position of the pair the unknown submission occupies.
      if (aIndex == -1 && !result.a.testFlag("invalid")) {
        throw new InternalAlgorithmError(
            "Processed Algorithm Result with submission not in given input submissions with name \""
                + result.a.getName()
                + "\"");
      }

      if (bIndex == -1 && !result.b.testFlag("invalid")) {
        throw new InternalAlgorithmError(
            "Processed Algorithm Result with submission not in given input submissions with name \""
                + result.b.getName()
                + "\"");
      }

      // Every missing submission is flagged "invalid": the result is intentionally skipped
    }

    // Verification pass: Go through and ensure that the entire array was populated
    for (int x = 0; x < orderedSubmissions.size(); x++) {
      for (int y = 0; y < orderedSubmissions.size(); y++) {
        if (matrix[x][y] == null) {
          throw new InternalAlgorithmError(
              "Missing Algorithm Results for comparison of submissions \""
                  + orderedSubmissions.get(x).getName()
                  + "\" and \""
                  + orderedSubmissions.get(y).getName()
                  + "\"");
        }
      }
    }

    return new SimilarityMatrix(matrix, orderedSubmissions, orderedSubmissions, results);
  }