Beispiel #1
0
 /**
  * Compute the Meteor sufficient statistics for a test segment given a list of
  * references, keeping the statistics of the best-scoring reference.
  *
  * <p>The test and every reference are normalized when {@code normalize} is
  * set and lower-cased when {@code lowerCase} is set before being aligned.
  *
  * @param test the test segment to score
  * @param references the reference segments to score the test against
  * @return the statistics of the reference with the highest score (the earliest
  *     one on ties); if {@code references} is empty, a fresh
  *     {@code MeteorStats} whose score is -1
  */
 public MeteorStats getMeteorStats(String test, ArrayList<String> references) {
   // Prepare the test segment once; it is reused for every reference
   String hypothesis = test;
   if (normalize) {
     hypothesis = Normalizer.normalizeLine(hypothesis, langID, keepPunctuation);
   }
   if (lowerCase) {
     hypothesis = hypothesis.toLowerCase();
   }

   // Start from a negative score so the strict comparison below can replace it
   MeteorStats best = new MeteorStats();
   best.score = -1;

   for (String rawReference : references) {
     // Apply the same preparation to the reference as to the test
     String prepared = rawReference;
     if (normalize) {
       prepared = Normalizer.normalizeLine(prepared, langID, keepPunctuation);
     }
     if (lowerCase) {
       prepared = prepared.toLowerCase();
     }

     Alignment alignment = aligner.align(hypothesis, prepared);
     MeteorStats candidate = getMeteorStats(alignment);

     // Strictly greater: an equal score does not displace an earlier reference
     if (candidate.score > best.score) {
       best = candidate;
     }
   }

   return best;
 }
Beispiel #2
0
  /**
   * Compute the Meteor metrics from sufficient statistics and store them back
   * into {@code stats}: weighted lengths and matches, precision, recall, F1,
   * Fmean, fragmentation penalty and the final score.
   *
   * <p>The final score is never negative, and it is set to 0 when the
   * computation yields NaN (e.g. after a division by zero).
   *
   * @param stats sufficient statistics to read from; the computed metrics are
   *     written into this same object
   */
  public void computeMetrics(MeteorStats stats) {
    // Content words are scaled by delta, function words by (1 - delta)
    final double functionWeight = 1.0 - delta;
    final int moduleCount = moduleWeights.size();

    stats.testWeightedLength =
        (delta * (stats.testLength - stats.testFunctionWords))
            + (functionWeight * stats.testFunctionWords);
    stats.referenceWeightedLength =
        (delta * (stats.referenceLength - stats.referenceFunctionWords))
            + (functionWeight * stats.referenceFunctionWords);

    stats.testWeightedMatches = 0;
    stats.referenceWeightedMatches = 0;

    // Content matches, weighted per module and by delta. All content terms
    // are added before any function term so each sum accumulates in a fixed
    // order.
    for (int module = 0; module < moduleCount; module++) {
      stats.testWeightedMatches +=
          stats.testStageMatchesContent.get(module) * moduleWeights.get(module) * delta;
      stats.referenceWeightedMatches +=
          stats.referenceStageMatchesContent.get(module) * moduleWeights.get(module) * delta;
    }

    // Function matches, weighted per module and by (1 - delta)
    for (int module = 0; module < moduleCount; module++) {
      stats.testWeightedMatches +=
          stats.testStageMatchesFunction.get(module) * moduleWeights.get(module) * functionWeight;
      stats.referenceWeightedMatches +=
          stats.referenceStageMatchesFunction.get(module)
              * moduleWeights.get(module)
              * functionWeight;
    }

    // Precision is measured on the test side, recall on the reference side
    stats.precision = stats.testWeightedMatches / stats.testWeightedLength;
    stats.recall = stats.referenceWeightedMatches / stats.referenceWeightedLength;

    // F1 = 2pr / (p + r); it does not feed into the final score
    stats.f1 = (2 * stats.precision * stats.recall) / (stats.precision + stats.recall);

    // Fmean = 1 / ((1 - alpha) / p + alpha / r)
    stats.fMean = 1.0 / (((1.0 - alpha) / stats.precision) + (alpha / stats.recall));

    // Fragmentation is zero when both sides are fully matched in a single
    // chunk; otherwise it is chunks over the average number of word matches
    final boolean fullyMatchedInOneChunk =
        stats.testTotalMatches == stats.testLength
            && stats.referenceTotalMatches == stats.referenceLength
            && stats.chunks == 1;
    final double frag =
        fullyMatchedInOneChunk
            ? 0
            : ((double) stats.chunks)
                / (((double) (stats.testWordMatches + stats.referenceWordMatches)) / 2);

    stats.fragPenalty = gamma * Math.pow(frag, beta);

    final double rawScore = stats.fMean * (1.0 - stats.fragPenalty);

    // NaN (division by zero above) counts as 0; otherwise clamp to >= 0.0
    stats.score = Double.isNaN(rawScore) ? 0 : Math.max(rawScore, 0.0);
  }
Beispiel #3
0
  /**
   * Build the Meteor sufficient statistics for an alignment and score them.
   *
   * <p>When {@code charBased} is set, lengths and per-module match counts are
   * sums of word lengths; otherwise they are word counts copied from the
   * alignment. Before returning, the statistics are passed to
   * {@code computeMetrics} and the alignment is stored on them.
   *
   * @param alignment the alignment to collect statistics from
   * @return newly created statistics for the alignment, with metrics computed
   */
  public MeteorStats getMeteorStats(Alignment alignment) {
    MeteorStats stats = new MeteorStats();

    if (!charBased) {
      // Word level: take lengths and per-module matches from the alignment,
      // copying the lists so the stats own their own instances
      stats.testLength = alignment.words1.size();
      stats.referenceLength = alignment.words2.size();
      stats.testFunctionWords = alignment.line1FunctionWords.size();
      stats.referenceFunctionWords = alignment.line2FunctionWords.size();

      stats.testStageMatchesContent = new ArrayList<Integer>(alignment.moduleContentMatches1);
      stats.referenceStageMatchesContent = new ArrayList<Integer>(alignment.moduleContentMatches2);
      stats.testStageMatchesFunction = new ArrayList<Integer>(alignment.moduleFunctionMatches1);
      stats.referenceStageMatchesFunction =
          new ArrayList<Integer>(alignment.moduleFunctionMatches2);
    } else {
      // Character level: every length is a sum of word lengths
      stats.testLength = 0;
      for (String token : alignment.words1) {
        stats.testLength += token.length();
      }
      stats.referenceLength = 0;
      for (String token : alignment.words2) {
        stats.referenceLength += token.length();
      }
      stats.testFunctionWords = 0;
      for (int index : alignment.line1FunctionWords) {
        stats.testFunctionWords += alignment.words1.get(index).length();
      }
      stats.referenceFunctionWords = 0;
      for (int index : alignment.line2FunctionWords) {
        stats.referenceFunctionWords += alignment.words2.get(index).length();
      }

      // Per-module matches as summed word lengths. These are computed here to
      // avoid pushing character-level operations to the aligner.
      final int moduleCount = alignment.moduleContentMatches1.size();
      int[] testContentChars = new int[moduleCount];
      int[] referenceContentChars = new int[moduleCount];
      int[] testFunctionChars = new int[moduleCount];
      int[] referenceFunctionChars = new int[moduleCount];

      for (Match match : alignment.matches) {
        if (match == null) {
          continue;
        }
        // Test side of the match (matchStart / matchLength index words1)
        for (int offset = 0; offset < match.matchLength; offset++) {
          int position = match.matchStart + offset;
          if (alignment.line1FunctionWords.contains(position)) {
            testFunctionChars[match.module] += alignment.words1.get(position).length();
          } else {
            testContentChars[match.module] += alignment.words1.get(position).length();
          }
        }
        // Reference side of the match (start / length index words2)
        for (int offset = 0; offset < match.length; offset++) {
          int position = match.start + offset;
          if (alignment.line2FunctionWords.contains(position)) {
            referenceFunctionChars[match.module] += alignment.words2.get(position).length();
          } else {
            referenceContentChars[match.module] += alignment.words2.get(position).length();
          }
        }
      }

      // Append the per-module totals to the lists already held by stats
      for (int module = 0; module < moduleCount; module++) {
        stats.testStageMatchesContent.add(testContentChars[module]);
        stats.referenceStageMatchesContent.add(referenceContentChars[module]);
        stats.testStageMatchesFunction.add(testFunctionChars[module]);
        stats.referenceStageMatchesFunction.add(referenceFunctionChars[module]);
      }
    }

    // The chunk count is the same at word and character level
    stats.chunks = alignment.numChunks;

    // Totals are summed from the per-stage matches rather than taken from the
    // alignment, because the alignment totals are WEIGHTED while the totals
    // here are UNWEIGHTED
    final int stageCount = stats.testStageMatchesContent.size();
    for (int stage = 0; stage < stageCount; stage++) {
      stats.testTotalMatches += stats.testStageMatchesContent.get(stage);
      stats.testTotalMatches += stats.testStageMatchesFunction.get(stage);
      stats.referenceTotalMatches += stats.referenceStageMatchesContent.get(stage);
      stats.referenceTotalMatches += stats.referenceStageMatchesFunction.get(stage);

      // Word matches (for fragmentation/reporting) always come from the
      // alignment's per-module counts, in both modes
      stats.testWordMatches += alignment.moduleContentMatches1.get(stage);
      stats.testWordMatches += alignment.moduleFunctionMatches1.get(stage);
      stats.referenceWordMatches += alignment.moduleContentMatches2.get(stage);
      stats.referenceWordMatches += alignment.moduleFunctionMatches2.get(stage);
    }

    // The score is needed by callers that pick the best reference
    computeMetrics(stats);

    // Keep the underlying alignment with the statistics
    stats.alignment = alignment;

    return stats;
  }