예제 #1
0
  /**
   * Command-line entry point.
   *
   * <p>With no arguments the usage text is printed and the JVM exits with status 0. Otherwise the
   * arguments are handed to {@code processArgsAndInitialize}, the candidate file is evaluated
   * according to {@code candFileFormat} ("plain" or "nbest"), and, when {@code evaluateRefs} is
   * set, each of the {@code refsPerSen} reference sets is evaluated as well. The method always
   * finishes by calling {@code System.exit(0)}.
   *
   * @param args command-line arguments
   * @throws Exception propagated from initialization or from the evaluation helpers
   */
  public static void main(String[] args) throws Exception {
    final boolean noArguments = (args.length == 0);
    if (noArguments) {
      printUsage(args.length);
      System.exit(0);
    } else {
      // options that are not given keep the defaults assigned in processArgsAndInitialize
      processArgsAndInitialize(args);
    }

    if (candFileFormat.equals("plain")) {
      final String plainNotice =
          "Evaluating candidate translations in plain file " + candFileName + "...";
      println(plainNotice);
      evaluateCands_plain(candFileName);
    } else if (candFileFormat.equals("nbest")) {
      final String nbestNotice =
          "Evaluating set of " + candRank + "'th candidate translations from " + candFileName + "...";
      println(nbestNotice);
      evaluateCands_nbest(candFileName, candRank);
    }
    println("");

    if (evaluateRefs) {
      // Score the references against themselves; handy when developing a new evaluation metric.
      println("");
      println("PERFORMING SANITY CHECK:");
      println("------------------------");
      println("");
      final String rangeNotice =
          "This metric's scores range from "
              + evalMetric.worstPossibleScore()
              + " (worst) to "
              + evalMetric.bestPossibleScore()
              + " (best).";
      println(rangeNotice);

      // reference sets are numbered from 1
      int refSet = 1;
      while (refSet <= refsPerSen) {
        println("");
        println("(*) Evaluating reference set " + refSet + ":");
        println("");
        evaluateRefSet(refSet);
        println("");
        ++refSet;
      }
    }

    System.exit(0);
  } // main(String[] args)
  /**
   * Selects one translation per n-best list by coordinate-wise hill climbing on {@code metric}.
   *
   * <p>The search starts from the selection returned by {@code greedyMetrixMax}. Each sweep visits
   * every n-best list in turn, tries every candidate at that position, and keeps the one with the
   * highest incremental metric score; exact ties are broken in favour of the higher
   * {@code subMetric} score when a sub-metric is configured. Sweeps repeat until one makes no
   * change or 25 sweeps have run.
   *
   * @param nbest container holding the n-best lists to choose from
   * @return the list obtained from {@code greedyMetrixMax}, updated in place with the final choice
   *     for each position
   */
  @Override
  public List<ScoredFeaturizedTranslation<TK, FV>> maximize(NBestListContainer<TK, FV> nbest) {

    final List<ScoredFeaturizedTranslation<TK, FV>> selected = greedyMetrixMax.maximize(nbest);
    final List<List<ScoredFeaturizedTranslation<TK, FV>>> nbestLists = nbest.nbestLists();

    final IncrementalEvaluationMetric<TK, FV> incrementalMetric = metric.getIncrementalMetric();
    final IncrementalEvaluationMetric<TK, FV> incrementalSubMetric;
    if (subMetric != null) {
      incrementalSubMetric = subMetric.getIncrementalMetric();
    } else {
      incrementalSubMetric = null;
    }

    // Prime the incremental metrics with the greedy starting point.
    for (ScoredFeaturizedTranslation<TK, FV> initial : selected) {
      incrementalMetric.add(initial);
      if (incrementalSubMetric != null) {
        incrementalSubMetric.add(initial);
      }
    }

    final int listCount = nbestLists.size();
    int changes = listCount; // non-zero whenever there is at least one list, so the loop starts
    int iter = 0;
    while (changes != 0 && iter < 25) { // XXX hard-coded sweep limit
      changes = 0;
      for (int pos = 0; pos < listCount; pos++) {
        ScoredFeaturizedTranslation<TK, FV> winner = null;
        double winnerScore = Double.NaN; // NaN marks "no candidate evaluated yet"
        double winnerSubScore = Double.NaN;

        for (ScoredFeaturizedTranslation<TK, FV> candidate : nbestLists.get(pos)) {
          incrementalMetric.replace(pos, candidate);
          if (subMetric != null) {
            assert (incrementalSubMetric != null);
            incrementalSubMetric.replace(pos, candidate);
          }

          final double candidateScore = incrementalMetric.score();
          if (Double.isNaN(winnerScore) || winnerScore < candidateScore) {
            winner = candidate;
            winnerScore = candidateScore;
            if (subMetric != null) {
              winnerSubScore = incrementalSubMetric.score();
            }
          } else if (winnerScore == candidateScore && subMetric != null) {
            // exact tie on the primary metric: let the sub-metric decide
            final double candidateSubScore = incrementalSubMetric.score();
            if (candidateSubScore > winnerSubScore) {
              winnerSubScore = candidateSubScore;
              winner = candidate;
            }
          }
        }

        // Leave the metrics holding the winner for this position before moving on.
        // NOTE(review): winner stays null when the n-best list at pos is empty.
        incrementalMetric.replace(pos, winner);
        if (incrementalSubMetric != null) {
          incrementalSubMetric.replace(pos, winner);
        }
        if (selected.get(pos) != winner) {
          changes++;
          selected.set(pos, winner);
        }
      }

      if (DEBUG) {
        // (A disabled consistency check that rebuilt the metric from scratch and a dump of the
        // selected translations used to live here.)
        System.err.printf(
            "%d: score: %.5f changes: %d\n", iter, incrementalMetric.score(), changes);
      }
      iter++;
    }

    return selected;
  }
예제 #3
0
  /**
   * Parses the command-line options, loads the reference translations and creates the evaluation
   * metric.
   *
   * <p>All static settings are first reset to their defaults (candidates.txt, plain format, rank 1,
   * references.txt, 1 reference per sentence, BLEU with options "4" and "closest", no reference
   * evaluation, not verbose) and then overridden by the options found in {@code args}. Recognized
   * options are {@code -cand}, {@code -format}, {@code -rank}, {@code -ref}, {@code -rps},
   * {@code -m}, {@code -evr} and {@code -v}; each takes one value, and {@code -m} additionally
   * takes as many metric options as {@code EvaluationMetric.metricOptionCount} reports.
   *
   * <p>An unknown option, a missing value or an out-of-range value prints a message and terminates
   * the JVM with exit status 10.
   *
   * @param args command-line arguments, given as option/value pairs
   * @throws Exception if a numeric value cannot be parsed, if the reference file cannot be read,
   *     or if one of the helper methods called here fails
   */
  private static void processArgsAndInitialize(String[] args) throws Exception {
    EvaluationMetric.set_knownMetrics();

    // set default values
    candFileName = "candidates.txt";
    candFileFormat = "plain";
    candRank = 1;
    refFileName = "references.txt";
    refsPerSen = 1;
    metricName = "BLEU";
    metricOptions = new String[2];
    metricOptions[0] = "4";
    metricOptions[1] = "closest";
    evaluateRefs = false;
    verbose = false;

    int i = 0;

    while (i < args.length) {
      String option = args[i];
      if (option.equals("-cand")) {
        candFileName = requireOptionValue(args, i);
      } else if (option.equals("-format")) {
        candFileFormat = requireOptionValue(args, i);
        if (!candFileFormat.equals("plain") && !candFileFormat.equals("nbest")) {
          println("candFileFormat must be either plain or nbest.");
          System.exit(10);
        }
      } else if (option.equals("-rank")) {
        candRank = Integer.parseInt(requireOptionValue(args, i));
        // validate the rank itself (the previous code tested refsPerSen here by mistake)
        if (candRank < 1) {
          println("Argument for -rank must be positive.");
          System.exit(10);
        }
      } else if (option.equals("-ref")) {
        refFileName = requireOptionValue(args, i);
      } else if (option.equals("-rps")) {
        refsPerSen = Integer.parseInt(requireOptionValue(args, i));
        if (refsPerSen < 1) {
          println("refsPerSen must be positive.");
          System.exit(10);
        }
      } else if (option.equals("-m")) {
        metricName = requireOptionValue(args, i);
        if (EvaluationMetric.knownMetricName(metricName)) {
          int optionCount = EvaluationMetric.metricOptionCount(metricName);
          // the metric options occupy args[i + 2] .. args[i + 1 + optionCount]
          if (i + 1 + optionCount >= args.length) {
            println("Metric " + metricName + " requires " + optionCount + " option(s).");
            System.exit(10);
          }
          metricOptions = new String[optionCount];
          for (int opt = 0; opt < optionCount; ++opt) {
            metricOptions[opt] = args[i + opt + 2];
          }
          i += optionCount; // skip the metric options; the name itself is skipped below
        } else {
          println("Unknown metric name " + metricName + ".");
          System.exit(10);
        }
      } else if (option.equals("-evr")) {
        int evr = Integer.parseInt(requireOptionValue(args, i));
        if (evr == 1) {
          evaluateRefs = true;
        } else if (evr == 0) {
          evaluateRefs = false;
        } else {
          println("evalRefs must be either 0 or 1.");
          System.exit(10);
        }
      } else if (option.equals("-v")) {
        int v = Integer.parseInt(requireOptionValue(args, i));
        if (v == 1) {
          verbose = true;
        } else if (v == 0) {
          verbose = false;
        } else {
          println("verbose must be either 0 or 1.");
          System.exit(10);
        }
      } else {
        println("Unknown option " + option);
        System.exit(10);
      }

      i += 2; // advance past the option and its value
    } // while (i)

    if (refsPerSen > 1) {
      // the provided refFileName might be a prefix
      File dummy = new File(refFileName);
      if (!dummy.exists()) {
        refFileName = createUnifiedRefFile(refFileName, refsPerSen);
      }
    } else {
      checkFile(refFileName);
    }

    // initialize
    numSentences = countLines(refFileName) / refsPerSen;

    // read in reference sentences
    refSentences = new String[numSentences][refsPerSen];
    BufferedReader inFile_refs = new BufferedReader(new FileReader(refFileName));
    try {
      for (i = 0; i < numSentences; ++i) {
        for (int r = 0; r < refsPerSen; ++r) {
          // read the rth reference translation for the ith sentence
          refSentences[i][r] = inFile_refs.readLine();
        }
      }
    } finally {
      // release the file handle even if reading fails
      inFile_refs.close();
    }

    // set static data members for the EvaluationMetric class
    EvaluationMetric.set_numSentences(numSentences);
    EvaluationMetric.set_refsPerSen(refsPerSen);
    EvaluationMetric.set_refSentences(refSentences);

    // do necessary initialization for the evaluation metric
    evalMetric = EvaluationMetric.getMetric(metricName, metricOptions);

    println("Processing " + numSentences + " sentences...");
  } // processArgsAndInitialize(String[] args)

  /**
   * Returns the value that follows the option at {@code args[index]}, or prints a message and
   * exits with status 10 when the option is the last argument.
   */
  private static String requireOptionValue(String[] args, int index) {
    if (index + 1 >= args.length) {
      println("Missing value for option " + args[index] + ".");
      System.exit(10);
    }
    return args[index + 1];
  }
예제 #4
0
  /**
   * Extracts one candidate translation per sentence from {@code inFileName} and prints its score
   * under {@code evalMetric}.
   *
   * <p>In "plain" format the file holds exactly {@code candPerSen} lines per sentence and the
   * {@code testIndex}'th of them is taken. In any other format the file is read as an n-best list
   * whose lines look like
   *
   * <pre>i ||| words of candidate translation . ||| feat-1_val feat-2_val ... .*</pre>
   *
   * where {@code i} is the 0-based sentence index, and the {@code testIndex}'th line of each
   * sentence is taken. Invalid arguments or a file with too few candidates print a message and
   * terminate the JVM with exit status 30-33.
   *
   * @param inFileName file containing the candidate translations
   * @param inFileFormat "plain" for the plain format; anything else is treated as n-best
   * @param candPerSen number of candidates per sentence (plain format only; ignored for n-best,
   *     where it varies)
   * @param testIndex 1-based index of the candidate to evaluate for each sentence, e.g. 1 for the
   *     first candidate and, in plain format, {@code candPerSen} for the last one
   * @throws Exception if the file cannot be read or an n-best line does not have the expected form
   */
  private static void evaluate(
      String inFileName, String inFileFormat, int candPerSen, int testIndex) throws Exception {

    if (inFileFormat.equals("plain") && candPerSen < 1) {
      println("candPerSen must be positive for a file in plain format.");
      System.exit(30);
    }

    if (inFileFormat.equals("plain") && (testIndex < 1 || testIndex > candPerSen)) {
      println("For the plain format, testIndex must be in [1,candPerSen]");
      System.exit(31);
    }

    // read the candidates
    String[] topCand_str = new String[numSentences];

    BufferedReader inFile = new BufferedReader(new FileReader(inFileName));
    try {
      if (inFileFormat.equals("plain")) {

        for (int i = 0; i < numSentences; ++i) {

          // skip candidates 1 through testIndex-1
          for (int n = 1; n < testIndex; ++n) {
            inFile.readLine();
          }

          // read testIndex'th candidate
          String candidate_str = inFile.readLine();
          if (candidate_str == null) {
            // the file ended early; fail here rather than hand null candidates to the metric
            println(
                "Not enough candidates were found (i = "
                    + i
                    + "; was expecting "
                    + numSentences
                    + ")");
            System.exit(33);
          }
          topCand_str[i] = candidate_str;

          // skip candidates testIndex+1 through candPerSen
          for (int n = testIndex + 1; n <= candPerSen; ++n) {
            inFile.readLine();
          }
        } // for (i)

      } else { // nbest format

        int i = 0;
        int n = 1;
        String line = inFile.readLine();

        while (line != null && i < numSentences) {

          // advance to the testIndex'th line counted from the first line of sentence i
          while (n < testIndex && line != null) {
            line = inFile.readLine();
            ++n;
          }

          // at the moment, line stores the testIndex'th candidate (1-indexed) of the i'th
          // sentence (0-indexed), or null if the file ended first

          if (line == null) {
            println(
                "Not enough candidates in "
                    + inFileName
                    + " to extract the "
                    + testIndex
                    + "'th candidate for each sentence.");
            println("(Failed to extract one for the " + i + "'th sentence (0-indexed).)");
            System.exit(32);
          }

          int read_i = Integer.parseInt(line.substring(0, line.indexOf(" |||")));
          if (read_i == i) {
            line = line.substring(line.indexOf("||| ") + 4); // get rid of initial text
            String candidate_str = line.substring(0, line.indexOf(" |||"));
            topCand_str[i] = candidate_str;
            if (i < numSentences - 1) {
              // skip the remaining candidates of sentence i
              while (read_i == i) {
                line = inFile.readLine();
                if (line == null) {
                  // file ended before the next sentence; the count check below reports it
                  break;
                }
                read_i = Integer.parseInt(line.substring(0, line.indexOf(" |||")));
              }
            }
            n = 1;
            i += 1;
          } else {
            // the testIndex'th line already belongs to another sentence
            println(
                "Not enough candidates in "
                    + inFileName
                    + " to extract the "
                    + testIndex
                    + "'th candidate for each sentence.");
            println("(Failed to extract one for the " + i + "'th sentence (0-indexed).)");
            System.exit(32);
          }
        } // while (line != null)

        if (i != numSentences) {
          println(
              "Not enough candidates were found (i = "
                  + i
                  + "; was expecting "
                  + numSentences
                  + ")");
          System.exit(33);
        }
      }
    } finally {
      // release the file handle even if reading or parsing fails
      inFile.close();
    }

    evalMetric.printDetailedScore(topCand_str, false);

    if (verbose) {
      println("");
      println("Printing detailed scores for individual sentences...");
      for (int i = 0; i < numSentences; ++i) {
        print("Sentence #" + i + ": ");
        evalMetric.printDetailedScore(topCand_str[i], i, true);
        // already prints a \n
      }
    }
  } // void evaluate(...)