예제 #1
0
  /**
   * Parses every example of one dataset group, dumps each parse result to {@code out}, and
   * accumulates the per-example evaluations into a group-level summary that is logged and
   * recorded at the end.
   *
   * <p>Returns immediately, without logging anything, when {@code examples} is empty.
   *
   * @param group name of the group; used in the log prefix and stored under the "group" output key
   * @param examples the examples that belong to {@code group}
   */
  private void processExamples(String group, List<Example> examples) {
    Evaluation groupEvaluation = new Evaluation();
    if (examples.isEmpty()) {
      return;
    }

    final String tag = "iter=0." + group;
    Execution.putOutput("group", group);
    LogInfo.begin_track_printAll("Processing %s: %s examples", tag, examples.size());

    // Metadata gets its own nested track so it is visually separated from the examples.
    LogInfo.begin_track("Dumping metadata");
    dumpMetadata(group, examples);
    LogInfo.end_track();

    LogInfo.begin_track("Examples");
    int index = 0;
    while (index < examples.size()) {
      Example current = examples.get(index);
      LogInfo.begin_track_printAll(
          "%s: example %s/%s: %s", tag, index, examples.size(), current.id);
      current.log();
      Execution.putOutput("example", index);

      // Time only the parse itself.
      StopWatchSet.begin("Parser.parse");
      ParserState parsed = builder.parser.parse(params, current, false);
      StopWatchSet.end();

      out.printf("########## Example %s ##########\n", current.id);
      dumpExample(exampleToLispTree(parsed));

      LogInfo.logs("Current: %s", current.evaluation.summary());
      groupEvaluation.add(current.evaluation);
      LogInfo.logs("Cumulative(%s): %s", tag, groupEvaluation.summary());
      LogInfo.end_track();

      current.predDerivations.clear(); // To save memory
      index++;
    }
    LogInfo.end_track(); // closes "Examples"

    LogInfo.logs("Stats for %s: %s", tag, groupEvaluation.summary());
    groupEvaluation.logStats(tag);
    groupEvaluation.putOutput(tag);
    LogInfo.end_track(); // closes "Processing ..."
  }
예제 #2
0
 /**
  * Builds the parser, reads the dataset, and dumps every group of examples to its own file named
  * {@code dumped-<group>.gz} inside the execution directory.
  *
  * <p>The output writer for a group is always closed, even if processing that group throws, so a
  * failure does not leak the open file handle or leave buffered output unflushed.
  */
 @Override
 public void run() {
   builder = new Builder();
   builder.build();
   dataset = new Dataset();
   dataset.read();
   params = new Params();
   for (String group : dataset.groups()) {
     String filename = Execution.getFile("dumped-" + group + ".gz");
     out = IOUtils.openOutHard(filename);
     try {
       processExamples(group, dataset.examples(group));
     } finally {
       // Close on both the success and the failure path; the exception (if any) still propagates.
       out.close();
     }
     LogInfo.logs("Finished dumping to %s", filename);
     StopWatchSet.logStats();
   }
 }
예제 #3
0
 /**
  * Handles a single monitor command after trimming surrounding whitespace.
  *
  * <ul>
  *   <li>empty string: print execution info, the output map, stopwatch stats and the virtual
  *       execution directory to {@code stderr};
  *   <li>{@code kill}: mark the execution as killed, write the output map, then throw;
  *   <li>{@code bail}: set {@code Execution.shouldBail}; the program itself must honour the flag;
  *   <li>anything else: report an invalid command on {@code stderr}.
  * </ul>
  *
  * @param cmd raw command text
  * @throws RuntimeException when the command is {@code kill}
  */
 void processCommand(String cmd) {
   final String command = cmd.trim();

   if (command.isEmpty()) {
     // Print status
     Execution.getInfo().print(stderr);
     Execution.printOutputMapToStderr();
     StopWatchSet.getStats().print(stderr);
     stderr.println(Execution.getVirtualExecDir());
     return;
   }

   if ("kill".equals(command)) {
     stderr.println("MonitorThread: KILLING");
     Execution.setExecStatus("killed", true);
     Execution.printOutputMap(Execution.getFile("output.map"));
     throw new RuntimeException("Killed by input command");
   }

   if ("bail".equals(command)) {
     // Up to program to look at this flag and actually gracefully stop
     stderr.println("MonitorThread: BAILING OUT");
     Execution.shouldBail = true;
     return;
   }

   stderr.println("Invalid command: '" + command + "'");
 }
예제 #4
0
  /**
   * Monitor loop. Until {@code stop} is set, each iteration optionally reads a command, reloads
   * {@code input.map}, checks for a {@code kill} file, publishes status values (note, memory,
   * elapsed time, error and warning counts) to the output map, writes {@code output.map}, and
   * then sleeps for {@code timeInterval}.
   *
   * <p>Any exception, including the one raised when a kill file is found, prints its stack trace
   * and terminates the JVM with exit code 1.
   */
  public void run() {
    try {
      while (!stop) {
        if (LogInfo.writeToStdout) {
          readAndProcessCommand();
        }

        // Input commands
        Execution.inputMap.readEasy(Execution.getFile("input.map"));

        // The kill file is only consulted when Execution.create is set.
        boolean killRequested = false;
        if (Execution.create) {
          killRequested = new File(Execution.getFile("kill")).exists();
        }
        if (killRequested) {
          Execution.setExecStatus("killed", true);
        }

        // Output status
        Execution.putOutput("log.note", LogInfo.note);
        Execution.putOutput("exec.memory", SysInfoUtils.getUsedMemoryStr());
        String elapsed = new StopWatch(LogInfo.getWatch().getCurrTimeLong()).toString();
        Execution.putOutput("exec.time", elapsed);
        Execution.putOutput("exec.errors", String.valueOf(LogInfo.getNumErrors()));
        Execution.putOutput("exec.warnings", String.valueOf(LogInfo.getNumWarnings()));
        Execution.setExecStatus("running", false);
        Execution.printOutputMap(Execution.getFile("output.map"));

        // Throw only after the status has been written out.
        if (killRequested) {
          throw new RuntimeException("Killed by 'kill' file");
        }

        Utils.sleep(timeInterval);
      }
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(1); // Die completely
    }
  }
예제 #5
0
/**
 * Scores a single prediction and records it.
 *
 * <p>{@link #eval(InferState)} builds an {@code Evaluation} from the values exposed by an
 * {@code InferState}, optionally logs a detailed breakdown (when {@code Main.verbose >= 2}), and
 * appends one line per call to each of two output files ("predictions" and "entropies") that are
 * opened once when this class is initialized.
 *
 * <p>NOTE(review): the two writers are static, flushed after every call, and never closed in this
 * class; no synchronization is visible here, so confirm callers are single-threaded.
 */
class Eval {
  // Predictions text file, one tab-separated key=value line per evaluated example:
  // path, ident, context, context_max_n, true, pred, rank, entropy
  private static final PrintWriter predOut = IOUtils.openOutEasy(Execution.getFile("predictions"));

  // Machine-readable entropies, one tab-separated line per evaluated example:
  // path, location of the true token, isIdent (1/0), entropy (or "oov"/"offBeam"), reciprocal
  // rank
  private static final PrintWriter entOut = IOUtils.openOutEasy(Execution.getFile("entropies"));

  /**
   * Evaluates one inference state.
   *
   * <p>Always records "accuracy", "oracle", "rank", "reciprocalRank" and "context_max_n".
   * "entropy" is recorded only when {@code state.isOracle()} is true. When
   * {@code state.isIdent()} is true, "identAccuracy" and "identOracle" are recorded as well, and
   * (only if oracle) "identEntropy", "identReciprocalRank" and one "identEntropy&lt;i&gt;" per
   * cluster.
   *
   * <p>Side effects: writes and flushes one line to each of the predictions and entropies files,
   * and emits log output when {@code Main.verbose >= 2}.
   *
   * @param state the inference state to score; its first candidate is used as the prediction
   * @return a new {@code Evaluation} holding the statistics described above
   */
  public static Evaluation eval(InferState state) {
    // Print out information about how well we're doing.
    Evaluation evaluation = new Evaluation();
    Candidate trueCandidate = state.getTrueCandidate();
    // The prediction is whatever sits at index 0.
    // NOTE(review): presumably candidates are ordered best-first and non-empty — confirm; an empty
    // collection would make get(0) fail.
    Candidate predCandidate = state.getCandidates().get(0);

    PredictionContext context = state.getContext();
    NgramContext ngramContext = NgramContext.get(context);
    Statistics statistics = state.getStatistics();
    Corpus corpus = statistics.getProjectLangCorpus(context.getPath());
    DataSummary summary = statistics.getStatistic(NgramKNCounts.class, corpus).getSummary();
    Params params = state.getParams();
    boolean oracle = state.isOracle();
    int rank = state.getRank();
    double entropy = state.getEntropy();
    double reciprocalRank = state.getReciprocalRank();
    boolean isIdent = state.isIdent();
    boolean correct = state.isCorrect();

    String path = context.getPath();

    String trueTokenStr = trueCandidate.token;
    String predToken = predCandidate.token;
    evaluation.add("accuracy", correct);
    evaluation.add("oracle", oracle);
    evaluation.add("rank", rank);
    evaluation.add("reciprocalRank", reciprocalRank);
    // Entropy is only added to the aggregate when the oracle flag is set.
    if (oracle) {
      evaluation.add("entropy", entropy);
    }
    // Separate statistics restricted to examples flagged as identifiers.
    if (isIdent) {
      evaluation.add("identAccuracy", correct);
      evaluation.add("identOracle", oracle);
      if (oracle) {
        evaluation.add("identEntropy", entropy);
        evaluation.add("identReciprocalRank", reciprocalRank);
        // Per-cluster entropy: negative natural log of the true candidate's cluster probability.
        for (int i = 0; i < Main.clusters; i++) {
          evaluation.add("identEntropy" + i, -Math.log(trueCandidate.clusterProbs[i]));
        }
      }
    }
    String contextStr = ngramContext.contextStr();
    // Detailed per-example logging, enabled at verbosity 2 and above.
    if (Main.verbose >= 2) {
      String entropyStr = oracle ? Fmt.D(entropy) : "N/A";
      begin_track(
          "Example %s [%s]: %s (%d candidates, rank %s, entropy %s)",
          path,
          correct ? "CORRECT" : "WRONG",
          contextStr,
          state.getCandidates().size(),
          rank,
          entropyStr);
      logs("True (prob= %s): [%s]", Fmt.D(trueCandidate.prob), trueTokenStr);
      logs("Pred (prob= %s): [%s]", Fmt.D(predCandidate.prob), predToken);
      // computeProb's return value is discarded here; the call is bracketed by
      // logKNIs(true)/logKNIs(false).
      // NOTE(review): presumably it is made only for the logging it triggers — confirm.
      if (oracle) {
        KneserNey.logKNIs(true);
        KneserNey.computeProb(CandidateNgram.get(context, trueCandidate), summary);
        KneserNey.logKNIs(false);
      }
      // begin_track("True");
      FeatureVector.logFeatureWeights("True", trueCandidate.features.toMap(), params);
      // for (int i = 0; i < Main.clusters; i++) {
      //   logs("cluster=%d, score %s, prob %s", i, Fmt.D(trueCandidate.clusterScores[i]),
      // Fmt.D(trueCandidate.clusterProbs[i]));
      //   FeatureVector.logFeatureWeights("cluster=" + i,
      //                                   trueCandidate.clusterFeatures.toMap(),
      //                                   params, Main.clusterDecorators[i]);
      // }
      // end_track();
      // Same bracketed computeProb call for the predicted candidate (not gated on oracle).
      KneserNey.logKNIs(true);
      KneserNey.computeProb(CandidateNgram.get(context, predCandidate), summary);
      KneserNey.logKNIs(false);
      FeatureVector.logFeatureWeights("Pred", predCandidate.features.toMap(), params);
      // for (Candidate candidate : candidates) {
      //   begin_track("Candidate " + candidate.token);
      //   for (int i = 0; i < Main.clusters; i++) {
      //     logs("cluster=%d, score %s, prob %s", i, Fmt.D(candidate.clusterScores[i]),
      // Fmt.D(candidate.clusterProbs[i]));
      //     FeatureVector.logFeatureWeights("cluster=" + i,
      //                                     candidate.clusterFeatures.toMap(),
      //                                     params, Main.clusterDecorators[i]);
      //   }
      //   end_track();
      // }
      FeatureVector.logFeatureDiff(
          "True - Pred", trueCandidate.features, predCandidate.features, params);
      end_track();
    }
    // Longest context that has been seen: start at getMax_n() - 1 and step down until
    // summary.counts[n] contains the sub-context of length n + 1, or n reaches 0.
    int context_max_n = ngramContext.getMax_n() - 1;
    while (context_max_n > 0
        && !summary.counts[context_max_n].containsKey(ngramContext.subContext(context_max_n + 1)))
      context_max_n--;
    evaluation.add("context_max_n", context_max_n);
    // Human-readable record. Note that the raw entropy value is written here regardless of the
    // oracle flag, unlike the entropies file below.
    predOut.println(
        "path="
            + path
            + "\tident="
            + (isIdent ? 1 : 0)
            + "\tcontext="
            + contextStr
            + "\tcontext_max_n="
            + context_max_n
            + "\ttrue="
            + trueTokenStr
            + "\tpred="
            + predToken
            + "\trank="
            + rank
            + "\tentropy="
            + entropy);
    predOut.flush();
    // Machine-readable record. The entropy column holds the numeric entropy when oracle is true;
    // otherwise it holds "oov" if state.isOov() is true and "offBeam" if not.
    entOut.println(
        path
            + "\t"
            + state.getTrueToken().loc()
            + "\t"
            + (isIdent ? 1 : 0)
            + "\t"
            + (oracle ? entropy : (state.isOov() ? "oov" : "offBeam"))
            + "\t"
            + reciprocalRank);
    entOut.flush();

    return evaluation;
  }
}
예제 #6
0
 /**
  * Program entry point: hands a new {@code SerializedDumper} to {@code Execution.run} under the
  * name "SerializedDumperMain", together with the option parser supplied by {@code Master}.
  *
  * @param args command-line arguments, forwarded unchanged
  */
 public static void main(String[] args) {
   SerializedDumper dumper = new SerializedDumper();
   Execution.run(args, "SerializedDumperMain", dumper, Master.getOptionsParser());
 }