private void processExamples(String group, List<Example> examples) { Evaluation evaluation = new Evaluation(); if (examples.isEmpty()) return; final String prefix = "iter=0." + group; Execution.putOutput("group", group); LogInfo.begin_track_printAll("Processing %s: %s examples", prefix, examples.size()); LogInfo.begin_track("Dumping metadata"); dumpMetadata(group, examples); LogInfo.end_track(); LogInfo.begin_track("Examples"); for (int e = 0; e < examples.size(); e++) { Example ex = examples.get(e); LogInfo.begin_track_printAll("%s: example %s/%s: %s", prefix, e, examples.size(), ex.id); ex.log(); Execution.putOutput("example", e); StopWatchSet.begin("Parser.parse"); ParserState state = builder.parser.parse(params, ex, false); StopWatchSet.end(); out.printf("########## Example %s ##########\n", ex.id); dumpExample(exampleToLispTree(state)); LogInfo.logs("Current: %s", ex.evaluation.summary()); evaluation.add(ex.evaluation); LogInfo.logs("Cumulative(%s): %s", prefix, evaluation.summary()); LogInfo.end_track(); ex.predDerivations.clear(); // To save memory } LogInfo.end_track(); LogInfo.logs("Stats for %s: %s", prefix, evaluation.summary()); evaluation.logStats(prefix); evaluation.putOutput(prefix); LogInfo.end_track(); }
/**
 * Builds the parser and reads the dataset, then dumps every dataset group to its own file.
 *
 * <p>For each group a writer is opened on {@code Execution.getFile("dumped-" + group + ".gz")},
 * stored in {@code out} for {@link #processExamples} to write to, and closed once the group
 * has been processed.
 */
@Override
public void run() {
  builder = new Builder();
  builder.build();
  dataset = new Dataset();
  dataset.read();
  params = new Params();

  for (String group : dataset.groups()) {
    String filename = Execution.getFile("dumped-" + group + ".gz");
    out = IOUtils.openOutHard(filename);
    try {
      processExamples(group, dataset.examples(group));
    } finally {
      // Close even when processing fails so whatever was already written is flushed
      // and the file handle is released.
      out.close();
    }
    LogInfo.logs("Finished dumping to %s", filename);
    StopWatchSet.logStats();
  }
}
void processCommand(String cmd) { cmd = cmd.trim(); if (cmd.equals("")) { // Print status Execution.getInfo().print(stderr); Execution.printOutputMapToStderr(); StopWatchSet.getStats().print(stderr); stderr.println(Execution.getVirtualExecDir()); } else if (cmd.equals("kill")) { stderr.println("MonitorThread: KILLING"); Execution.setExecStatus("killed", true); Execution.printOutputMap(Execution.getFile("output.map")); throw new RuntimeException("Killed by input command"); } else if (cmd.equals("bail")) { // Up to program to look at this flag and actually gracefully stop stderr.println("MonitorThread: BAILING OUT"); Execution.shouldBail = true; } else stderr.println("Invalid command: '" + cmd + "'"); }
public void run() { try { while (!stop) { if (LogInfo.writeToStdout) readAndProcessCommand(); // Input commands Execution.inputMap.readEasy(Execution.getFile("input.map")); boolean killed = Execution.create && new File(Execution.getFile("kill")).exists(); if (killed) Execution.setExecStatus("killed", true); // Output status Execution.putOutput("log.note", LogInfo.note); Execution.putOutput("exec.memory", SysInfoUtils.getUsedMemoryStr()); Execution.putOutput( "exec.time", new StopWatch(LogInfo.getWatch().getCurrTimeLong()).toString()); Execution.putOutput("exec.errors", "" + LogInfo.getNumErrors()); Execution.putOutput("exec.warnings", "" + LogInfo.getNumWarnings()); Execution.setExecStatus("running", false); Execution.printOutputMap(Execution.getFile("output.map")); if (killed) throw new RuntimeException("Killed by 'kill' file"); Utils.sleep(timeInterval); } } catch (Exception e) { e.printStackTrace(); System.exit(1); // Die completely } }
class Eval { // Predictions text file: context, actual, predicted, rank private static final PrintWriter predOut = IOUtils.openOutEasy(Execution.getFile("predictions")); // Machine-readable entropies: file, location, isIdent, entropy, reciprocal // rank private static final PrintWriter entOut = IOUtils.openOutEasy(Execution.getFile("entropies")); public static Evaluation eval(InferState state) { // Print out information about how well we're doing. Evaluation evaluation = new Evaluation(); Candidate trueCandidate = state.getTrueCandidate(); Candidate predCandidate = state.getCandidates().get(0); PredictionContext context = state.getContext(); NgramContext ngramContext = NgramContext.get(context); Statistics statistics = state.getStatistics(); Corpus corpus = statistics.getProjectLangCorpus(context.getPath()); DataSummary summary = statistics.getStatistic(NgramKNCounts.class, corpus).getSummary(); Params params = state.getParams(); boolean oracle = state.isOracle(); int rank = state.getRank(); double entropy = state.getEntropy(); double reciprocalRank = state.getReciprocalRank(); boolean isIdent = state.isIdent(); boolean correct = state.isCorrect(); String path = context.getPath(); String trueTokenStr = trueCandidate.token; String predToken = predCandidate.token; evaluation.add("accuracy", correct); evaluation.add("oracle", oracle); evaluation.add("rank", rank); evaluation.add("reciprocalRank", reciprocalRank); if (oracle) { evaluation.add("entropy", entropy); } if (isIdent) { evaluation.add("identAccuracy", correct); evaluation.add("identOracle", oracle); if (oracle) { evaluation.add("identEntropy", entropy); evaluation.add("identReciprocalRank", reciprocalRank); for (int i = 0; i < Main.clusters; i++) { evaluation.add("identEntropy" + i, -Math.log(trueCandidate.clusterProbs[i])); } } } String contextStr = ngramContext.contextStr(); if (Main.verbose >= 2) { String entropyStr = oracle ? 
Fmt.D(entropy) : "N/A"; begin_track( "Example %s [%s]: %s (%d candidates, rank %s, entropy %s)", path, correct ? "CORRECT" : "WRONG", contextStr, state.getCandidates().size(), rank, entropyStr); logs("True (prob= %s): [%s]", Fmt.D(trueCandidate.prob), trueTokenStr); logs("Pred (prob= %s): [%s]", Fmt.D(predCandidate.prob), predToken); if (oracle) { KneserNey.logKNIs(true); KneserNey.computeProb(CandidateNgram.get(context, trueCandidate), summary); KneserNey.logKNIs(false); } // begin_track("True"); FeatureVector.logFeatureWeights("True", trueCandidate.features.toMap(), params); // for (int i = 0; i < Main.clusters; i++) { // logs("cluster=%d, score %s, prob %s", i, Fmt.D(trueCandidate.clusterScores[i]), // Fmt.D(trueCandidate.clusterProbs[i])); // FeatureVector.logFeatureWeights("cluster=" + i, // trueCandidate.clusterFeatures.toMap(), // params, Main.clusterDecorators[i]); // } // end_track(); KneserNey.logKNIs(true); KneserNey.computeProb(CandidateNgram.get(context, predCandidate), summary); KneserNey.logKNIs(false); FeatureVector.logFeatureWeights("Pred", predCandidate.features.toMap(), params); // for (Candidate candidate : candidates) { // begin_track("Candidate " + candidate.token); // for (int i = 0; i < Main.clusters; i++) { // logs("cluster=%d, score %s, prob %s", i, Fmt.D(candidate.clusterScores[i]), // Fmt.D(candidate.clusterProbs[i])); // FeatureVector.logFeatureWeights("cluster=" + i, // candidate.clusterFeatures.toMap(), // params, Main.clusterDecorators[i]); // } // end_track(); // } FeatureVector.logFeatureDiff( "True - Pred", trueCandidate.features, predCandidate.features, params); end_track(); } // Longest context that has been seen int context_max_n = ngramContext.getMax_n() - 1; while (context_max_n > 0 && !summary.counts[context_max_n].containsKey(ngramContext.subContext(context_max_n + 1))) context_max_n--; evaluation.add("context_max_n", context_max_n); predOut.println( "path=" + path + "\tident=" + (isIdent ? 
1 : 0) + "\tcontext=" + contextStr + "\tcontext_max_n=" + context_max_n + "\ttrue=" + trueTokenStr + "\tpred=" + predToken + "\trank=" + rank + "\tentropy=" + entropy); predOut.flush(); entOut.println( path + "\t" + state.getTrueToken().loc() + "\t" + (isIdent ? 1 : 0) + "\t" + (oracle ? entropy : (state.isOov() ? "oov" : "offBeam")) + "\t" + reciprocalRank); entOut.flush(); return evaluation; } }
/**
 * Program entry point: hands a new {@code SerializedDumper} to {@code Execution.run} under the
 * name {@code "SerializedDumperMain"}, together with the options parser from {@code Master}.
 *
 * @param args command-line arguments, forwarded unchanged to {@code Execution.run}
 */
public static void main(String[] args) {
  SerializedDumper dumper = new SerializedDumper();
  Execution.run(args, "SerializedDumperMain", dumper, Master.getOptionsParser());
}