Пример #1
0
  /**
   * Parses each example of a group, dumps the parse to {@code out}, and logs the per-example and
   * running evaluation summaries.
   *
   * <p>Returns immediately, before any logging or output, when {@code examples} is empty.
   *
   * @param group group name; combined into the {@code "iter=0." + group} prefix used for logging
   *     and for the final statistics
   * @param examples the examples of that group, processed in list order
   */
  private void processExamples(String group, List<Example> examples) {
    Evaluation cumulative = new Evaluation();
    if (examples.isEmpty()) {
      return;
    }

    final String tag = "iter=0." + group;
    Execution.putOutput("group", group);
    LogInfo.begin_track_printAll("Processing %s: %s examples", tag, examples.size());

    // The metadata dump gets its own log track.
    LogInfo.begin_track("Dumping metadata");
    dumpMetadata(group, examples);
    LogInfo.end_track();

    LogInfo.begin_track("Examples");
    int index = 0;
    while (index < examples.size()) {
      Example current = examples.get(index);
      LogInfo.begin_track_printAll(
          "%s: example %s/%s: %s", tag, index, examples.size(), current.id);
      current.log();
      Execution.putOutput("example", index);

      // The stopwatch brackets only the parser call.
      StopWatchSet.begin("Parser.parse");
      ParserState parsed = builder.parser.parse(params, current, false);
      StopWatchSet.end();

      // Header line first, then the dumped parse.
      out.printf("########## Example %s ##########\n", current.id);
      dumpExample(exampleToLispTree(parsed));

      LogInfo.logs("Current: %s", current.evaluation.summary());
      cumulative.add(current.evaluation);
      LogInfo.logs("Cumulative(%s): %s", tag, cumulative.summary());
      LogInfo.end_track();

      current.predDerivations.clear(); // To save memory
      index++;
    }
    LogInfo.end_track(); // closes the "Examples" track

    LogInfo.logs("Stats for %s: %s", tag, cumulative.summary());
    cumulative.logStats(tag);
    cumulative.putOutput(tag);
    LogInfo.end_track(); // closes the "Processing ..." track
  }
Пример #2
0
  /**
   * Test fixture: parses a fixed command line into {@code Options}, then builds, loads and
   * uniformly initialises a {@code GenerativeDMVModel} for stage 1.
   *
   * <p>Follows the initialisation procedure from the Induction class.
   */
  @Before
  public void setUp() {
    // Disabled options, kept for reference:
    //   input lists:  "../wsj/train/", "wsjLists/unsupervised-train-wsj"
    //   "-inputFileExt mrg", "-inputFormat mrg", "-maxExampleLength 10", "-removePunctuation"
    StringBuilder commandLine = new StringBuilder();
    commandLine.append("-modelType dmv ");
    commandLine.append("-Options.stage1.numIters 60 ");
    commandLine.append("-numThreads 1 ");
    commandLine.append("-inputLists ");
    commandLine.append("../wsj/3.0/conll/wsj-10-noP-dev.deps ");
    commandLine.append("-examplesInSingleFile ");
    commandLine.append("-inputFormat conll ");
    commandLine.append("-dontOutputParams ");
    commandLine.append("-connlTagPos 3 ");
    commandLine.append("-connlHeadPos 6 ");
    commandLine.append("-useTagsAsWords");
    String[] argv = commandLine.toString().split(" ");

    // Parse the input parameters into a fresh Options object.
    Options options = new Options();
    Execution.init(argv, new Object[] {options});

    // Build the model and run its load / init sequence.
    model = new GenerativeDMVModel(options);
    model.readExamples();
    model.logStats();
    model.preInit();
    model.init(InitType.uniformz, null, "stage1");

    // Output frequency is set to the stage-1 iteration count.
    options.outputIterFreq = options.stage1.numIters;
    lopts = options.stage1;
    name = "stage1";
  }
Пример #3
0
 /**
  * Builds the parser, reads the dataset, and dumps every example group to its own file.
  *
  * <p>For each group the output file name is obtained from
  * {@code Execution.getFile("dumped-" + group + ".gz")}. The writer is stored in the {@code out}
  * field while {@code processExamples} runs and is always closed afterwards, including when
  * processing throws, so a failure does not leak the open output file. The "Finished dumping"
  * message and the stopwatch statistics are logged only after a group completes normally.
  */
 @Override
 public void run() {
   builder = new Builder();
   builder.build();
   dataset = new Dataset();
   dataset.read();
   params = new Params();
   for (String group : dataset.groups()) {
     String filename = Execution.getFile("dumped-" + group + ".gz");
     out = IOUtils.openOutHard(filename);
     try {
       processExamples(group, dataset.examples(group));
     } finally {
       // Close on both the normal and the exceptional path; the exception still propagates.
       out.close();
     }
     LogInfo.logs("Finished dumping to %s", filename);
     StopWatchSet.logStats();
   }
 }
Пример #4
0
/**
 * Scores one inference result: records metrics into an {@code Evaluation}, optionally logs a
 * detailed breakdown, and appends one line each to the predictions and entropies files.
 */
class Eval {
  // Predictions text file: context, actual, predicted, rank
  private static final PrintWriter predOut = IOUtils.openOutEasy(Execution.getFile("predictions"));

  // Machine-readable entropies: file, location, isIdent, entropy, reciprocal
  // rank
  private static final PrintWriter entOut = IOUtils.openOutEasy(Execution.getFile("entropies"));

  /**
   * Evaluates a single inference state.
   *
   * <p>The first element of {@code state.getCandidates()} is treated as the prediction. Metrics
   * are added to a new {@code Evaluation}; when {@code Main.verbose >= 2} a detailed log track is
   * written; finally one line is printed and flushed to each of the two output files.
   *
   * @param state the inference state to score
   * @return the evaluation holding the metrics recorded for this state
   */
  public static Evaluation eval(InferState state) {
    Evaluation result = new Evaluation();
    Candidate actual = state.getTrueCandidate();
    Candidate predicted = state.getCandidates().get(0);

    // Gather everything needed from the state up front.
    PredictionContext context = state.getContext();
    NgramContext ngramContext = NgramContext.get(context);
    Statistics statistics = state.getStatistics();
    Corpus corpus = statistics.getProjectLangCorpus(context.getPath());
    DataSummary summary = statistics.getStatistic(NgramKNCounts.class, corpus).getSummary();
    Params params = state.getParams();
    boolean oracle = state.isOracle();
    int rank = state.getRank();
    double entropy = state.getEntropy();
    double reciprocalRank = state.getReciprocalRank();
    boolean isIdent = state.isIdent();
    boolean correct = state.isCorrect();

    String path = context.getPath();
    String actualToken = actual.token;
    String predictedToken = predicted.token;

    recordMetrics(result, actual, correct, oracle, isIdent, rank, entropy, reciprocalRank);

    String contextStr = ngramContext.contextStr();
    if (Main.verbose >= 2) {
      logDetails(
          state, context, summary, params, actual, predicted, path, contextStr, correct, oracle,
          rank, entropy);
    }

    int contextMaxN = longestSeenContext(ngramContext, summary);
    result.add("context_max_n", contextMaxN);

    int identFlag = isIdent ? 1 : 0;
    writePrediction(
        path, identFlag, contextStr, contextMaxN, actualToken, predictedToken, rank, entropy);
    writeEntropy(state, path, identFlag, oracle, entropy, reciprocalRank);

    return result;
  }

  /**
   * Adds the accuracy/rank metrics, the entropy metric when {@code oracle} is true, and the
   * {@code ident*} metrics when {@code isIdent} is true.
   */
  private static void recordMetrics(
      Evaluation result,
      Candidate actual,
      boolean correct,
      boolean oracle,
      boolean isIdent,
      int rank,
      double entropy,
      double reciprocalRank) {
    result.add("accuracy", correct);
    result.add("oracle", oracle);
    result.add("rank", rank);
    result.add("reciprocalRank", reciprocalRank);
    if (oracle) {
      result.add("entropy", entropy);
    }
    if (!isIdent) {
      return;
    }
    result.add("identAccuracy", correct);
    result.add("identOracle", oracle);
    if (!oracle) {
      return;
    }
    result.add("identEntropy", entropy);
    result.add("identReciprocalRank", reciprocalRank);
    // One entropy entry per cluster: the negative log of the true candidate's cluster probability.
    for (int cluster = 0; cluster < Main.clusters; cluster++) {
      result.add("identEntropy" + cluster, -Math.log(actual.clusterProbs[cluster]));
    }
  }

  /**
   * Writes the verbose log track for one example: headline, true/predicted probabilities,
   * Kneser-Ney logging, feature weights for both candidates and their difference.
   */
  private static void logDetails(
      InferState state,
      PredictionContext context,
      DataSummary summary,
      Params params,
      Candidate actual,
      Candidate predicted,
      String path,
      String contextStr,
      boolean correct,
      boolean oracle,
      int rank,
      double entropy) {
    String entropyStr;
    if (oracle) {
      entropyStr = Fmt.D(entropy);
    } else {
      entropyStr = "N/A";
    }
    begin_track(
        "Example %s [%s]: %s (%d candidates, rank %s, entropy %s)",
        path,
        correct ? "CORRECT" : "WRONG",
        contextStr,
        state.getCandidates().size(),
        rank,
        entropyStr);
    logs("True (prob= %s): [%s]", Fmt.D(actual.prob), actual.token);
    logs("Pred (prob= %s): [%s]", Fmt.D(predicted.prob), predicted.token);

    // The true candidate's Kneser-Ney logging runs only when oracle is true.
    if (oracle) {
      logKneserNey(context, actual, summary);
    }
    FeatureVector.logFeatureWeights("True", actual.features.toMap(), params);

    logKneserNey(context, predicted, summary);
    FeatureVector.logFeatureWeights("Pred", predicted.features.toMap(), params);

    FeatureVector.logFeatureDiff("True - Pred", actual.features, predicted.features, params);
    end_track();
  }

  /**
   * Runs the Kneser-Ney probability computation for {@code candidate} between
   * {@code logKNIs(true)} and {@code logKNIs(false)}; the computed value itself is discarded.
   */
  private static void logKneserNey(
      PredictionContext context, Candidate candidate, DataSummary summary) {
    KneserNey.logKNIs(true);
    KneserNey.computeProb(CandidateNgram.get(context, candidate), summary);
    KneserNey.logKNIs(false);
  }

  /**
   * Longest context that has been seen: starts at {@code getMax_n() - 1} and decreases until the
   * matching sub-context is a key of {@code summary.counts[n]} or {@code n} is no longer positive.
   */
  private static int longestSeenContext(NgramContext ngramContext, DataSummary summary) {
    int n = ngramContext.getMax_n() - 1;
    while (n > 0) {
      if (summary.counts[n].containsKey(ngramContext.subContext(n + 1))) {
        break;
      }
      n--;
    }
    return n;
  }

  /** Prints one tab-separated {@code key=value} line to the predictions file and flushes it. */
  private static void writePrediction(
      String path,
      int identFlag,
      String contextStr,
      int contextMaxN,
      String actualToken,
      String predictedToken,
      int rank,
      double entropy) {
    StringBuilder line = new StringBuilder();
    line.append("path=").append(path);
    line.append("\tident=").append(identFlag);
    line.append("\tcontext=").append(contextStr);
    line.append("\tcontext_max_n=").append(contextMaxN);
    line.append("\ttrue=").append(actualToken);
    line.append("\tpred=").append(predictedToken);
    line.append("\trank=").append(rank);
    line.append("\tentropy=").append(entropy);
    predOut.println(line.toString());
    predOut.flush();
  }

  /**
   * Prints one tab-separated line to the entropies file and flushes it. The entropy column holds
   * the entropy when {@code oracle} is true, otherwise {@code "oov"} or {@code "offBeam"}
   * depending on {@code state.isOov()}.
   */
  private static void writeEntropy(
      InferState state,
      String path,
      int identFlag,
      boolean oracle,
      double entropy,
      double reciprocalRank) {
    String line = path + "\t" + state.getTrueToken().loc() + "\t" + identFlag + "\t";
    if (oracle) {
      line += entropy;
    } else {
      line += state.isOov() ? "oov" : "offBeam";
    }
    line += "\t" + reciprocalRank;
    entOut.println(line);
    entOut.flush();
  }
}
Пример #5
0
 /**
  * Program entry point: passes the command-line arguments and a new {@code TreeIterable} to
  * {@code Execution.run}.
  *
  * @param args command-line arguments, forwarded unchanged
  */
 public static void main(String[] args) {
   TreeIterable job = new TreeIterable();
   Execution.run(args, job);
 }
Пример #6
0
 /**
  * Program entry point: passes the command-line arguments, the name
  * {@code "SerializedDumperMain"}, a new {@code SerializedDumper} and the options parser from
  * {@code Master} to {@code Execution.run}.
  *
  * @param args command-line arguments, forwarded unchanged
  */
 public static void main(String[] args) {
   SerializedDumper dumper = new SerializedDumper();
   Execution.run(args, "SerializedDumperMain", dumper, Master.getOptionsParser());
 }