Esempio n. 1
0
  /**
   * Writes a tab-separated accuracy-at-top-K report for the given candidate rankings.
   *
   * <p>For every cutoff in {@code K}, each ranking whose source equivalence class has a non-null
   * translation set in {@code testDict} is counted, and it scores as a hit when
   * {@code numInTopK} reports at least one gold translation within the cutoff. After a header
   * line, one line is written per cutoff: the cutoff, the hit percentage formatted with two
   * decimals, and the number of rankings counted.
   *
   * @param cands candidate rankings to score
   * @param testDict dictionary supplying the gold translations
   * @param outFileName name of the report file to write
   * @throws IOException if the report file cannot be opened or written
   */
  protected void evaluate(
      Collection<EquivClassCandRanking> cands, Dictionary testDict, String outFileName)
      throws IOException {
    BufferedWriter writer = new BufferedWriter(new FileWriter(outFileName));

    // Close the writer on every exit path so a failed write or scoring call does not leak the
    // underlying file handle.
    try {
      DecimalFormat df = new DecimalFormat("0.00");

      writer.write("K\tAccuracy@TopK\tNumInDict");
      writer.newLine();

      Set<EquivalenceClass> goldTrans;
      // Kept as doubles: the count column is emitted through double-to-string conversion
      // (e.g. "5.0"), and the division below is floating point.
      double oneInTopK, total, accInTopK;

      for (int i = 0; i < K.length; i++) {
        oneInTopK = 0;
        total = 0;

        for (EquivClassCandRanking ranking : cands) {
          goldTrans = testDict.translate(ranking.getEqClass());

          // Rankings without a gold translation are excluded from both numerator and denominator.
          if (goldTrans != null) {
            oneInTopK += (ranking.numInTopK(goldTrans, K[i]) > 0) ? 1 : 0;
            total++;
          }
        }

        // When nothing was counted this is 0.0 / 0.0, i.e. NaN, and is reported as such.
        accInTopK = 100.0 * oneInTopK / total;

        writer.write(K[i] + "\t" + df.format(accInTopK) + "\t" + total);
        writer.newLine();
      }
    } finally {
      writer.close();
    }
  }
Esempio n. 2
0
  /**
   * Runs the induction experiment described by {@code Configurator.CONFIG}.
   *
   * <p>Prepares the equivalence classes, builds the context, time and edit-distance scorers, and
   * then, for each scorer enabled in the configuration, ranks target candidates for the selected
   * source classes, writes an evaluation report and dumps the scored rankings. Output file names
   * are formed by appending a fixed suffix to the configured output path. When aggregation is
   * enabled, the rankings produced so far are combined by an {@code MRRAggregator} and the result
   * is evaluated and dumped the same way.
   *
   * @throws Exception propagated from any of the preparation, ranking, evaluation or dump steps
   */
  protected void gogo() throws Exception {
    // Experiment settings, all read from the shared configuration.
    final boolean useSlidingWindow =
        Configurator.CONFIG.getBoolean("experiments.time.SlidingWindow");
    final int timeWindowSize = Configurator.CONFIG.getInt("experiments.time.WindowSize");
    final int maxTrgPerSrc =
        Configurator.CONFIG.getInt("experiments.NumTranslationsToAddPerSource");
    final String outputPrefix = Configurator.CONFIG.getString("output.Path");
    final int rankingThreads = Configurator.CONFIG.getInt("experiments.NumRankingThreads");
    final boolean rankByContext = Configurator.CONFIG.getBoolean("experiments.DoContext");
    final boolean rankByTime = Configurator.CONFIG.getBoolean("experiments.DoTime");
    final boolean rankByEditDist = Configurator.CONFIG.getBoolean("experiments.DoEditDistance");
    final boolean aggregateRankings = Configurator.CONFIG.getBoolean("experiments.DoAggregate");

    // Build the equivalence classes before anything else asks the preparer for them.
    InductPreparer preparer = new InductPreparer();
    preparer.prepare();

    // Only a subset of the source classes is inducted; all target classes are candidates.
    Set<EquivalenceClass> sourceClasses = preparer.getSrcEqsToInduct();
    Set<EquivalenceClass> targetClasses = preparer.getTrgEqs();

    // One scorer per signal.
    DictScorer contextScorer =
        new FungS1Scorer(
            preparer.getSeedDict(), preparer.getMaxSrcTokCount(), preparer.getMaxTrgTokCount());
    Scorer timeScorer = new TimeDistributionCosineScorer(timeWindowSize, useSlidingWindow);
    Scorer editScorer = new EditDistanceScorer();

    // Pre-process the properties of both sides (context projection, distribution normalization).
    preparer.prepareProperties(true, sourceClasses, contextScorer, timeScorer);
    preparer.prepareProperties(false, preparer.getTrgEqs(), contextScorer, timeScorer);

    // Every individual ranking is kept so that it can be aggregated at the end.
    Set<Collection<EquivClassCandRanking>> collectedRankings =
        new HashSet<Collection<EquivClassCandRanking>>();
    Collection<EquivClassCandRanking> ranked;

    if (rankByTime) {
      final String evalPath = outputPrefix + "time.eval";
      final String scoredPath = outputPrefix + "time.scored";

      LOG.info("Ranking candidates using time...");
      ranked = rank(timeScorer, sourceClasses, targetClasses, maxTrgPerSrc, rankingThreads);
      evaluate(ranked, preparer.getSeedDict(), evalPath);
      EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), ranked, scoredPath);
      collectedRankings.add(ranked);
    }

    if (rankByContext) {
      final String evalPath = outputPrefix + "context.eval";
      final String scoredPath = outputPrefix + "context.scored";

      LOG.info("Ranking candidates using context...");
      ranked =
          rank(contextScorer, sourceClasses, targetClasses, maxTrgPerSrc, 0.0, rankingThreads);
      evaluate(ranked, preparer.getSeedDict(), evalPath);
      EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), ranked, scoredPath);
      collectedRankings.add(ranked);
    }

    if (rankByEditDist) {
      final String evalPath = outputPrefix + "edit.eval";
      final String scoredPath = outputPrefix + "edit.scored";

      LOG.info("Ranking candidates using edit distance...");
      ranked = rank(editScorer, sourceClasses, targetClasses, maxTrgPerSrc, rankingThreads);
      evaluate(ranked, preparer.getSeedDict(), evalPath);
      EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), ranked, scoredPath);
      collectedRankings.add(ranked);
    }

    if (aggregateRankings) {
      final String evalPath = outputPrefix + "aggmrr.eval";
      final String scoredPath = outputPrefix + "aggmrr.scored";

      LOG.info("Aggregating (MRR) all rankings...");
      MRRAggregator aggregator = new MRRAggregator();
      ranked = aggregator.aggregate(collectedRankings);
      evaluate(ranked, preparer.getSeedDict(), evalPath);
      EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), ranked, scoredPath);
    }

    LOG.info("--- Done ---");
  }