Example #1
0
  /**
   * Runs the induction experiment end to end.
   *
   * <p>Reads the experiment settings from {@code Configurator.CONFIG}, prepares the equivalence
   * classes through an {@link InductPreparer}, and then, for every enabled signal (time, context,
   * edit distance), ranks target candidates for each selected source class, evaluates the ranking
   * against the seed dictionary and dumps the scored candidates. When aggregation is enabled, the
   * rankings collected from the enabled signals are combined with an {@link MRRAggregator} and the
   * combined ranking is evaluated and dumped the same way.
   *
   * <p>Output files are named {@code output.Path + "<signal>.eval"} and
   * {@code output.Path + "<signal>.scored"}. The configured path is concatenated as-is, so it is
   * presumably expected to end with a path separator (or to be a deliberate file-name prefix).
   *
   * @throws Exception if configuration lookup, preparation, ranking, evaluation or dumping fails
   */
  protected void gogo() throws Exception {
    boolean slidingWindow = Configurator.CONFIG.getBoolean("experiments.time.SlidingWindow");
    int windowSize = Configurator.CONFIG.getInt("experiments.time.WindowSize");
    int maxNumTrgPerSrc = Configurator.CONFIG.getInt("experiments.NumTranslationsToAddPerSource");
    String outDir = Configurator.CONFIG.getString("output.Path");
    int numThreads = Configurator.CONFIG.getInt("experiments.NumRankingThreads");
    boolean doContext = Configurator.CONFIG.getBoolean("experiments.DoContext");
    boolean doTime = Configurator.CONFIG.getBoolean("experiments.DoTime");
    boolean doEditDist = Configurator.CONFIG.getBoolean("experiments.DoEditDistance");
    boolean doAggregate = Configurator.CONFIG.getBoolean("experiments.DoAggregate");

    InductPreparer preparer = new InductPreparer();

    // Prepare equivalence classes
    preparer.prepare();

    // Select a subset of src classes to actually induct; all target classes are candidates
    Set<EquivalenceClass> srcSubset = preparer.getSrcEqsToInduct();
    Set<EquivalenceClass> trgSet = preparer.getTrgEqs();

    // Setup scorers
    DictScorer contextScorer =
        new FungS1Scorer(
            preparer.getSeedDict(), preparer.getMaxSrcTokCount(), preparer.getMaxTrgTokCount());
    Scorer timeScorer = new TimeDistributionCosineScorer(windowSize, slidingWindow);
    Scorer editScorer = new EditDistanceScorer();

    // Pre-process properties (i.e. project contexts, normalizes distributions).
    // The same sets that are ranked below are the ones being pre-processed here.
    preparer.prepareProperties(true, srcSubset, contextScorer, timeScorer);
    preparer.prepareProperties(false, trgSet, contextScorer, timeScorer);

    Collection<EquivClassCandRanking> cands;
    // Rankings produced by each enabled signal, kept for the optional aggregation step
    Set<Collection<EquivClassCandRanking>> allCands =
        new HashSet<Collection<EquivClassCandRanking>>();

    if (doTime) {
      LOG.info("Ranking candidates using time...");
      cands = rank(timeScorer, srcSubset, trgSet, maxNumTrgPerSrc, numThreads);
      evaluateAndDump(preparer, cands, outDir, "time");
      allCands.add(cands);
    }

    if (doContext) {
      LOG.info("Ranking candidates using context...");
      // Context ranking goes through the overload that takes an extra 0.0 argument
      cands = rank(contextScorer, srcSubset, trgSet, maxNumTrgPerSrc, 0.0, numThreads);
      evaluateAndDump(preparer, cands, outDir, "context");
      allCands.add(cands);
    }

    if (doEditDist) {
      LOG.info("Ranking candidates using edit distance...");
      cands = rank(editScorer, srcSubset, trgSet, maxNumTrgPerSrc, numThreads);
      evaluateAndDump(preparer, cands, outDir, "edit");
      allCands.add(cands);
    }

    if (doAggregate) {
      LOG.info("Aggregating (MRR) all rankings...");
      MRRAggregator aggregator = new MRRAggregator();
      cands = aggregator.aggregate(allCands);
      evaluateAndDump(preparer, cands, outDir, "aggmrr");
    }

    LOG.info("--- Done ---");
  }

  /**
   * Evaluates one ranking against the seed dictionary and dumps its scored candidates.
   *
   * <p>Writes {@code outDir + name + ".eval"} and then {@code outDir + name + ".scored"}.
   *
   * @param preparer source of the seed dictionary used for evaluation and dumping
   * @param cands the candidate rankings to evaluate and dump
   * @param outDir output path prefix, concatenated directly with the file name
   * @param name file-name stem identifying the ranking signal (e.g. {@code "time"})
   * @throws Exception if evaluation or dumping fails
   */
  private void evaluateAndDump(
      InductPreparer preparer,
      Collection<EquivClassCandRanking> cands,
      String outDir,
      String name)
      throws Exception {
    evaluate(cands, preparer.getSeedDict(), outDir + name + ".eval");
    EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), cands, outDir + name + ".scored");
  }