protected void pruneContextsAccordingToScore( Set<EquivalenceClass> srcEqs, Set<EquivalenceClass> trgEqs, DictScorer scorer) { ScoreComparator comparator = new ScoreComparator(scorer); int pruneContextEqs = Configurator.CONFIG.getInt("experiments.context.PruneContextToSize"); // Prune context for (EquivalenceClass ec : srcEqs) { ((Context) ec.getProperty(Context.class.getName())).pruneContext(pruneContextEqs, comparator); } for (EquivalenceClass ec : trgEqs) { ((Context) ec.getProperty(Context.class.getName())).pruneContext(pruneContextEqs, comparator); } }
protected void gogo() throws Exception { boolean slidingWindow = Configurator.CONFIG.getBoolean("experiments.time.SlidingWindow"); int windowSize = Configurator.CONFIG.getInt("experiments.time.WindowSize"); int maxNumTrgPerSrc = Configurator.CONFIG.getInt("experiments.NumTranslationsToAddPerSource"); String outDir = Configurator.CONFIG.getString("output.Path"); int numThreads = Configurator.CONFIG.getInt("experiments.NumRankingThreads"); boolean doContext = Configurator.CONFIG.getBoolean("experiments.DoContext"); boolean doTime = Configurator.CONFIG.getBoolean("experiments.DoTime"); boolean doEditDist = Configurator.CONFIG.getBoolean("experiments.DoEditDistance"); boolean doAggregate = Configurator.CONFIG.getBoolean("experiments.DoAggregate"); InductPreparer preparer = new InductPreparer(); // Prepare equivalence classes preparer.prepare(); // Select a subset of src classes to actually induct Set<EquivalenceClass> srcSubset = preparer.getSrcEqsToInduct(); Set<EquivalenceClass> trgSet = preparer.getTrgEqs(); // Setup scorers DictScorer contextScorer = new FungS1Scorer( preparer.getSeedDict(), preparer.getMaxSrcTokCount(), preparer.getMaxTrgTokCount()); Scorer timeScorer = new TimeDistributionCosineScorer(windowSize, slidingWindow); Scorer editScorer = new EditDistanceScorer(); // Pre-process properties (i.e. project contexts, normalizes distributions) preparer.prepareProperties(true, srcSubset, contextScorer, timeScorer); preparer.prepareProperties(false, preparer.getTrgEqs(), contextScorer, timeScorer); Collection<EquivClassCandRanking> cands; Set<Collection<EquivClassCandRanking>> allCands = new HashSet<Collection<EquivClassCandRanking>>(); if (doTime) { LOG.info("Ranking candidates using time..."); cands = rank(timeScorer, srcSubset, trgSet, maxNumTrgPerSrc, numThreads); evaluate(cands, preparer.getSeedDict(), outDir + "time.eval"); EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), cands, outDir + "time.scored"); allCands.add(cands); } if (doContext) { LOG.info("Ranking candidates using context..."); cands = rank(contextScorer, srcSubset, trgSet, maxNumTrgPerSrc, 0.0, numThreads); evaluate(cands, preparer.getSeedDict(), outDir + "context.eval"); EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), cands, outDir + "context.scored"); allCands.add(cands); } if (doEditDist) { LOG.info("Ranking candidates using edit distance..."); cands = rank(editScorer, srcSubset, trgSet, maxNumTrgPerSrc, numThreads); evaluate(cands, preparer.getSeedDict(), outDir + "edit.eval"); EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), cands, outDir + "edit.scored"); allCands.add(cands); } if (doAggregate) { LOG.info("Aggregating (MRR) all rankings..."); MRRAggregator aggregator = new MRRAggregator(); cands = aggregator.aggregate(allCands); evaluate(cands, preparer.getSeedDict(), outDir + "aggmrr.eval"); EquivClassCandRanking.dumpToFile(preparer.getSeedDict(), cands, outDir + "aggmrr.scored"); ; } LOG.info("--- Done ---"); }