@Override public void write(HadoopTwitterTokenToolOptions opts, TwitterTokenMode completedMode) throws Exception { this.stages = new MultiStagedJob( HadoopToolsUtil.getInputPaths( completedMode.finalOutput(opts), CountWordsAcrossTimeperiod.WORDCOUNT_DIR), HadoopToolsUtil.getOutputPath(outputPath), opts.getArgs()); // Three stage process // 1a. Write all the words (word per line) new WordIndex().stage(stages); final Path wordIndex = stages.runAll(); HashMap<String, IndependentPair<Long, Long>> wordCountLines = WordIndex.readWordCountLines(wordIndex.toString(), ""); StatsWordMatch matches = new StatsWordMatch(); for (Entry<String, IndependentPair<Long, Long>> entry : wordCountLines.entrySet()) { String word = entry.getKey(); IndependentPair<Long, Long> countLine = entry.getValue(); Long count = countLine.firstObject(); matches.updateStats(word, count); } System.out.println(matches); }
/**
 * Queues the time/word Jacard-index stage against the completed mode's
 * word-count output and runs the resulting multi-stage Hadoop job.
 *
 * @param opts          tool options supplying job arguments and input paths
 * @param completedMode the token mode whose final output is consumed
 * @throws Exception if the Hadoop job fails
 */
@Override
public void write(HadoopTwitterTokenToolOptions opts, TwitterTokenMode completedMode) throws Exception {
	final MultiStagedJob job = new MultiStagedJob(
			HadoopToolsUtil.getInputPaths(
					completedMode.finalOutput(opts), CountWordsAcrossTimeperiod.WORDCOUNT_DIR),
			HadoopToolsUtil.getOutputPath(outputPath),
			opts.getArgs());
	job.queueStage(new TimeWordJacardIndex());
	job.runAll();
}