コード例 #1
0
  @Override
  public void write(HadoopTwitterTokenToolOptions opts, TwitterTokenMode completedMode)
      throws Exception {

    this.stages =
        new MultiStagedJob(
            HadoopToolsUtil.getInputPaths(
                completedMode.finalOutput(opts), CountWordsAcrossTimeperiod.WORDCOUNT_DIR),
            HadoopToolsUtil.getOutputPath(outputPath),
            opts.getArgs());
    // Three stage process
    // 1a. Write all the words (word per line)
    new WordIndex().stage(stages);
    final Path wordIndex = stages.runAll();

    HashMap<String, IndependentPair<Long, Long>> wordCountLines =
        WordIndex.readWordCountLines(wordIndex.toString(), "");
    StatsWordMatch matches = new StatsWordMatch();
    for (Entry<String, IndependentPair<Long, Long>> entry : wordCountLines.entrySet()) {
      String word = entry.getKey();
      IndependentPair<Long, Long> countLine = entry.getValue();
      Long count = countLine.firstObject();
      matches.updateStats(word, count);
    }

    System.out.println(matches);
  }
コード例 #2
0
  @Override
  public void write(HadoopTwitterTokenToolOptions opts, TwitterTokenMode completedMode)
      throws Exception {
    MultiStagedJob stages;

    stages =
        new MultiStagedJob(
            HadoopToolsUtil.getInputPaths(
                completedMode.finalOutput(opts), CountWordsAcrossTimeperiod.WORDCOUNT_DIR),
            HadoopToolsUtil.getOutputPath(outputPath),
            opts.getArgs());
    stages.queueStage(new TimeWordJacardIndex());
    stages.runAll();
  }