@Override
public void write(HadoopTwitterTokenToolOptions opts, TwitterTokenMode completedMode) throws Exception {
    this.stages = new MultiStagedJob(
            HadoopToolsUtil.getInputPaths(completedMode.finalOutput(opts), CountWordsAcrossTimeperiod.WORDCOUNT_DIR),
            HadoopToolsUtil.getOutputPath(outputPath),
            opts.getArgs());
    // Three stage process
    // 1a. Write all the words (word per line)
    new WordIndex().stage(stages);
    final Path wordIndex = stages.runAll();

    // Read the word/count lines back and aggregate per-word statistics
    HashMap<String, IndependentPair<Long, Long>> wordCountLines = WordIndex.readWordCountLines(wordIndex.toString(), "");
    StatsWordMatch matches = new StatsWordMatch();
    for (Entry<String, IndependentPair<Long, Long>> entry : wordCountLines.entrySet()) {
        String word = entry.getKey();
        IndependentPair<Long, Long> countLine = entry.getValue();
        Long count = countLine.firstObject();
        matches.updateStats(word, count);
    }
    System.out.println(matches);
}
@Override
public void write(HadoopTwitterTokenToolOptions opts, TwitterTokenMode completedMode) throws Exception {
    MultiStagedJob stages = new MultiStagedJob(
            HadoopToolsUtil.getInputPaths(completedMode.finalOutput(opts), CountWordsAcrossTimeperiod.WORDCOUNT_DIR),
            HadoopToolsUtil.getOutputPath(outputPath),
            opts.getArgs());
    // Queue and run the single stage that computes the time/word Jaccard index
    stages.queueStage(new TimeWordJacardIndex());
    stages.runAll();
}
/**
 * Read the words from a report output path.
 *
 * @param path the report output path
 * @param ext the part of the path in which the words are found
 * @return map of word to (count, line index), in file order
 * @throws IOException
 */
public static LinkedHashMap<String, IndependentPair<Long, Long>> readWordCountLines(String path, String ext)
        throws IOException
{
    String wordPath = path + ext;
    Path p = HadoopToolsUtil.getInputPaths(wordPath)[0];
    FileSystem fs = HadoopToolsUtil.getFileSystem(p);
    FSDataInputStream toRead = fs.open(p);
    BufferedReader reader = new BufferedReader(new InputStreamReader(toRead, "UTF-8"));
    CSVParser csvreader = new CSVParser(reader);
    long lineN = 0;
    String[] next = null;
    LinkedHashMap<String, IndependentPair<Long, Long>> toRet = new LinkedHashMap<String, IndependentPair<Long, Long>>();
    while ((next = csvreader.getLine()) != null && next.length > 0) {
        if (next.length != 2) {
            System.out.println("PROBLEM READING LINE: " + Arrays.toString(next));
            continue;
        }
        toRet.put(next[0], IndependentPair.pair(Long.parseLong(next[1]), lineN));
        lineN++;
    }
    reader.close();
    return toRet;
}
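/**
 * A minimal usage sketch (illustrative, not part of the original tool): dump the
 * word, count and index columns produced by readWordCountLines. The report path
 * and empty extension passed in are assumptions for the example.
 *
 * @param reportPath a completed word-count report directory
 * @throws IOException
 */
public static void printWordCounts(String reportPath) throws IOException {
    LinkedHashMap<String, IndependentPair<Long, Long>> words = readWordCountLines(reportPath, "");
    for (Entry<String, IndependentPair<Long, Long>> e : words.entrySet()) {
        long count = e.getValue().firstObject();  // total occurrences of the word
        long index = e.getValue().secondObject(); // position of the word in the report file
        System.out.println(e.getKey() + "\t" + count + "\t" + index);
    }
}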
/**
 * Write a CSV wordIndex to a {@link MLCell} written to a .mat data file.
 *
 * @param path the report output path
 * @throws IOException
 */
public static void writeToMatlab(String path) throws IOException {
    Path wordMatPath = new Path(path + "/words/wordIndex.mat");
    FileSystem fs = HadoopToolsUtil.getFileSystem(wordMatPath);
    // single-argument overload (not shown above) supplies the default words location
    LinkedHashMap<String, IndependentPair<Long, Long>> wordIndex = readWordCountLines(path);
    MLCell wordCell = new MLCell("words", new int[] { wordIndex.size(), 2 });
    System.out.println("... reading words");
    for (Entry<String, IndependentPair<Long, Long>> ent : wordIndex.entrySet()) {
        String word = ent.getKey();
        int wordCellIndex = (int) (long) ent.getValue().secondObject();
        long count = ent.getValue().firstObject();
        // column 0 holds the word, column 1 its count, at the row given by the word's index
        wordCell.set(new MLChar(null, word), wordCellIndex, 0);
        wordCell.set(new MLDouble(null, new double[][] { new double[] { count } }), wordCellIndex, 1);
    }
    ArrayList<MLArray> list = new ArrayList<MLArray>();
    list.add(wordCell);
    // MatFileWriter takes a WritableByteChannel, so wrap the HDFS output stream
    new MatFileWriter(Channels.newChannel(fs.create(wordMatPath)), list);
}
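/**
 * A minimal usage sketch (illustrative, not part of the original tool): export a
 * report directory to MATLAB. The report path below is hypothetical; the call
 * writes words/wordIndex.mat under that path, containing an N x 2 cell array
 * named "words" (column 0: word, column 1: count).
 *
 * @throws IOException
 */
public static void exportExampleReport() throws IOException {
    String reportPath = "hdfs:///user/twitter/wordcount-report"; // hypothetical report location
    writeToMatlab(reportPath);
}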