Exemplo n.º 1
0
  /**
   * Iterates over every record of the sequence file at {@code url} and returns the collected
   * values sorted in ascending order.
   *
   * <p>The statement that would collect one value per record is commented out, so as written the
   * records are only read and the returned array is always empty. Any exception raised while
   * opening or reading the file is printed to standard error and otherwise ignored.
   *
   * @param url location of the sequence file to read
   * @return the collected values in ascending order (currently always an empty array)
   */
  public Double[] getR(String url) {
    List<Double> values = new ArrayList<Double>();
    Path file = new Path(url);
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    try {
      reader =
          new SequenceFile.Reader(
              conf, Reader.file(file), Reader.bufferSize(4096), Reader.start(0));
      // Key and value holders are instantiated from the classes recorded in the file header.
      DoubleArrStrWritable key =
          (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      DoublePairWritable value =
          (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

      // Read the file record by record. The accumulation step is disabled:
      //   values.add(value.getSum() * value.getDistance());
      while (reader.next(key, value)) {}
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      IOUtils.closeStream(reader);
    }
    Double[] sorted = values.toArray(new Double[values.size()]);
    Arrays.sort(sorted);
    return sorted;
  }
Exemplo n.º 2
0
  /**
   * Builds an {@link XYSeries} with one point per record of the sequence file at {@code url}.
   *
   * <p>Each record's value contributes the point {@code (getFirst(), getSecond())}. Any exception
   * raised while opening or reading the file is printed to standard error; the points added up to
   * that moment are still returned.
   *
   * @param url location of the sequence file to read
   * @return a series with an empty name holding the points that were read
   */
  public static XYSeries getXY(String url) {
    XYSeries series = new XYSeries("");

    Path file = new Path(url);
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    try {
      reader =
          new SequenceFile.Reader(
              conf, Reader.file(file), Reader.bufferSize(4096), Reader.start(0));
      // Key and value holders are instantiated from the classes recorded in the file header.
      DoubleArrStrWritable key =
          (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      DoublePairWritable point =
          (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

      // Read the file record by record.
      while (reader.next(key, point)) {
        series.add(point.getFirst(), point.getSecond());
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      IOUtils.closeStream(reader);
    }
    return series;
  }
Exemplo n.º 3
0
  /**
   * Reads the leading records of every "part" file found under {@code url} and combines them into
   * a single list, then drops the first element of the combined list.
   *
   * <p>At most {@code Utils.GETDRAWPICRECORDS_EVERYFILE} records are taken from each file. A
   * failure while reading one file is printed to standard error and does not abort the remaining
   * files.
   *
   * @param url directory (resolved through {@code HUtils.getHDFSPath}) holding the part files
   * @return the combined records without the first element; an empty list when nothing was read
   * @throws FileNotFoundException declared for the directory listing
   * @throws IOException if the directory cannot be listed
   */
  private static List<IDistanceDensityMul> getIDistanceDensityMulList(String url)
      throws FileNotFoundException, IOException {
    Configuration conf = HUtils.getConf();
    // Records merged from all files; needs to be kept sorted.
    List<IDistanceDensityMul> allList = new ArrayList<IDistanceDensityMul>();
    // Records read from an individual file.
    // NOTE(review): this list is never cleared between files, so records of earlier files are
    // passed to combineLists again — confirm whether combineLists relies on or compensates for it.
    List<IDistanceDensityMul> fileList = new ArrayList<IDistanceDensityMul>();

    FileStatus[] fss =
        HUtils.getHDFSPath(url, "true")
            .getFileSystem(conf)
            .listStatus(HUtils.getHDFSPath(url, "true"));
    for (FileStatus f : fss) {
      if (!f.toString().contains("part")) {
        continue; // skip anything that is not a part file
      }
      // Scoped per file so that a failed open never re-closes the previous file's reader.
      SequenceFile.Reader reader = null;
      try {
        reader =
            new SequenceFile.Reader(
                conf, Reader.file(f.getPath()), Reader.bufferSize(4096), Reader.start(0));
        // Record layout noted by the original author:
        //   key   <density_i * min_distance_j>
        //   value <first: density_i, second: min_distance_j, third: i>
        //   (DoubleWritable, IntDoublePairWritable)
        CustomDoubleWritable dkey =
            (CustomDoubleWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        IntDoublePairWritable dvalue =
            (IntDoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        int i = Utils.GETDRAWPICRECORDS_EVERYFILE;
        while (reader.next(dkey, dvalue) && i > 0) { // read record by record
          i--;
          fileList.add(
              new IDistanceDensityMul(
                  dvalue.getSecond(),
                  dvalue.getFirst(),
                  dvalue.getThird(),
                  dkey.get())); // author's note: every file is already sorted
        }
      } catch (Exception e) {
        e.printStackTrace();
      } finally {
        IOUtils.closeStream(reader);
      }

      // Merge the leading records (Utils.GETDRAWPICRECORDS_EVERYFILE) of the current file.
      if (allList.size() <= 0) { // the first batch can be added wholesale
        allList.addAll(fileList);
      } else {
        combineLists(allList, fileList);
      }
    } // for

    // Nothing was read: subList(1, 0) would throw IllegalArgumentException.
    if (allList.isEmpty()) {
      return allList;
    }
    // Author's note: the first point is too large, so it is dropped.
    return allList.subList(1, allList.size());
  }
Exemplo n.º 4
0
  /**
   * Counts the records stored in the reducer output files of the {@code /t1/pairs} directory and
   * echoes each key/value pair to standard error.
   *
   * <p>Only regular files whose path matches {@code .*part-r-0000[0-9]} are read.
   *
   * @return total number of records read across all matching files
   * @throws IllegalArgumentException declared by the original signature
   * @throws IOException if listing the directory or reading a file fails
   */
  private int readFile() throws IllegalArgumentException, IOException {
    int records = 0;
    final FileSystem fileSystem = FileSystem.get(MapReduceTestUtils.getConfiguration());
    final String pairsDir =
        TestUtils.TEMP_DIR
            + File.separator
            + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY
            + "/t1/pairs";
    final FileStatus[] entries = fileSystem.listStatus(new Path(pairsDir));

    for (final FileStatus entry : entries) {
      // Skip directories and anything that is not a reducer part file.
      if (!entry.isFile() || !entry.getPath().toString().matches(".*part-r-0000[0-9]")) {
        continue;
      }
      try (SequenceFile.Reader reader =
          new SequenceFile.Reader(
              MapReduceTestUtils.getConfiguration(), Reader.file(entry.getPath()))) {

        final Text key = new Text();
        final Text val = new Text();

        while (reader.next(key, val)) {
          records++;
          System.err.println(key + "\t" + val);
        }
      }
    }
    return records;
  }
Exemplo n.º 5
0
 /**
  * Opens a {@link SequenceFile.Reader} on the file denoted by {@code hdfsPath}.
  *
  * @param hdfsPath path handed to {@code HdfsInfoFactory.newHdfsInfo}
  * @param configuration not consulted by this implementation
  * @return the opened reader, exposed as a {@link Closeable}
  * @throws RuntimeCamelException wrapping any {@link IOException} raised while opening
  */
 @Override
 public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
   try {
     HdfsInfo info = HdfsInfoFactory.newHdfsInfo(hdfsPath);
     return new SequenceFile.Reader(info.getConf(), Reader.file(info.getPath()));
   } catch (IOException ex) {
     throw new RuntimeCamelException(ex);
   }
 }
  /**
   * Reads back the evaluations.
   *
   * <p>All job output files under {@code outpath} are merged by a {@code Sorter} keyed on
   * {@code LongWritable} into {@code output.sorted}; that file is then read sequentially and a
   * copy of every value is appended to {@code evaluations}.
   *
   * @param fs File System
   * @param conf Job configuration
   * @param outpath output <code>Path</code>
   * @param evaluations <code>List&lt;Fitness&gt;</code> that receives the evaluated fitness of
   *     each candidate, in the order of the merged file (per the original documentation, the same
   *     order as the candidates of the input population).
   * @throws IOException if merging the outputs or reading the merged file fails
   */
  private static void importEvaluations(
      FileSystem fs, JobConf conf, Path outpath, List<CDFitness> evaluations) throws IOException {
    Sorter sorter = new Sorter(fs, LongWritable.class, CDFitness.class, conf);

    // merge and sort the outputs
    Path[] outfiles = OutputUtils.listOutputFiles(fs, outpath);
    Path output = new Path(outpath, "output.sorted");
    sorter.merge(outfiles, output);

    // import the evaluations
    LongWritable key = new LongWritable();
    CDFitness value = new CDFitness();
    Reader reader = new Reader(fs, output, conf);

    try {
      while (reader.next(key, value)) {
        // The value instance is reused by the reader, so store a copy.
        evaluations.add(new CDFitness(value));
      }
    } finally {
      // Release the reader even when reading fails part-way through.
      reader.close();
    }
  }
Exemplo n.º 7
0
  /**
   * Walks through every output-mapping file and groups consecutive source files by the crush
   * output file they map to, handing each group to {@code swap}.
   *
   * <p>Every record is a (source file, crush output file) pair. Whenever the crush output differs
   * from the previous record's, the group collected so far is passed to {@code swap} and a new
   * group is started. {@code swap} is also invoked once after each mapping file has been read.
   *
   * @throws IOException if a mapping file cannot be opened or read
   */
  private void cloneOutput() throws IOException {

    List<FileStatus> mappingFiles = getOutputMappings();

    /*
     * Begins as the shared empty list; per the original author's note swap() is a no-op for it.
     * It is replaced with a real list as soon as the first group starts, and that list is the one
     * used by the following iterations.
     */
    List<Path> group = emptyList();

    Text source = new Text();
    Text target = new Text();
    Text previousTarget = new Text();

    for (FileStatus mappingFile : mappingFiles) {
      Path mappingPath = mappingFile.getPath();

      Reader mappingReader = new Reader(fs, mappingPath, fs.getConf());

      try {
        while (mappingReader.next(source, target)) {
          boolean sameGroup = target.equals(previousTarget);
          if (!sameGroup) {
            // Flush the finished group before starting the next one.
            swap(group, previousTarget.toString());

            previousTarget.set(target);
            group = new LinkedList<Path>();
          }

          group.add(new Path(source.toString()));
        }
      } finally {
        try {
          mappingReader.close();
        } catch (IOException e) {
          LOG.warn("Trapped exception when closing " + mappingPath, e);
        }
      }

      swap(group, previousTarget.toString());
    }
  }
  /**
   * Round-trips a sequence file through a pipeline: reads {@code inputFile} with
   * {@code SequenceFileInputFormat}, writes it unsharded to {@code outputFile}, then checks that
   * record {@code i} of the output has key {@code i} and value {@code "value-" + i}.
   */
  @Test
  public void testSequenceFile() throws Exception {
    populateFile();

    Pipeline pipeline = Pipeline.create(pipelineOptions.getOptions());

    // Double cast through Class<?> to attach the key/value type parameters to the raw class.
    @SuppressWarnings("unchecked")
    Class<? extends FileInputFormat<IntWritable, Text>> sourceFormat =
        (Class<? extends FileInputFormat<IntWritable, Text>>)
            (Class<?>) SequenceFileInputFormat.class;
    HadoopIO.Read.Bound<IntWritable, Text> source =
        HadoopIO.Read.from(
            inputFile.getAbsolutePath(), sourceFormat, IntWritable.class, Text.class);
    PCollection<KV<IntWritable, Text>> records =
        pipeline
            .apply(source)
            .setCoder(
                KvCoder.of(WritableCoder.of(IntWritable.class), WritableCoder.of(Text.class)));

    @SuppressWarnings("unchecked")
    Class<? extends FileOutputFormat<IntWritable, Text>> sinkFormat =
        (Class<? extends FileOutputFormat<IntWritable, Text>>)
            (Class<?>) TemplatedSequenceFileOutputFormat.class;
    @SuppressWarnings("unchecked")
    HadoopIO.Write.Bound<IntWritable, Text> sink =
        HadoopIO.Write.to(
            outputFile.getAbsolutePath(), sinkFormat, IntWritable.class, Text.class);
    records.apply(sink.withoutSharding());
    pipeline.run();

    // Verify the written file record by record.
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Reader reader =
        new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
      for (int expected = 0; reader.next(key, value); expected++) {
        assertEquals(expected, key.get());
        assertEquals("value-" + expected, value.toString());
      }
    }
  }
Exemplo n.º 9
0
  /**
   * Moves the crush output files and the skipped files into the destination directory. Per the
   * original documentation this is used when operating in normal (non-clone) mode.
   *
   * @throws IOException if a mapping file cannot be opened or read
   */
  private void moveOutput() throws IOException {

    List<FileStatus> mappingFiles = getOutputMappings();

    Text source = new Text();
    Text target = new Text();

    // Distinct crush output files, reduced to the path component of their URI.
    Set<String> crushedPaths = new HashSet<String>(nBuckets);

    for (FileStatus mappingFile : mappingFiles) {
      Path mappingPath = mappingFile.getPath();

      Reader mappingReader = new Reader(fs, mappingPath, fs.getConf());

      try {
        while (mappingReader.next(source, target)) {
          String crushedPath = new Path(target.toString()).toUri().getPath();
          crushedPaths.add(crushedPath);
        }
      } finally {
        try {
          mappingReader.close();
        } catch (IOException e) {
          LOG.warn("Trapped exception when closing " + mappingPath, e);
        }
      }
    }

    assert crushedPaths.size() == nBuckets;

    /*
     * The crush output files sit in a subdirectory of the output directory that mirrors the full
     * path of the input directory that was crushed. E.g.
     *
     * Crush input:
     * /user/me/input/dir1/file1
     * /user/me/input/dir1/file2
     * /user/me/input/dir2/file3
     * /user/me/input/dir2/file4
     * /user/me/input/dir3/dir4/file5
     * /user/me/input/dir3/dir4/file6
     *
     * Crush output:
     * /user/me/output/user/me/input/dir1/crushed_file ...
     * /user/me/output/user/me/input/dir2/crushed_file ...
     * /user/me/output/user/me/input/dir3/dir4/crushed_file ...
     *
     * This is collapsed down to:
     * /user/me/output/dir1/crushed_file ...
     * /user/me/output/dir2/crushed_file ...
     * /user/me/output/dir3/dir4/crushed_file ...
     *
     * The prefix stripped from each crush output path is <outDir>/crush<srcDir>.
     */
    String srcDirName = fs.makeQualified(srcDir).toUri().getPath();

    String destName = fs.makeQualified(dest).toUri().getPath();
    String outDirName = fs.makeQualified(outDir).toUri().getPath();
    String partToReplace = outDirName + "/crush" + srcDirName;

    print(Verbosity.INFO, "\n\nCopying crush files to " + destName);

    for (String crushedPath : crushedPaths) {
      Path from = new Path(crushedPath);
      String relative = crushedPath.substring(partToReplace.length());
      Path toDir = new Path(destName + relative).getParent();

      rename(from, toDir, null);
    }

    print(Verbosity.INFO, "\n\nMoving skipped files to " + destName);

    /*
     * The files that were not crushed are moved as well, so that the output dir ends up with all
     * the data of the input dir, only spread over fewer files.
     */
    for (String skipped : skippedFiles) {
      Path from = new Path(skipped);
      String relative = skipped.substring(srcDirName.length());
      Path toDir = new Path(destName + relative).getParent();

      rename(from, toDir, null);
    }
  }
  /**
   * Runs this tool: a setup job, an iterated page-rank job, a normalisation job and a final
   * sorting job, deleting each intermediate directory once it has been consumed.
   *
   * <p>Convergence is checked after every fifth iteration by reading the first five records of
   * {@code part-00000} of the latest output; the loop also stops unconditionally after the 75th
   * iteration.
   *
   * @param args exactly four values: input path of the adjacency text file, output path for the
   *     node sequence files (also the prefix of all later output paths), number of map tasks,
   *     number of reduce tasks
   * @return {@code -1} when the argument count is wrong, {@code 0} otherwise
   * @throws Exception if an argument cannot be parsed, a job fails or a file system call fails
   */
  public int run(String[] args) throws Exception {

    long startWholeProgram = System.currentTimeMillis();
    boolean hasConverged = false;
    int counter = 0;

    // Must have four arguments
    if (args.length != 4) {
      MapReduce.printUsage();
      return -1;
    }

    // Set input and output file paths
    String inputPathToAdjacencyTextFile = args[0];
    String outputPathToNodeSequenceFileFormat = args[1];

    // Configure Job 1:
    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);
    int reduceTasksSetup = 1;

    // Configure Job Setup
    JobConf conf1 = new JobConf(MapReduce.class);
    conf1.setInt("numberOfNodes", numberNodes);
    conf1.setJobName("Setup Job");
    conf1.setNumMapTasks(mapTasks);
    conf1.setNumReduceTasks(reduceTasksSetup);

    FileInputFormat.setInputPaths(conf1, new Path(inputPathToAdjacencyTextFile));
    FileOutputFormat.setOutputPath(conf1, new Path(outputPathToNodeSequenceFileFormat));
    FileOutputFormat.setCompressOutput(conf1, false);

    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(MapReduceNode.class);

    conf1.setOutputFormat(SequenceFileOutputFormat.class);
    conf1.setInputFormat(TextInputFormat.class);

    conf1.setMapperClass(ConfigureMapper.class);
    conf1.setReducerClass(ConfigureReducer.class);

    // Delete the output directory if it exists already
    Path tempDir = new Path(outputPathToNodeSequenceFileFormat);
    FileSystem.get(tempDir.toUri(), conf1).delete(tempDir, true);

    long startTime = System.currentTimeMillis();

    // Run Configure Job
    JobClient.runJob(conf1);

    sLogger.info(
        "Config Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    String inputPath = args[1];
    String outputPath = null;
    String outputPathForNormalisedPagerank = null;
    String outputPathforFinalSortedPagerank = null;

    // Page Rank Calculation Job 2 (Iterative until convergence has been reached)

    while (!hasConverged) {
      System.out.println("*** ITERATION " + counter + ", number of nodes: " + numberNodes);
      counter++;

      sLogger.info("***** ITERATION " + counter);

      outputPath = args[1] + counter;

      // Pure Page Rank Calculation Job Setup
      JobConf pageRankJob = new JobConf(getConf(), MapReduce.class);
      pageRankJob.setInt("numberOfNodes", numberNodes);

      FileInputFormat.setInputPaths(pageRankJob, new Path(inputPath));
      FileOutputFormat.setOutputPath(pageRankJob, new Path(outputPath));
      FileOutputFormat.setCompressOutput(pageRankJob, false);

      pageRankJob.setJobName("PP Iteration " + counter);
      pageRankJob.setNumMapTasks(mapTasks);
      pageRankJob.setNumReduceTasks(reduceTasks);

      pageRankJob.setOutputKeyClass(Text.class);
      pageRankJob.setOutputValueClass(MapReduceNode.class);

      pageRankJob.setOutputFormat(SequenceFileOutputFormat.class);
      pageRankJob.setInputFormat(SequenceFileInputFormat.class);

      pageRankJob.setMapperClass(MapReduce.PageRankCalcMapper.class);
      pageRankJob.setReducerClass(MapReduce.PageRankCalcReducer.class);

      // Delete the output directory if it exists already
      Path tempPageRankDir = new Path(outputPath);
      FileSystem.get(tempDir.toUri(), conf1).delete(tempPageRankDir, true);

      startTime = System.currentTimeMillis();

      // Run Pure Page Rank Calculation Job
      JobClient.runJob(pageRankJob);

      sLogger.info(
          "PP Job"
              + counter
              + "Finished in "
              + (System.currentTimeMillis() - startTime) / 1000.0
              + " seconds");

      // Delete the input directory if it exists already
      Path tempInputPageRankDir = new Path(inputPath);
      FileSystem.get(tempDir.toUri(), conf1).delete(tempInputPageRankDir, true);

      // Set the output path of this iteration to be the inputpath for the next iteration
      inputPath = outputPath;

      // Check for convergence after every five iterations
      if (counter % 5 == 0) {

        Configuration conf = getConf();
        // outputPath was just built by string concatenation, so it cannot be null here.
        sLogger.info("Attempting to open file: " + outputPath + File.separator + "part-00000");
        System.out.println(
            "Attempting to open file: " + outputPath + File.separator + "part-00000");

        Path cFile = new Path(outputPath + File.separator + "part-00000");
        FileSystem fs = FileSystem.get(cFile.toUri(), conf);

        Reader sequenceFilereader = new Reader(fs, cFile, conf);
        try {
          // Converged only when each of the first five records reports convergence.
          for (int i = 0; i < 5; i++) {
            MapReduceNode readValue = new MapReduceNode();
            Text readKey = new Text();

            sequenceFilereader.next(readKey, readValue);
            if (!(readValue.hasConverged())) {
              break;
            }

            if (i == 4) {
              hasConverged = true;
            }
          }
        } finally {
          // Close exactly once, also when reading fails.
          sequenceFilereader.close();
        }
      }
      if (counter == 75) {
        sLogger.info("****************** Exiting (purposefully) after 75th iteration");
        hasConverged = true;
      }
    }

    // Normalised Page Rank Calculation Job 3
    outputPathForNormalisedPagerank = args[1] + "normalizedPageRank";

    // Normalised Page Rank Calculation Job Setup
    JobConf normalizationJob = new JobConf(getConf(), MapReduce.class);

    FileInputFormat.setInputPaths(normalizationJob, new Path(inputPath));
    FileOutputFormat.setOutputPath(normalizationJob, new Path(outputPathForNormalisedPagerank));
    FileOutputFormat.setCompressOutput(normalizationJob, false);

    normalizationJob.setJobName("Normalised Pagerank Output");
    normalizationJob.setNumMapTasks(mapTasks);
    normalizationJob.setNumReduceTasks(1);

    normalizationJob.setOutputKeyClass(Text.class);
    normalizationJob.setOutputValueClass(DoubleWritable.class);

    normalizationJob.setInputFormat(SequenceFileInputFormat.class);
    normalizationJob.setOutputFormat(SequenceFileOutputFormat.class);

    normalizationJob.setMapperClass(NormalisationMapper.class);
    normalizationJob.setReducerClass(NormalisationReducer.class);

    // Delete the output directory if it exists already
    Path tempUpdatedPageRankDir = new Path(outputPathForNormalisedPagerank);
    FileSystem.get(tempDir.toUri(), conf1).delete(tempUpdatedPageRankDir, true);

    startTime = System.currentTimeMillis();

    // Run Normalised Page Rank Calculation Job
    JobClient.runJob(normalizationJob);

    sLogger.info(
        "Normalisation Job Finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0
            + " seconds");

    // Sorting and Output Job 4

    // Delete the intermediary files created
    Path tempNormalizationInputPath = new Path(inputPath);
    FileSystem.get(tempDir.toUri(), conf1).delete(tempNormalizationInputPath, true);

    inputPath = outputPathForNormalisedPagerank;
    outputPathforFinalSortedPagerank = args[1] + "FinalSortedPageRank";

    // Sorting and Output Job Setup
    JobConf outputJob = new JobConf(getConf(), MapReduce.class);

    FileInputFormat.setInputPaths(outputJob, new Path(inputPath));
    FileOutputFormat.setOutputPath(outputJob, new Path(outputPathforFinalSortedPagerank));
    FileOutputFormat.setCompressOutput(outputJob, false);

    outputJob.setJobName("Final Pagerank Output");
    sLogger.info("Starting final sotirng job -> this will output a single file");
    outputJob.setNumMapTasks(1);
    outputJob.setNumReduceTasks(1);

    outputJob.setOutputKeyClass(DoubleWritable.class);
    outputJob.setOutputValueClass(Text.class);

    outputJob.setInputFormat(SequenceFileInputFormat.class);
    outputJob.setOutputFormat(TextOutputFormat.class);

    outputJob.setMapperClass(OutputMapper.class);
    outputJob.setReducerClass(OutputReducer.class);

    outputJob.setOutputKeyComparatorClass(ReverseComparator.class);

    startTime = System.currentTimeMillis();

    // Run Sorting and Output Job
    JobClient.runJob(outputJob);

    // Delete the intermediary files created
    Path tempFinalSortedInputPath = new Path(inputPath);
    FileSystem.get(tempDir.toUri(), conf1).delete(tempFinalSortedInputPath, true);

    sLogger.info(
        "Final Sorting Job Finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0
            + " seconds");

    sLogger.info(
        "The program lasted "
            + (System.currentTimeMillis() - startWholeProgram) / 1000.0
            + "s ("
            + (System.currentTimeMillis() - startWholeProgram) / 60000.0
            + " mins)");

    return 0;
  }