Example No. 1
  public static XYSeries getXY(String url) {
    XYSeries xyseries = new XYSeries("");

    Path path = new Path(url);
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    try {
      reader =
          new SequenceFile.Reader(
              conf, Reader.file(path), Reader.bufferSize(4096), Reader.start(0));
      DoubleArrStrWritable dkey =
          (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      DoublePairWritable dvalue =
          (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

      while (reader.next(dkey, dvalue)) { // read the file record by record
        xyseries.add(dvalue.getFirst(), dvalue.getSecond());
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      IOUtils.closeStream(reader);
    }
    return xyseries;
  }
Example No. 2
  /**
   * Reads every record from the SequenceFile at the given URL and returns the sorted x*y products.
   *
   * @param url HDFS path of the SequenceFile
   * @return the x*y products, sorted in ascending order
   */
  public Double[] getR(String url) {
    List<Double> list = new ArrayList<Double>();
    Path path = new Path(url);
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    try {
      reader =
          new SequenceFile.Reader(
              conf, Reader.file(path), Reader.bufferSize(4096), Reader.start(0));
      DoubleArrStrWritable dkey =
          (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      DoublePairWritable dvalue =
          (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

      while (reader.next(dkey, dvalue)) { // read the file record by record
        list.add(dvalue.getSum() * dvalue.getDistance()); // accumulate the x*y product for this record
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      IOUtils.closeStream(reader);
    }
    Double[] dList = new Double[list.size()];
    dList = list.toArray(dList);
    Arrays.sort(dList);
    return dList;
  }
Example No. 3
  private int readFile() throws IllegalArgumentException, IOException {
    int count = 0;
    final FileSystem fs = FileSystem.get(MapReduceTestUtils.getConfiguration());
    final FileStatus[] fss =
        fs.listStatus(
            new Path(
                TestUtils.TEMP_DIR
                    + File.separator
                    + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY
                    + "/t1/pairs"));
    for (final FileStatus ifs : fss) {
      if (ifs.isFile() && ifs.getPath().toString().matches(".*part-r-0000[0-9]")) {
        try (SequenceFile.Reader reader =
            new SequenceFile.Reader(
                MapReduceTestUtils.getConfiguration(), Reader.file(ifs.getPath()))) {

          final Text key = new Text();
          final Text val = new Text();

          while (reader.next(key, val)) {
            count++;
            System.err.println(key + "\t" + val);
          }
        }
      }
    }
    return count;
  }
Example No. 4
 @Override
 public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
   try {
     Closeable rin;
     HdfsInfo hdfsInfo = HdfsInfoFactory.newHdfsInfo(hdfsPath);
     rin = new SequenceFile.Reader(hdfsInfo.getConf(), Reader.file(hdfsInfo.getPath()));
     return rin;
   } catch (IOException ex) {
     throw new RuntimeCamelException(ex);
   }
 }
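
The reader in Example No. 4 is handed back as a plain Closeable, so a consumer has to cast it before it can pull records out. Below is a minimal sketch of such a consuming side, assuming the stream really is a SequenceFile.Reader; the class and method names are illustrative and not part of the original snippet.

import java.io.Closeable;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public final class SequenceStreamConsumer {

  /** Counts the records behind the Closeable returned by createInputStream above. */
  public static long countRecords(Closeable in, Configuration conf) throws IOException {
    // assumption: createInputStream always builds a SequenceFile.Reader, so the cast holds
    try (SequenceFile.Reader reader = (SequenceFile.Reader) in) {
      // let the file itself dictate the key/value Writable types, as the other examples do
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      long count = 0;
      while (reader.next(key, value)) {
        count++;
      }
      return count;
    }
  }
}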
Example No. 5
  private static List<IDistanceDensityMul> getIDistanceDensityMulList(String url)
      throws FileNotFoundException, IOException {
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    // records merged from all of the files; the combined list must stay sorted
    List<IDistanceDensityMul> allList = new ArrayList<IDistanceDensityMul>();
    // records read from a single file
    List<IDistanceDensityMul> fileList = new ArrayList<IDistanceDensityMul>();

    FileStatus[] fss =
        HUtils.getHDFSPath(url, "true")
            .getFileSystem(conf)
            .listStatus(HUtils.getHDFSPath(url, "true"));
    for (FileStatus f : fss) {
      if (!f.toString().contains("part")) {
        continue; // skip everything except the part-* output files
      }
      try {
        reader =
            new SequenceFile.Reader(
                conf, Reader.file(f.getPath()), Reader.bufferSize(4096), Reader.start(0));
        // key:   <density_i * min_distance_j>  (DoubleWritable)
        // value: <first: density_i, second: min_distance_j, third: i>  (IntDoublePairWritable)
        CustomDoubleWritable dkey =
            (CustomDoubleWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        IntDoublePairWritable dvalue =
            (IntDoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        int i = Utils.GETDRAWPICRECORDS_EVERYFILE;
        while (reader.next(dkey, dvalue) && i > 0) { // read at most the first i records of this file
          i--;
          fileList.add(
              new IDistanceDensityMul(
                  dvalue.getSecond(),
                  dvalue.getFirst(),
                  dvalue.getThird(),
                  dkey.get())); // each file is already sorted in ascending order
        }
      } catch (Exception e) {
        e.printStackTrace();
      } finally {
        IOUtils.closeStream(reader);
      }

      // merge the first Utils.GETDRAWPICRECORDS_EVERYFILE records of the current file into allList
      if (allList.size() <= 0) { // first file: all of its records can be added directly
        allList.addAll(fileList);
      } else {
        combineLists(allList, fileList);
      }
    } // for
    // the first point is far too large, so drop it
    return allList.subList(1, allList.size());
  }
Example No. 6
  @Test
  public void testSequenceFile() throws Exception {
    populateFile();

    Pipeline p = Pipeline.create(pipelineOptions.getOptions());
    @SuppressWarnings("unchecked")
    Class<? extends FileInputFormat<IntWritable, Text>> inputFormatClass =
        (Class<? extends FileInputFormat<IntWritable, Text>>)
            (Class<?>) SequenceFileInputFormat.class;
    HadoopIO.Read.Bound<IntWritable, Text> read =
        HadoopIO.Read.from(
            inputFile.getAbsolutePath(), inputFormatClass, IntWritable.class, Text.class);
    PCollection<KV<IntWritable, Text>> input =
        p.apply(read)
            .setCoder(
                KvCoder.of(WritableCoder.of(IntWritable.class), WritableCoder.of(Text.class)));
    @SuppressWarnings("unchecked")
    Class<? extends FileOutputFormat<IntWritable, Text>> outputFormatClass =
        (Class<? extends FileOutputFormat<IntWritable, Text>>)
            (Class<?>) TemplatedSequenceFileOutputFormat.class;
    @SuppressWarnings("unchecked")
    HadoopIO.Write.Bound<IntWritable, Text> write =
        HadoopIO.Write.to(
            outputFile.getAbsolutePath(), outputFormatClass, IntWritable.class, Text.class);
    input.apply(write.withoutSharding());
    p.run();

    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Reader reader =
        new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
      int i = 0;
      while (reader.next(key, value)) {
        assertEquals(i, key.get());
        assertEquals("value-" + i, value.toString());
        i++;
      }
    }
  }
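
The test above relies on a populateFile() helper (not shown) to create the input SequenceFile, and its final loop expects keys 0..N-1 paired with values of the form "value-i". A hypothetical sketch of a writer that would produce such a file via the option-based SequenceFile.createWriter API follows; the class name, target path handling, and record count are assumptions, not taken from the test.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public final class SequenceFileFixture {

  /** Writes records (0, "value-0") .. (count-1, "value-<count-1>") to the given path. */
  public static void write(String file, int count) throws IOException {
    Configuration conf = new Configuration();
    try (SequenceFile.Writer writer =
        SequenceFile.createWriter(
            conf,
            SequenceFile.Writer.file(new Path(file)),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(Text.class))) {
      IntWritable key = new IntWritable();
      Text value = new Text();
      for (int i = 0; i < count; i++) {
        key.set(i);
        value.set("value-" + i);
        writer.append(key, value); // records are written in key order, matching the test's assertions
      }
    }
  }
}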