public static XYSeries getXY(String url) {
    XYSeries xyseries = new XYSeries("");
    Path path = new Path(url);
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(conf, Reader.file(path),
                Reader.bufferSize(4096), Reader.start(0));
        DoubleArrStrWritable dkey =
                (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        DoublePairWritable dvalue =
                (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(dkey, dvalue)) { // read the file record by record
            xyseries.add(dvalue.getFirst(), dvalue.getSecond());
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }
    return xyseries;
}
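For context, getXY returns a JFreeChart XYSeries, so a typical caller wraps it in an XYSeriesCollection and hands it to a chart factory. The sketch below is an illustrative, hypothetical usage (the HDFS path, chart title, and output file name are made up), assuming JFreeChart 1.0.x and that the method lives in the same class as getXY:

import java.io.File;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtilities;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;

public static void drawXY() throws Exception {
    // Hypothetical input path; getXY is the method shown above.
    XYSeries series = getXY("/tmp/decision-chart-input");
    XYSeriesCollection dataset = new XYSeriesCollection(series);
    // Render the (first, second) pairs as a scatter plot and save it to disk.
    JFreeChart chart = ChartFactory.createScatterPlot(
            "decision graph", "x", "y", dataset,
            PlotOrientation.VERTICAL, false, false, false);
    ChartUtilities.saveChartAsPNG(new File("decision-graph.png"), chart, 800, 600);
}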
/**
 * Returns the sorted x*y values read from the given path.
 *
 * @param url
 * @return
 */
public Double[] getR(String url) {
    List<Double> list = new ArrayList<Double>();
    Path path = new Path(url);
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(conf, Reader.file(path),
                Reader.bufferSize(4096), Reader.start(0));
        DoubleArrStrWritable dkey =
                (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        DoublePairWritable dvalue =
                (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(dkey, dvalue)) { // read the file record by record
            // list.add(dvalue.getSum() * dvalue.getDistance());
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }
    Double[] dList = new Double[list.size()];
    dList = list.toArray(dList);
    Arrays.sort(dList);
    return dList;
}
private int readFile() throws IllegalArgumentException, IOException {
    int count = 0;
    final FileSystem fs = FileSystem.get(MapReduceTestUtils.getConfiguration());
    final FileStatus[] fss = fs.listStatus(
            new Path(TestUtils.TEMP_DIR + File.separator
                    + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY + "/t1/pairs"));
    for (final FileStatus ifs : fss) {
        if (ifs.isFile() && ifs.getPath().toString().matches(".*part-r-0000[0-9]")) {
            try (SequenceFile.Reader reader = new SequenceFile.Reader(
                    MapReduceTestUtils.getConfiguration(), Reader.file(ifs.getPath()))) {
                final Text key = new Text();
                final Text val = new Text();
                while (reader.next(key, val)) {
                    count++;
                    System.err.println(key + "\t" + val);
                }
            }
        }
    }
    return count;
}
@Override
public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
    try {
        Closeable rin;
        HdfsInfo hdfsInfo = HdfsInfoFactory.newHdfsInfo(hdfsPath);
        rin = new SequenceFile.Reader(hdfsInfo.getConf(), Reader.file(hdfsInfo.getPath()));
        return rin;
    } catch (IOException ex) {
        throw new RuntimeCamelException(ex);
    }
}
private static List<IDistanceDensityMul> getIDistanceDensityMulList(String url)
        throws FileNotFoundException, IOException {
    Configuration conf = HUtils.getConf();
    SequenceFile.Reader reader = null;
    // merged records from all part files; they still need to be sorted
    List<IDistanceDensityMul> allList = new ArrayList<IDistanceDensityMul>();
    // records from a single part file
    List<IDistanceDensityMul> fileList = new ArrayList<IDistanceDensityMul>();
    FileStatus[] fss = HUtils.getHDFSPath(url, "true")
            .getFileSystem(conf)
            .listStatus(HUtils.getHDFSPath(url, "true"));
    for (FileStatus f : fss) {
        if (!f.toString().contains("part")) {
            continue; // skip everything except the part-* output files
        }
        try {
            reader = new SequenceFile.Reader(conf, Reader.file(f.getPath()),
                    Reader.bufferSize(4096), Reader.start(0));
            // key: <density_i * min_distance_j>,
            // value: <first: density_i, second: min_distance_j, third: i>
            // (DoubleWritable, IntDoublePairWritable)
            CustomDoubleWritable dkey =
                    (CustomDoubleWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            IntDoublePairWritable dvalue =
                    (IntDoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            int i = Utils.GETDRAWPICRECORDS_EVERYFILE;
            while (reader.next(dkey, dvalue) && i > 0) { // read the file record by record
                i--;
                fileList.add(new IDistanceDensityMul(
                        dvalue.getSecond(), dvalue.getFirst(), dvalue.getThird(), dkey.get()));
                // each file is already sorted in ascending order
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(reader);
        }
        // merge the first Utils.GETDRAWPICRECORDS_EVERYFILE records of the current file
        if (allList.size() <= 0) { // first file: add all of its records
            allList.addAll(fileList);
        } else {
            combineLists(allList, fileList);
        }
    } // for
    // the first point is too large, so drop it
    return allList.subList(1, allList.size());
}
@Test
public void testSequenceFile() throws Exception {
    populateFile();
    Pipeline p = Pipeline.create(pipelineOptions.getOptions());
    @SuppressWarnings("unchecked")
    Class<? extends FileInputFormat<IntWritable, Text>> inputFormatClass =
            (Class<? extends FileInputFormat<IntWritable, Text>>) (Class<?>) SequenceFileInputFormat.class;
    HadoopIO.Read.Bound<IntWritable, Text> read = HadoopIO.Read.from(
            inputFile.getAbsolutePath(), inputFormatClass, IntWritable.class, Text.class);
    PCollection<KV<IntWritable, Text>> input = p.apply(read)
            .setCoder(KvCoder.of(WritableCoder.of(IntWritable.class), WritableCoder.of(Text.class)));
    @SuppressWarnings("unchecked")
    Class<? extends FileOutputFormat<IntWritable, Text>> outputFormatClass =
            (Class<? extends FileOutputFormat<IntWritable, Text>>) (Class<?>) TemplatedSequenceFileOutputFormat.class;
    @SuppressWarnings("unchecked")
    HadoopIO.Write.Bound<IntWritable, Text> write = HadoopIO.Write.to(
            outputFile.getAbsolutePath(), outputFormatClass, IntWritable.class, Text.class);
    input.apply(write.withoutSharding());
    p.run();
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Reader reader = new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
        int i = 0;
        while (reader.next(key, value)) {
            assertEquals(i, key.get());
            assertEquals("value-" + i, value.toString());
            i++;
        }
    }
}
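The test relies on populateFile() having written sequential (IntWritable, Text) records of the form (i, "value-" + i) into inputFile, but that helper is not shown here. A minimal sketch of what it might look like, assuming the inputFile field from the test class and an arbitrary record count of 5:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Hypothetical helper: writes 5 <IntWritable, Text> records (i, "value-" + i)
// into inputFile as a SequenceFile, matching what the assertions above expect.
private void populateFile() throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(inputFile.toURI());
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(Text.class))) {
        for (int i = 0; i < 5; i++) {
            writer.append(new IntWritable(i), new Text("value-" + i));
        }
    }
}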