/**
 * Builds (but does not submit) the MapReduce job that runs a pcap query.
 *
 * <p>The supplied {@code conf} is modified in place: the start and end timestamps are stored as
 * unsigned decimal strings under {@code START_TS_CONF} and {@code END_TS_CONF}, the value computed
 * by {@code findWidth} is stored under {@code WIDTH_CONF}, and {@code filterImpl} adds whatever it
 * needs for {@code fields}.
 *
 * @param basePath root path handed to {@code getPaths} to locate the input files
 * @param outputPath path the job's sequence-file output is written to
 * @param beginNS start timestamp, written to the configuration as an unsigned value
 * @param endNS end timestamp, written to the configuration as an unsigned value
 * @param numReducers number of reduce tasks; also passed to {@code findWidth}
 * @param fields filter description handed to {@code filterImpl}
 * @param conf Hadoop configuration the job is created from (mutated by this method)
 * @param fs file system used to look up the input paths
 * @param filterImpl strategy that serializes {@code fields} into {@code conf}
 * @param <T> type of the filter description
 * @return the configured job
 * @throws IOException if the job cannot be created or the input paths cannot be resolved
 */
public <T> Job createJob(
    Path basePath,
    Path outputPath,
    long beginNS,
    long endNS,
    int numReducers,
    T fields,
    Configuration conf,
    FileSystem fs,
    PcapFilterConfigurator<T> filterImpl)
    throws IOException {
  conf.set(START_TS_CONF, Long.toUnsignedString(beginNS));
  conf.set(END_TS_CONF, Long.toUnsignedString(endNS));
  conf.set(WIDTH_CONF, String.valueOf(findWidth(beginNS, endNS, numReducers)));
  filterImpl.addToConfig(fields, conf);

  // Job.getInstance replaces the deprecated Job(Configuration) constructor. The configuration
  // must be fully populated before this call, so the settings above stay ahead of it.
  Job job = Job.getInstance(conf);
  job.setJarByClass(PcapJob.class);

  // Map side.
  job.setMapperClass(PcapJob.PcapMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);

  // Shuffle and reduce side.
  job.setNumReduceTasks(numReducers);
  job.setReducerClass(PcapReducer.class);
  job.setPartitionerClass(PcapPartitioner.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(BytesWritable.class);

  // Input: every path returned by getPaths, passed as one comma-separated list.
  SequenceFileInputFormat.addInputPaths(
      job, Joiner.on(',').join(getPaths(fs, basePath, beginNS, endNS)));
  job.setInputFormatClass(SequenceFileInputFormat.class);

  // Output: sequence files under outputPath.
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  return job;
}
/**
 * Runs a pcap query as a MapReduce job, blocks until it finishes, and returns its results.
 *
 * <p>The job writes to a child of {@code baseOutputPath} whose name is the begin timestamp, the
 * end timestamp, the filter's string form of the query and a random UUID, joined with
 * underscores.
 *
 * @param basePath root path of the input data, forwarded to {@code createJob}
 * @param baseOutputPath parent directory under which the per-query output path is created
 * @param beginNS start timestamp of the query range
 * @param endNS end timestamp of the query range
 * @param numReducers number of reduce tasks for the job
 * @param fields filter description for the query
 * @param conf Hadoop configuration used to build the job and read the results
 * @param fs file system used to locate inputs and read the results
 * @param filterImpl strategy that renders and configures {@code fields}
 * @param <T> type of the filter description
 * @return whatever {@code readResults} produces for the job's output path
 * @throws IOException propagated from job creation, job execution or result reading
 * @throws ClassNotFoundException propagated from waiting on the job
 * @throws InterruptedException propagated from waiting on the job
 * @throws RuntimeException if the job finishes unsuccessfully
 */
public <T> List<byte[]> query(
    Path basePath,
    Path baseOutputPath,
    long beginNS,
    long endNS,
    int numReducers,
    T fields,
    Configuration conf,
    FileSystem fs,
    PcapFilterConfigurator<T> filterImpl)
    throws IOException, ClassNotFoundException, InterruptedException {
  // The random UUID keeps repeated runs of the same query from sharing an output directory name.
  String outputDirName =
      Joiner.on("_")
          .join(beginNS, endNS, filterImpl.queryToString(fields), UUID.randomUUID().toString());

  if (LOG.isDebugEnabled()) {
    DateFormat timestampFormat =
        DateFormat.getDateTimeInstance(DateFormat.LONG, DateFormat.LONG);
    // Date takes milliseconds; the range bounds are unsigned nanoseconds, hence the
    // unsigned division by 1,000,000.
    Date rangeStart = new Date(Long.divideUnsigned(beginNS, 1000000));
    Date rangeEnd = new Date(Long.divideUnsigned(endNS, 1000000));
    String from = timestampFormat.format(rangeStart);
    String to = timestampFormat.format(rangeEnd);
    LOG.debug(
        "Executing query "
            + filterImpl.queryToString(fields)
            + " on timerange "
            + from
            + " to "
            + to);
  }

  Path outputPath = new Path(baseOutputPath, outputDirName);
  Job queryJob =
      createJob(basePath, outputPath, beginNS, endNS, numReducers, fields, conf, fs, filterImpl);

  // Block until the job is done (verbose progress reporting enabled).
  if (!queryJob.waitForCompletion(true)) {
    throw new RuntimeException(
        "Unable to complete query due to errors. Please check logs for full errors.");
  }
  return readResults(outputPath, conf, fs);
}