public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: test.icde12.HadoopJoin <in> <out>"); System.exit(2); } Job job = new Job(conf, "hadoop join"); job.setJarByClass(HadoopJoin.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setPartitionerClass(ICDEPartitioner.class); // WritableComparator.define(Text.class,new ICDEComparator()); job.setSortComparatorClass(ICDEComparator.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(8); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
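The join driver above registers ICDEPartitioner and ICDEComparator without showing them. As a hedged illustration only (the real classes are not in the source, and the composite-key convention is an assumption), a partitioner that groups records by the join key alone might look like this:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch: assumes composite keys of the form "joinKey#tag",
// so records from both inputs that share a join key reach the same reducer.
public class ICDEPartitioner extends Partitioner<Text, Text> {
  @Override
  public int getPartition(Text key, Text value, int numPartitions) {
    String joinKey = key.toString().split("#", 2)[0];
    return (joinKey.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}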
public static void main(String[] args) throws Exception { if (ToolRunner.run(new FeatureMatching(), args) != 0) { System.out.println(".......Feature Match failure........"); System.exit(1); } System.exit(0); }
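ToolRunner.run() requires FeatureMatching to implement Tool; a minimal skeleton of that contract (the real run() body is not shown in the source) looks like this:

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;

public class FeatureMatching extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    // ToolRunner has already folded generic options (-D, -files, ...) into getConf().
    Job job = Job.getInstance(getConf(), "feature matching");
    // ... actual job wiring elided; it is not part of the source snippet ...
    return job.waitForCompletion(true) ? 0 : 1;
  }
}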
public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { final boolean logDebug = LOG.isDebugEnabled(); long startTime = 0; if (logDebug) { startTime = System.currentTimeMillis(); } ObjectWritable value = null; try { value = (ObjectWritable) client.call( new Invocation(method, args), getAddress(), protocol, ticket, rpcTimeout); } catch (RemoteException re) { throw re; } catch (ConnectException ce) { needCheckDnsUpdate = true; throw ce; } catch (NoRouteToHostException nrhe) { needCheckDnsUpdate = true; throw nrhe; } catch (PortUnreachableException pue) { needCheckDnsUpdate = true; throw pue; } catch (UnknownHostException uhe) { needCheckDnsUpdate = true; throw uhe; } if (logDebug) { long callTime = System.currentTimeMillis() - startTime; LOG.debug("Call: " + method.getName() + " " + callTime); } return value.get(); }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (remainArgs.length != 2) { System.err.println("Usage: wordcount <input> <output>"); System.exit(1); } Job job = new Job(conf, "wordcount"); job.setJarByClass(WordCount.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setNumReduceTasks(4); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileSystem.get(conf).delete(new Path(remainArgs[1]), true); FileInputFormat.setInputPaths(job, new Path(remainArgs[0])); FileOutputFormat.setOutputPath(job, new Path(remainArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
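The driver above references Map and Reduce classes that are not included; a hedged sketch of the canonical pair it presumably uses (tokenize, then sum counts, which is why one class can serve as both combiner and reducer):

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer it = new StringTokenizer(value.toString());
    while (it.hasMoreTokens()) {
      word.set(it.nextToken());
      context.write(word, ONE); // one count per token
    }
  }
}

public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) sum += v.get();
    context.write(key, new IntWritable(sum));
  }
}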
public Writable call(Class<?> protocol, Writable param, long receivedTime) throws IOException { try { Invocation call = (Invocation) param; if (verbose) log("Call: " + call); Method method = protocol.getMethod(call.getMethodName(), call.getParameterClasses()); method.setAccessible(true); int qTime = (int) (System.currentTimeMillis() - receivedTime); long startNanoTime = System.nanoTime(); Object value = method.invoke(instance, call.getParameters()); long processingMicroTime = (System.nanoTime() - startNanoTime) / 1000; if (LOG.isDebugEnabled()) { LOG.debug( "Served: " + call.getMethodName() + " queueTime (millisec)= " + qTime + " processingTime (microsec)= " + processingMicroTime); } rpcMetrics.rpcQueueTime.inc(qTime); rpcMetrics.rpcProcessingTime.inc(processingMicroTime); MetricsTimeVaryingRate m = (MetricsTimeVaryingRate) rpcMetrics.registry.get(call.getMethodName()); if (m == null) { try { m = new MetricsTimeVaryingRate(call.getMethodName(), rpcMetrics.registry); } catch (IllegalArgumentException iae) { // the metric has already been registered; re-fetch the handle LOG.debug("Error registering " + call.getMethodName(), iae); m = (MetricsTimeVaryingRate) rpcMetrics.registry.get(call.getMethodName()); } } // record call time in microseconds m.inc(processingMicroTime); if (verbose) log("Return: " + value); return new ObjectWritable(method.getReturnType(), value); } catch (InvocationTargetException e) { Throwable target = e.getTargetException(); if (target instanceof IOException) { throw (IOException) target; } else { IOException ioe = new IOException(target.toString()); ioe.setStackTrace(target.getStackTrace()); throw ioe; } } catch (Throwable e) { if (!(e instanceof IOException)) { LOG.error("Unexpected throwable object ", e); } IOException ioe = new IOException(e.toString()); ioe.setStackTrace(e.getStackTrace()); throw ioe; } }
public void inject(Path crawlDb, Path urlDir) throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); long start = System.currentTimeMillis(); if (LOG.isInfoEnabled()) { LOG.info("Injector: starting at " + sdf.format(start)); LOG.info("Injector: crawlDb: " + crawlDb); LOG.info("Injector: urlDir: " + urlDir); } Path tempDir = new Path( getConf().get("mapred.temp.dir", ".") + "/inject-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); // map text input file to a <url,CrawlDatum> file if (LOG.isInfoEnabled()) { LOG.info("Injector: Converting injected urls to crawl db entries."); } JobConf sortJob = new NutchJob(getConf()); sortJob.setJobName("inject " + urlDir); FileInputFormat.addInputPath(sortJob, urlDir); sortJob.setMapperClass(InjectMapper.class); FileOutputFormat.setOutputPath(sortJob, tempDir); sortJob.setOutputFormat(SequenceFileOutputFormat.class); sortJob.setOutputKeyClass(Text.class); sortJob.setOutputValueClass(CrawlDatum.class); sortJob.setLong("injector.current.time", System.currentTimeMillis()); RunningJob mapJob = JobClient.runJob(sortJob); long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue(); long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue(); LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered); LOG.info( "Injector: total number of urls injected after normalization and filtering: " + urlsInjected); // merge with existing crawl db if (LOG.isInfoEnabled()) { LOG.info("Injector: Merging injected urls into crawl db."); } JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb); FileInputFormat.addInputPath(mergeJob, tempDir); mergeJob.setReducerClass(InjectReducer.class); JobClient.runJob(mergeJob); CrawlDb.install(mergeJob, crawlDb); // clean up FileSystem fs = FileSystem.get(getConf()); fs.delete(tempDir, true); long end = System.currentTimeMillis(); LOG.info( "Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end)); }
static { System.load( (new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_java2412.so")) .getAbsolutePath()); System.load( (new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_highgui.so")) .getAbsolutePath()); }
/** ***********************Driver function****************** */ public static void main(String args[]) throws Exception { if (args.length != 3) { System.out.println( "provide sufficient arguments <input matrix file path> <input vector file path> <output file path>"); System.exit(-1); } int res = ToolRunner.run(new Configuration(), new Mat_vect_mul(), args); System.exit(res); }
/** Sets up configuration based on params */ private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) { if (argConf.get("file") == null) { logger.fatal("Missing file parameter"); System.exit(1); } if (argConf.get("hdfs_base_path") == null) { logger.fatal("Missing HDFS base path, check gestore-conf.xml"); System.exit(1); } if (argConf.get("hdfs_temp_path") == null) { logger.fatal("Missing HDFS temp path, check gestore-conf.xml"); System.exit(1); } if (argConf.get("local_temp_path") == null) { logger.fatal("Missing local temp path, check gestore-conf.xml"); System.exit(1); } // Input parameters curConf.put("run_id", argConf.get("run", "")); curConf.put("task_id", argConf.get("task", "")); curConf.put("file_id", argConf.get("file")); curConf.put("local_path", argConf.get("path", "")); curConf.put("type", argConf.get("type", "l2r")); curConf.put("timestamp_start", argConf.get("timestamp_start", "1")); curConf.put( "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE))); curConf.put("delimiter", argConf.get("regex", "ID=.*")); curConf.put("taxon", argConf.get("taxon", "all")); curConf.put("intermediate", argConf.get("full_run", "false")); curConf.put("quick_add", argConf.get("quick_add", "false")); Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*"); curConf.put("format", argConf.get("format", "unknown")); curConf.put("split", argConf.get("split", "1")); curConf.put("copy", argConf.get("copy", "true")); // Constants curConf.put("base_path", argConf.get("hdfs_base_path")); curConf.put("temp_path", argConf.get("hdfs_temp_path")); curConf.put("local_temp_path", argConf.get("local_temp_path")); curConf.put("db_name_files", argConf.get("hbase_file_table")); curConf.put("db_name_runs", argConf.get("hbase_run_table")); curConf.put("db_name_updates", argConf.get("hbase_db_update_table")); // Timestamps Date currentTime = new Date(); Date endDate = new Date(new Long(curConf.get("timestamp_stop"))); curConf.put("timestamp_real", Long.toString(currentTime.getTime())); return true; }
public void setLocalElasticSearchInstallation(JobConf conf) { String esConfigPath = conf.get(ES_CONFIG_OPT, ES_CONFIG); String esPluginsPath = conf.get(ES_PLUGINS_OPT, ES_PLUGINS); System.setProperty(ES_CONFIG_OPT, esConfigPath); System.setProperty(ES_PLUGINS_OPT, esPluginsPath); LOG.info( "Using Elasticsearch configuration file at " + esConfigPath + " and plugin directory " + esPluginsPath); }
public void setup(Context context) { try { // System.setProperty("java.library.path", "/home/gathors/proj/libs"); // System.loadLibrary(Core.NATIVE_xxx); // System.loadLibrary("/home/gathors/proj/libs/opencv-300.jar"); } catch (UnsatisfiedLinkError e) { System.err.println("\nNATIVE LIBRARY failed to load..."); System.err.println("ERROR:" + e); System.err.println("NATIVE_LIBRARY_NAME:" + Core.NATIVE_LIBRARY_NAME); System.err.println("#" + System.getProperty("java.library.path")); System.exit(1); } }
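A hedged working version of what the commented-out lines above attempt (library locations here are assumptions, not part of the original source): load the OpenCV binding once per JVM through java.library.path rather than hard-coded absolute paths:

static {
  // Assumes the directory containing libopencv_java*.so was passed as
  // -Djava.library.path=...; Core.NATIVE_LIBRARY_NAME resolves to the
  // platform-specific library name, e.g. "opencv_java2412".
  System.loadLibrary(org.opencv.core.Core.NATIVE_LIBRARY_NAME);
}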
public static void main(String args[]) throws Exception { Configuration c = new Configuration(); if (args.length != 2) { System.out.println("provide sufficient arguments"); System.exit(-1); } Job job = Job.getInstance(c, "Wordcount"); job.setJarByClass(Wordcount.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // job.submit(); job.waitForCompletion(true); }
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), getClass()); conf.setJobName("UFO count"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: avro UFO counter <in> <out>"); System.exit(2); } FileInputFormat.addInputPath(conf, new Path(otherArgs[0])); Path outputPath = new Path(otherArgs[1]); FileOutputFormat.setOutputPath(conf, outputPath); outputPath.getFileSystem(conf).delete(outputPath, true); Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc")); AvroJob.setInputSchema(conf, input_schema); AvroJob.setMapOutputSchema( conf, Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG))); AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA); AvroJob.setMapperClass(conf, AvroRecordMapper.class); AvroJob.setReducerClass(conf, AvroRecordReducer.class); conf.setInputFormat(AvroInputFormat.class); JobClient.runJob(conf); return 0; }
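The Avro job registers AvroRecordMapper and AvroRecordReducer, neither of which is shown. A hedged sketch of a mapper matching the declared STRING/LONG pair schema follows; the record field name is a placeholder, since ufo.avsc is not in the source:

import java.io.IOException;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroMapper;
import org.apache.avro.mapred.Pair;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.mapred.Reporter;

public static class AvroRecordMapper extends AvroMapper<GenericRecord, Pair<Utf8, Long>> {
  @Override
  public void map(GenericRecord in, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
      throws IOException {
    // "shape" is a hypothetical field of ufo.avsc, used only for illustration.
    Utf8 key = new Utf8(String.valueOf(in.get("shape")));
    collector.collect(new Pair<Utf8, Long>(key, 1L)); // count one sighting per record
  }
}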
/** * Get a proxy connection to a remote server * * @param protocol protocol class * @param clientVersion client version * @param addr remote address * @param conf configuration to use * @param rpcTimeout timeout for each RPC * @param timeout time in milliseconds before giving up * @return the proxy * @throws IOException if the far end threw a RemoteException */ static <T extends VersionedProtocol> ProtocolProxy<T> waitForProtocolProxy( Class<T> protocol, long clientVersion, InetSocketAddress addr, Configuration conf, long timeout, int rpcTimeout) throws IOException { long startTime = System.currentTimeMillis(); UserGroupInformation ugi = null; try { ugi = UserGroupInformation.login(conf); } catch (LoginException le) { throw new RuntimeException("Couldn't login!"); } IOException ioe; while (true) { try { return getProtocolProxy( protocol, clientVersion, addr, ugi, conf, NetUtils.getDefaultSocketFactory(conf), rpcTimeout); } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + addr + " not available yet, Zzzzz..."); ioe = se; } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server: " + addr); ioe = te; } // check if timed out if (System.currentTimeMillis() - timeout >= startTime) { throw ioe; } // wait for retry try { Thread.sleep(1000); } catch (InterruptedException ie) { // IGNORE } } }
/** * Load an edit log, and apply the changes to the in-memory structure * * <p>This is where we apply edits that we've been writing to disk all along. */ int loadFSEdits(File edits) throws IOException { int numEdits = 0; if (edits.exists()) { DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(edits))); try { while (in.available() > 0) { byte opcode = in.readByte(); numEdits++; switch (opcode) { case OP_ADD: { UTF8 name = new UTF8(); name.readFields(in); ArrayWritable aw = new ArrayWritable(Block.class); aw.readFields(in); Writable writables[] = (Writable[]) aw.get(); Block blocks[] = new Block[writables.length]; System.arraycopy(writables, 0, blocks, 0, blocks.length); unprotectedAddFile(name, blocks); break; } case OP_RENAME: { UTF8 src = new UTF8(); UTF8 dst = new UTF8(); src.readFields(in); dst.readFields(in); unprotectedRenameTo(src, dst); break; } case OP_DELETE: { UTF8 src = new UTF8(); src.readFields(in); unprotectedDelete(src); break; } case OP_MKDIR: { UTF8 src = new UTF8(); src.readFields(in); unprotectedMkdir(src.toString()); break; } default: { throw new IOException("Unknown opcode " + opcode); } } } } finally { in.close(); } } return numEdits; }
private synchronized InetSocketAddress getAddress() { if (needCheckDnsUpdate && address != null && address.getHostName() != null && System.currentTimeMillis() - this.timeLastDnsCheck > MIN_DNS_CHECK_INTERVAL_MSEC) { try { InetSocketAddress newAddr = NetUtils.resolveAddress(address); if (newAddr != null) { LOG.info("DNS change: " + newAddr); address = newAddr; } } finally { this.timeLastDnsCheck = System.currentTimeMillis(); } } needCheckDnsUpdate = false; return address; }
public void configure(JobConf job) { this.jobConf = job; urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT); interval = jobConf.getInt("db.fetch.interval.default", 2592000); filters = new URLFilters(jobConf); scfilters = new ScoringFilters(jobConf); scoreInjected = jobConf.getFloat("db.score.injected", 1.0f); curTime = job.getLong("injector.current.time", System.currentTimeMillis()); }
public void testInputFormat() { try { JobConf conf = new JobConf(); String TMP_DIR = System.getProperty("test.build.data", "/tmp"); Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile"); SequenceFile.Writer sfw = SequenceFile.createWriter( FileSystem.getLocal(conf), conf, filename, ChukwaArchiveKey.class, ChunkImpl.class, SequenceFile.CompressionType.NONE, Reporter.NULL); StringBuilder buf = new StringBuilder(); int offsets[] = new int[lines.length]; for (int i = 0; i < lines.length; ++i) { buf.append(lines[i]); buf.append("\n"); offsets[i] = buf.length() - 1; } ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0); ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null); val.setRecordOffsets(offsets); sfw.append(key, val); sfw.append(key, val); // write it twice sfw.close(); long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen(); InputSplit split = new FileSplit(filename, 0, len, (String[]) null); ChukwaInputFormat in = new ChukwaInputFormat(); RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL); LongWritable l = r.createKey(); Text line = r.createValue(); for (int i = 0; i < lines.length * 2; ++i) { boolean succeeded = r.next(l, line); assertTrue(succeeded); assertEquals(i, l.get()); assertEquals(lines[i % lines.length], line.toString()); System.out.println("read line: " + l.get() + " " + line); } boolean succeeded = r.next(l, line); assertFalse(succeeded); } catch (IOException e) { e.printStackTrace(); fail("IO exception " + e); } }
public static boolean stopIteration(Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); Path preFile = new Path("preX/Result"); Path curFile = new Path("curX/part-00000"); if (!(fs.exists(preFile) && fs.exists(curFile))) { System.exit(1); } boolean stop = true; String line1, line2; BufferedReader br1 = new BufferedReader(new InputStreamReader(fs.open(preFile))); BufferedReader br2 = new BufferedReader(new InputStreamReader(fs.open(curFile))); try { while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) { String[] str1 = line1.split("\\s+"); String[] str2 = line2.split("\\s+"); double preElem = Double.parseDouble(str1[1]); double curElem = Double.parseDouble(str2[1]); if (Math.abs(preElem - curElem) > eps) { stop = false; break; } } } finally { br1.close(); br2.close(); } if (!stop) { fs.delete(preFile, true); if (!fs.rename(curFile, preFile)) { System.exit(1); } } return stop; }
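A hedged usage sketch for stopIteration() above: an iterative driver reruns its job until the previous and current solution vectors agree within eps (runIteration() is a hypothetical stand-in for the per-pass job submission, not a function from the source):

Configuration conf = new Configuration();
do {
  runIteration(conf); // hypothetical: one MapReduce pass that writes curX/part-00000
} while (!stopIteration(conf)); // compares curX against preX; promotes curX when not yet converged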
public static void main(String[] args) throws Exception { Job job = new Job(); job.setJarByClass(Sort.class); job.setJobName("Sort"); FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input/")); FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output/")); job.setMapperClass(Map.class); // job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(2); System.exit(job.waitForCompletion(true) ? 0 : 1); }
VerProtocolImpl[] getSupportedProtocolVersions(RPC.RpcKind rpcKind, String protocolName) { VerProtocolImpl[] resultk = new VerProtocolImpl[getProtocolImplMap(rpcKind).size()]; int i = 0; for (Map.Entry<ProtoNameVer, ProtoClassProtoImpl> pv : getProtocolImplMap(rpcKind).entrySet()) { if (pv.getKey().protocol.equals(protocolName)) { resultk[i++] = new VerProtocolImpl(pv.getKey().version, pv.getValue()); } } if (i == 0) { return null; } VerProtocolImpl[] result = new VerProtocolImpl[i]; System.arraycopy(resultk, 0, result, 0, i); return result; }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job jobA = Job.getInstance(conf, "delaysum"); jobA.setOutputKeyClass(Text.class); jobA.setOutputValueClass(IntWritable.class); jobA.setMapperClass(ArrivalMap.class); jobA.setReducerClass(ArrivalReduce.class); FileInputFormat.setInputPaths(jobA, new Path(args[0])); FileOutputFormat.setOutputPath(jobA, new Path(args[1])); jobA.setJarByClass(MeanDelayOriginDestination.class); System.exit(jobA.waitForCompletion(true) ? 0 : 1); }
public int run(String[] args) throws Exception { Path tempDir = new Path("/user/akhfa/temp"); Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "word count"); job.setJarByClass(AuthorCounter.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, tempDir); return job.waitForCompletion(true) ? 0 : 1; }
public static void main(String[] args) throws IOException { if (args.length != 2) { System.err.println("Usage: OldMaxTemperature <input path> <output path>"); System.exit(-1); } JobConf conf = new JobConf(OldMaxTemperature.class); conf.setJobName("Max temperature"); FileInputFormat.addInputPath(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); conf.setMapperClass(OldMaxTemperatureMapper.class); conf.setReducerClass(OldMaxTemperatureReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); JobClient.runJob(conf); }
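The driver above uses the old org.apache.hadoop.mapred API; for contrast, a hedged sketch of the same driver on the newer mapreduce API (the MaxTemperatureMapper/MaxTemperatureReducer names are assumed new-API counterparts, not classes from the source):

public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: MaxTemperature <input path> <output path>");
    System.exit(-1);
  }
  // org.apache.hadoop.mapreduce.Job replaces JobConf + JobClient; the
  // FileInputFormat/FileOutputFormat here come from mapreduce.lib.input/output.
  Job job = Job.getInstance(new Configuration(), "Max temperature");
  job.setJarByClass(MaxTemperature.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(MaxTemperatureMapper.class);
  job.setReducerClass(MaxTemperatureReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}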
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); GenericOptionsParser gop = new GenericOptionsParser(conf, args); args = gop.getRemainingArgs(); if (args.length != 2) { System.err.println("Usage: WordCountDriver <inpath> <outpath>"); return 1; } Path inpath = new Path(args[0]); Path outpath = new Path(args[1]); Job job = new Job(conf, "WordCountDriver"); job.setJarByClass(WordCountDriver.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setInputFormatClass(TextInputFormat.class); FileInputFormat.setInputPaths(job, inpath); FileOutputFormat.setOutputPath(job, outpath); job.setNumReduceTasks(2); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setPartitionerClass(WordCountPart.class); return job.waitForCompletion(true) ? 0 : 1; }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path tmpPath = new Path("/w1/tmp"); fs.delete(tmpPath, true); Job jobA = Job.getInstance(conf, "delaysum"); jobA.setOutputKeyClass(Text.class); jobA.setOutputValueClass(IntWritable.class); jobA.setMapperClass(ArrivalMap.class); jobA.setReducerClass(ArrivalReduce.class); FileInputFormat.setInputPaths(jobA, new Path(args[0])); FileOutputFormat.setOutputPath(jobA, tmpPath); jobA.setJarByClass(TopArrivalPerformanceCarriersOD.class); jobA.waitForCompletion(true); Job jobB = Job.getInstance(conf, "Best Airline"); jobB.setOutputKeyClass(Text.class); jobB.setOutputValueClass(FloatWritable.class); jobB.setMapOutputKeyClass(NullWritable.class); jobB.setMapOutputValueClass(TextArrayWritable.class); jobB.setMapperClass(DelayMap.class); jobB.setReducerClass(DelayReduce.class); jobB.setNumReduceTasks(1); FileInputFormat.setInputPaths(jobB, tmpPath); FileOutputFormat.setOutputPath(jobB, new Path(args[1])); jobB.setInputFormatClass(KeyValueTextInputFormat.class); jobB.setOutputFormatClass(TextOutputFormat.class); jobB.setJarByClass(TopArrivalPerformanceCarriersOD.class); System.exit(jobB.waitForCompletion(true) ? 0 : 1); }
// Transform the JSON-encoded feature into an OpenCV Mat public static Mat json2mat(String json) { JsonParser parser = new JsonParser(); JsonElement parseTree = parser.parse(json); // Verify the input is a JSON object if (!parseTree.isJsonObject()) { System.out.println("The input is not a JSON type...\nExiting..."); System.exit(1); } JsonObject jobj = parseTree.getAsJsonObject(); // Detect broken/null features if (jobj.get("rows") == null || jobj.get("cols") == null || jobj.get("type") == null || jobj.get("data") == null) { return null; } int rows = jobj.get("rows").getAsInt(); int cols = jobj.get("cols").getAsInt(); int type = jobj.get("type").getAsInt(); String data = jobj.get("data").getAsString(); String[] pixs = data.split(","); Mat descriptor = new Mat(rows, cols, type); for (String pix : pixs) { String[] tmp = pix.split(" "); int r_pos = Integer.valueOf(tmp[0]); int c_pos = Integer.valueOf(tmp[1]); double rgb = Double.valueOf(tmp[2]); descriptor.put(r_pos, c_pos, rgb); } return descriptor; }
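A short usage sketch for json2mat(); the JSON layout ("rows"/"cols"/"type" plus comma-separated "row col value" triples in "data") is read off the parsing code above, and type 5 corresponds to OpenCV's CV_32F:

String json = "{\"rows\":2,\"cols\":2,\"type\":5,"
    + "\"data\":\"0 0 1.0,0 1 2.0,1 0 3.0,1 1 4.0\"}";
Mat m = json2mat(json);
if (m != null) {
  System.out.println("Recovered a " + m.rows() + "x" + m.cols() + " descriptor");
}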
public void testcheckOutputSpecsForbidRecordCompression() throws IOException { Job job = Job.getInstance(new Configuration(), "testcheckOutputSpecsForbidRecordCompression"); FileSystem fs = FileSystem.getLocal(job.getConfiguration()); Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output"); fs.delete(outputdir, true); // Without an output path, FileOutputFormat.checkOutputSpecs will throw // InvalidJobConfException FileOutputFormat.setOutputPath(job, outputdir); // SequenceFileAsBinaryOutputFormat doesn't support record compression // It should throw an exception when checked by checkOutputSpecs SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true); SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); try { new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job); } catch (Exception e) { fail( "Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat: Caught " + e.getClass().getName()); } SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD); try { new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job); fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat"); } catch (InvalidJobConfException ie) { // expected } catch (Exception e) { fail( "Expected " + InvalidJobConfException.class.getName() + " but caught " + e.getClass().getName()); } }
public void testFormat() throws Exception { JobConf job = new JobConf(conf); FileSystem fs = FileSystem.getLocal(conf); Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Reporter reporter = Reporter.NULL; int seed = new Random().nextInt(); // LOG.info("seed = "+seed); Random random = new Random(seed); fs.delete(dir, true); FileInputFormat.setInputPaths(job, dir); // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) { // LOG.info("creating; entries = " + length); // create a file with length entries SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class); try { for (int i = 0; i < length; i++) { IntWritable key = new IntWritable(i); byte[] data = new byte[random.nextInt(10)]; random.nextBytes(data); BytesWritable value = new BytesWritable(data); writer.append(key, value); } } finally { writer.close(); } // try splitting the file in a variety of sizes InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>(); IntWritable key = new IntWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1; // LOG.info("splitting: requesting = " + numSplits); InputSplit[] splits = format.getSplits(job, numSplits); // LOG.info("splitting: got = " + splits.length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.length; j++) { RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter); try { int count = 0; while (reader.next(key, value)) { // if (bits.get(key.get())) { // LOG.info("splits["+j+"]="+splits[j]+" : " + // key.get()); // LOG.info("@"+reader.getPos()); // } assertFalse("Key in multiple partitions.", bits.get(key.get())); bits.set(key.get()); count++; } // LOG.info("splits["+j+"]="+splits[j]+" count=" + // count); } finally { reader.close(); } } assertEquals("Some keys in no partition.", length, bits.cardinality()); } } }
public class TestFileOutputCommitter extends TestCase { private static Path outDir = new Path(System.getProperty("test.build.data", "/tmp"), "output"); // A random task attempt id for testing. private static String attempt = "attempt_200707121733_0001_m_000000_0"; private static TaskAttemptID taskID = TaskAttemptID.forName(attempt); private Text key1 = new Text("key1"); private Text key2 = new Text("key2"); private Text val1 = new Text("val1"); private Text val2 = new Text("val2"); @SuppressWarnings("unchecked") private void writeOutput(RecordWriter theRecordWriter, Reporter reporter) throws IOException { NullWritable nullWritable = NullWritable.get(); try { theRecordWriter.write(key1, val1); theRecordWriter.write(null, nullWritable); theRecordWriter.write(null, val1); theRecordWriter.write(nullWritable, val2); theRecordWriter.write(key2, nullWritable); theRecordWriter.write(key1, null); theRecordWriter.write(null, null); theRecordWriter.write(key2, val2); } finally { theRecordWriter.close(reporter); } } private void setConfForFileOutputCommitter(JobConf job) { job.set(JobContext.TASK_ATTEMPT_ID, attempt); job.setOutputCommitter(FileOutputCommitter.class); FileOutputFormat.setOutputPath(job, outDir); } @SuppressWarnings("unchecked") public void testCommitter() throws Exception { JobConf job = new JobConf(); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext)); committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = FileSystem.getLocal(job); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, file).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); } public void testAbort() throws IOException { JobConf job = new JobConf(); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext)); // do setup committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = FileSystem.getLocal(job); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do abort 
committer.abortTask(tContext); File expectedFile = new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString()); assertFalse("task temp dir still exists", expectedFile.exists()); committer.abortJob(jContext, JobStatus.State.FAILED); expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString()); assertFalse("job temp dir still exists", expectedFile.exists()); assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length); FileUtil.fullyDelete(new File(outDir.toString())); } public static class FakeFileSystem extends RawLocalFileSystem { public FakeFileSystem() { super(); } public URI getUri() { return URI.create("faildel:///"); } @Override public boolean delete(Path p, boolean recursive) throws IOException { throw new IOException("fake delete failed"); } } public void testFailAbort() throws IOException { JobConf job = new JobConf(); job.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///"); job.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext)); // do setup committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = new FakeFileSystem(); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do abort Throwable th = null; try { committer.abortTask(tContext); } catch (IOException ie) { th = ie; } assertNotNull(th); assertTrue(th instanceof IOException); assertTrue(th.getMessage().contains("fake delete failed")); File jobTmpDir = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString()); File taskTmpDir = new File(jobTmpDir, "_" + taskID); File expectedFile = new File(taskTmpDir, file); assertTrue(expectedFile + " does not exist", expectedFile.exists()); th = null; try { committer.abortJob(jContext, JobStatus.State.FAILED); } catch (IOException ie) { th = ie; } assertNotNull(th); assertTrue(th instanceof IOException); assertTrue(th.getMessage().contains("fake delete failed")); assertTrue("job temp dir does not exist", jobTmpDir.exists()); } }