/** Process incoming heartbeat messages from the task trackers. */
public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) {
  String trackerName = trackerStatus.getTrackerName();
  trackerStatus.setLastSeen(System.currentTimeMillis());

  synchronized (taskTrackers) {
    synchronized (trackerExpiryQueue) {
      boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus);
      if (initialContact) {
        // If it's first contact, then clear out any state hanging around
        if (seenBefore) {
          lostTaskTracker(trackerName);
        }
      } else {
        // If not first contact, there should be some record of the tracker
        if (!seenBefore) {
          return InterTrackerProtocol.UNKNOWN_TASKTRACKER;
        }
      }

      if (initialContact) {
        trackerExpiryQueue.add(trackerStatus);
      }
    }
  }

  updateTaskStatuses(trackerStatus);
  // LOG.info("Got heartbeat from " + trackerName);
  return InterTrackerProtocol.TRACKERS_OK;
}
/** For debugging. */
public static void main(String[] args) throws Exception {
  final String usage = "NutchBean query";
  if (args.length == 0) {
    System.err.println(usage);
    System.exit(-1);
  }
  final Configuration conf = NutchConfiguration.create();
  final NutchBean bean = new NutchBean(conf);
  try {
    final Query query = Query.parse(args[0], conf);
    final Hits hits = bean.search(query, 10);
    System.out.println("Total hits: " + hits.getTotal());
    final int length = (int) Math.min(hits.getTotal(), 10);
    final Hit[] show = hits.getHits(0, length);
    final HitDetails[] details = bean.getDetails(show);
    final Summary[] summaries = bean.getSummary(details, query);
    // 'details' and 'summaries' each have 'length' entries, so bound the loop by 'length'
    for (int i = 0; i < length; i++) {
      System.out.println(" " + i + " " + details[i] + "\n" + summaries[i]);
    }
  } catch (Throwable t) {
    LOG.error("Exception occurred while executing search: " + t, t);
    System.exit(1);
  }
  System.exit(0);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: test.icde12.HadoopJoin <in> <out>");
    System.exit(2);
  }
  Job job = new Job(conf, "hadoop join");
  job.setJarByClass(HadoopJoin.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setPartitionerClass(ICDEPartitioner.class);
  // WritableComparator.define(Text.class, new ICDEComparator());
  job.setSortComparatorClass(ICDEComparator.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(8);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
private static void run(Callable c, boolean read, int size) {
  // Count all I/O time from here, including all retry overheads
  long start_io_ms = System.currentTimeMillis();
  while (true) {
    try {
      long start_ns = System.nanoTime(); // Blocking I/O call timing - without counting repeats
      c.call();
      TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
      break;
      // Explicitly ignore the following exceptions but
      // fail on the rest of the IOExceptions
    } catch (EOFException e) {
      ignoreAndWait(e, false);
    } catch (SocketTimeoutException e) {
      ignoreAndWait(e, false);
    } catch (S3Exception e) {
      // S3Exception must be caught before IOException. This is tricky code -
      // we support different HDFS versions: new versions declare S3Exception
      // as an IOException, but old versions (0.20.xxx) declare it as a
      // RuntimeException, so it has to be handled ahead of the IOException clause.
      ignoreAndWait(e, false);
    } catch (IOException e) {
      ignoreAndWait(e, true);
    } catch (Exception e) {
      throw Log.errRTExcept(e);
    }
  }
}
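// A minimal usage sketch for the retry wrapper above. The FileSystem, Path,
// and buffer names here are hypothetical stand-ins, not part of the original
// code: one blocking HDFS read is wrapped in a Callable so that transient
// EOF/timeout errors are retried while other failures propagate.
private static byte[] readWithRetries(final FileSystem fs, final Path path, final int size)
    throws IOException {
  final byte[] buf = new byte[size];
  final FSDataInputStream in = fs.open(path);
  try {
    run(new Callable() {
      public Object call() throws Exception {
        in.readFully(0, buf); // one blocking read; run() retries it on transient errors
        return null;
      }
    }, true /* read */, size);
  } finally {
    in.close();
  }
  return buf;
}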
public static void main(String[] args) throws Exception {
  if (ToolRunner.run(new FeatureMatching(), args) == 1) {
    System.out.println(".......Feature Match failure........");
    System.exit(1);
  }
  System.exit(0);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length < 4) {
    writeUsage();
    return 1;
  }
  Path secretsPath = new Path(args[0]);
  Path saltFilePath = new Path(args[1]);
  Path inputPath = new Path(args[2]);
  Path outputPath = new Path(args[3]);

  // Make sure the salt file exists
  generateSaltIfNeeded(saltFilePath, secretsPath);

  // Configure the job
  Job job = configureJob(secretsPath, saltFilePath, inputPath, outputPath);

  // Run it
  long startTime = System.currentTimeMillis();
  job.submit();
  if (job.waitForCompletion(true)) {
    System.out.printf(
        "Done obfuscating - took %d seconds.\n", (System.currentTimeMillis() - startTime) / 1000);
  } else {
    System.err.printf("Job finished with errors: %s\n", job.getStatus().getFailureInfo());
    return 2;
  }
  return 0;
}
public void inject(Path crawlDb, Path urlDir) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: starting at " + sdf.format(start));
    LOG.info("Injector: crawlDb: " + crawlDb);
    LOG.info("Injector: urlDir: " + urlDir);
  }

  Path tempDir =
      new Path(
          getConf().get("mapred.temp.dir", ".")
              + "/inject-temp-"
              + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // map text input file to a <url,CrawlDatum> file
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Converting injected urls to crawl db entries.");
  }
  JobConf sortJob = new NutchJob(getConf());
  sortJob.setJobName("inject " + urlDir);
  FileInputFormat.addInputPath(sortJob, urlDir);
  sortJob.setMapperClass(InjectMapper.class);
  FileOutputFormat.setOutputPath(sortJob, tempDir);
  sortJob.setOutputFormat(SequenceFileOutputFormat.class);
  sortJob.setOutputKeyClass(Text.class);
  sortJob.setOutputValueClass(CrawlDatum.class);
  sortJob.setLong("injector.current.time", System.currentTimeMillis());

  RunningJob mapJob = JobClient.runJob(sortJob);
  long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
  long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
  LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
  LOG.info(
      "Injector: total number of urls injected after normalization and filtering: "
          + urlsInjected);

  // merge with existing crawl db
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Merging injected urls into crawl db.");
  }
  JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
  FileInputFormat.addInputPath(mergeJob, tempDir);
  mergeJob.setReducerClass(InjectReducer.class);
  JobClient.runJob(mergeJob);
  CrawlDb.install(mergeJob, crawlDb);

  // clean up
  FileSystem fs = FileSystem.get(getConf());
  fs.delete(tempDir, true);

  long end = System.currentTimeMillis();
  LOG.info(
      "Injector: finished at "
          + sdf.format(end)
          + ", elapsed: "
          + TimingUtil.elapsedTime(start, end));
}
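// Hedged usage sketch for the two-job inject pipeline above. It assumes the
// enclosing class is Nutch's Injector and is Configurable (the method calls
// getConf()); the class name and the setConf call are assumptions, not shown
// in the snippet itself.
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: Injector <crawldb> <url_dir>");
    System.exit(-1);
  }
  Injector injector = new Injector();
  injector.setConf(NutchConfiguration.create());
  // args[0]: existing crawl db, args[1]: directory of seed-url text files
  injector.inject(new Path(args[0]), new Path(args[1]));
}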
static {
  System.load(
      new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_java2412.so")
          .getAbsolutePath());
  System.load(
      new File("/home/gathors/proj/v-opencv/FeatureMatching/libs/libopencv_highgui.so")
          .getAbsolutePath());
}
/** Sets up configuration based on params */
private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {
  if (argConf.get("file") == null) {
    logger.fatal("Missing file parameter");
    System.exit(1);
  }
  if (argConf.get("hdfs_base_path") == null) {
    logger.fatal("Missing HDFS base path, check gestore-conf.xml");
    System.exit(1);
  }
  if (argConf.get("hdfs_temp_path") == null) {
    logger.fatal("Missing HDFS temp path, check gestore-conf.xml");
    System.exit(1);
  }
  if (argConf.get("local_temp_path") == null) {
    logger.fatal("Missing local temp path, check gestore-conf.xml");
    System.exit(1);
  }

  // Input parameters
  curConf.put("run_id", argConf.get("run", ""));
  curConf.put("task_id", argConf.get("task", ""));
  curConf.put("file_id", argConf.get("file"));
  curConf.put("local_path", argConf.get("path", ""));
  curConf.put("type", argConf.get("type", "l2r"));
  curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
  curConf.put(
      "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
  curConf.put("delimiter", argConf.get("regex", "ID=.*"));
  curConf.put("taxon", argConf.get("taxon", "all"));
  curConf.put("intermediate", argConf.get("full_run", "false"));
  curConf.put("quick_add", argConf.get("quick_add", "false"));
  Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*");
  curConf.put("format", argConf.get("format", "unknown"));
  curConf.put("split", argConf.get("split", "1"));
  curConf.put("copy", argConf.get("copy", "true"));

  // Constants
  curConf.put("base_path", argConf.get("hdfs_base_path"));
  curConf.put("temp_path", argConf.get("hdfs_temp_path"));
  curConf.put("local_temp_path", argConf.get("local_temp_path"));
  curConf.put("db_name_files", argConf.get("hbase_file_table"));
  curConf.put("db_name_runs", argConf.get("hbase_run_table"));
  curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

  // Timestamps
  Date currentTime = new Date();
  Date endDate = new Date(Long.parseLong(curConf.get("timestamp_stop")));
  curConf.put("timestamp_real", Long.toString(currentTime.getTime()));

  return true;
}
public void setup(Context context) {
  try {
    // System.setProperty("java.library.path", "/home/gathors/proj/libs");
    // System.loadLibrary(Core.NATIVE_xxx);
    // System.loadLibrary("/home/gathors/proj/libs/opencv-300.jar");
  } catch (UnsatisfiedLinkError e) {
    System.err.println("\nNATIVE LIBRARY failed to load...");
    System.err.println("ERROR: " + e);
    System.err.println("NATIVE_LIBRARY_NAME: " + Core.NATIVE_LIBRARY_NAME);
    System.err.println("#" + System.getProperty("java.library.path"));
    System.exit(1);
  }
}
/**
 * The run method lives for the life of the JobTracker, and removes Jobs that are not still
 * running, but which finished a long time ago.
 */
public void run() {
  while (shouldRun) {
    try {
      Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
    } catch (InterruptedException ie) {
    }

    synchronized (jobs) {
      synchronized (jobInitQueue) {
        synchronized (jobsByArrival) {
          for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
            String jobid = (String) it.next();
            JobInProgress job = (JobInProgress) jobs.get(jobid);

            if (job.getStatus().getRunState() != JobStatus.RUNNING
                && job.getStatus().getRunState() != JobStatus.PREP
                && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
              it.remove();
              jobInitQueue.remove(job);
              jobsByArrival.remove(job);
            }
          }
        }
      }
    }
  }
}
/** Implements basic throttling capabilities. */
public static class Throttler {
  double bytesPerSec;
  long lastTime = System.currentTimeMillis();

  public Throttler(double bytesPerSec) {
    this.bytesPerSec = bytesPerSec;
  }

  public void incrementAndThrottle(int bytes) {
    if (bytesPerSec < 1) { // no throttle at all
      return;
    }
    long currentTime = System.currentTimeMillis();
    long timeDiff = currentTime - lastTime;
    if (timeDiff == 0) {
      timeDiff = 1;
    }

    // Observed throughput since the last call, in bytes/sec; named distinctly
    // so it does not shadow the configured limit 'this.bytesPerSec'.
    double observedBytesPerSec = (bytes / (double) timeDiff) * 1000;
    if (observedBytesPerSec > this.bytesPerSec) {
      // Throttle: sleep long enough to bring the average rate back under the limit
      double exceededByFactorOf = observedBytesPerSec / this.bytesPerSec;
      try {
        long mustSleep = (long) ((exceededByFactorOf - 1) * timeDiff);
        Thread.sleep(mustSleep);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
    lastTime = System.currentTimeMillis();
  }
}
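// Hedged usage sketch for the Throttler above. The stream arguments and the
// 1 MB/s limit are hypothetical, not from the original code: a copy loop is
// rate-capped by calling incrementAndThrottle after each chunk.
public static void copyThrottled(InputStream in, OutputStream out) throws IOException {
  Throttler throttler = new Throttler(1024 * 1024); // cap at roughly 1 MB/s
  byte[] buffer = new byte[8192];
  int n;
  while ((n = in.read(buffer)) != -1) {
    out.write(buffer, 0, n);
    throttler.incrementAndThrottle(n); // sleeps when the copy runs too fast
  }
}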
/**
 * Start the JobTracker process. This is used only for debugging. As a rule, JobTracker should be
 * run as part of the DFS Namenode process.
 */
public static void main(String argv[]) throws IOException, InterruptedException {
  if (argv.length != 0) {
    System.out.println("usage: JobTracker");
    System.exit(-1);
  }
  startTracker(new Configuration());
}
/** Start the JobTracker process, listen on the indicated port */
JobTracker(Configuration conf) throws IOException {
  //
  // Grab some static constants
  //
  maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
  RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000);
  RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000);
  TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f);
  PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f);
  MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks;

  // This is a directory of temporary submission files. We delete it
  // on startup, and can delete any files that we're done with
  this.conf = conf;
  JobConf jobConf = new JobConf(conf);
  this.systemDir = jobConf.getSystemDir();
  this.fs = FileSystem.get(conf);
  FileUtil.fullyDelete(fs, systemDir);
  fs.mkdirs(systemDir);

  // Same with 'localDir' except it's always on the local disk.
  jobConf.deleteLocalFiles(SUBDIR);

  // Set ports, start RPC servers, etc.
  InetSocketAddress addr = getAddress(conf);
  this.localMachine = addr.getHostName();
  this.port = addr.getPort();
  this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf);
  this.interTrackerServer.start();
  Properties p = System.getProperties();
  for (Iterator it = p.keySet().iterator(); it.hasNext(); ) {
    String key = (String) it.next();
    String val = p.getProperty(key);
    LOG.info("Property '" + key + "' is " + val);
  }

  this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030);
  this.infoServer = new JobTrackerInfoServer(this, infoPort);
  this.infoServer.start();

  this.startTime = System.currentTimeMillis();

  new Thread(this.expireTrackers).start();
  new Thread(this.retireJobs).start();
  new Thread(this.initJobs).start();
}
public void configure(JobConf job) {
  this.jobConf = job;
  urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
  interval = jobConf.getInt("db.fetch.interval.default", 2592000);
  filters = new URLFilters(jobConf);
  scfilters = new ScoringFilters(jobConf);
  scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
  curTime = job.getLong("injector.current.time", System.currentTimeMillis());
}
/**
 * Add a file path to the current set of classpath entries. It adds the file to cache as well.
 * Intended to be used by user code.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code file} should be interpreted.
 */
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
    throws IOException {
  String filepath = file.toUri().getPath();
  String classpath = conf.get("mapred.job.classpath.files");
  conf.set(
      "mapred.job.classpath.files",
      classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath);
  URI uri = fs.makeQualified(file).toUri();
  addCacheFile(uri, conf);
}
/**
 * Get the archive entries in classpath as an array of Path
 *
 * @param conf Configuration that contains the classpath setting
 */
public static Path[] getArchiveClassPaths(Configuration conf) {
  String classpath = conf.get("mapred.job.classpath.archives");
  if (classpath == null) {
    return null;
  }
  ArrayList<Object> list =
      Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
  Path[] paths = new Path[list.size()];
  for (int i = 0; i < list.size(); i++) {
    paths[i] = new Path((String) list.get(i));
  }
  return paths;
}
@Override
public int doWork() {
  try {
    setupProcedureStore();
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    Future<?>[] futures = new Future<?>[numThreads];

    // Start worker threads.
    long start = System.currentTimeMillis();
    for (int i = 0; i < numThreads; i++) {
      futures[i] = executor.submit(this.new Worker(start));
    }

    boolean failure = false;
    try {
      for (Future<?> future : futures) {
        long timeout = start + WORKER_THREADS_TIMEOUT_SEC * 1000 - System.currentTimeMillis();
        failure |= (future.get(timeout, TimeUnit.MILLISECONDS).equals(EXIT_FAILURE));
      }
    } catch (Exception e) {
      System.err.println("Exception in worker thread.");
      e.printStackTrace();
      return EXIT_FAILURE;
    }
    executor.shutdown();
    if (failure) {
      return EXIT_FAILURE;
    }

    long timeTaken = System.currentTimeMillis() - start;
    System.out.println("******************************************");
    System.out.println("Num threads    : " + numThreads);
    System.out.println("Num procedures : " + numProcs);
    System.out.println("Sync type      : " + syncType);
    System.out.println("Time taken     : " + (timeTaken / 1000.0f) + "sec");
    System.out.println("******************************************");
    return EXIT_SUCCESS;
  } catch (IOException e) {
    e.printStackTrace();
    return EXIT_FAILURE;
  } finally {
    tearDownProcedureStore();
  }
}
private static void run(Callable c, boolean read, int size) {
  // Count all I/O time from here, including all retry overheads
  long start_io_ms = System.currentTimeMillis();
  while (true) {
    try {
      long start_ns = System.nanoTime(); // Blocking I/O call timing - without counting repeats
      c.call();
      TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
      break;
      // Explicitly ignore the following exceptions but
      // fail on the rest of the IOExceptions
    } catch (EOFException e) {
      ignoreAndWait(e, false);
    } catch (SocketTimeoutException e) {
      ignoreAndWait(e, false);
    } catch (IOException e) {
      ignoreAndWait(e, true);
    } catch (Exception e) {
      throw Log.errRTExcept(e);
    }
  }
}
/**
 * Add an archive path to the current set of classpath entries. It adds the archive to cache as
 * well. Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  String archivepath = archive.toUri().getPath();
  String classpath = conf.get("mapred.job.classpath.archives");
  conf.set(
      "mapred.job.classpath.archives",
      classpath == null
          ? archivepath
          : classpath + System.getProperty("path.separator") + archivepath);
  URI uri = fs.makeQualified(archive).toUri();
  addCacheArchive(uri, conf);
}
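// Hedged usage sketch tying together the classpath helpers above
// (addFileToClassPath, addArchiveToClassPath, getArchiveClassPaths). The
// jar/zip paths are hypothetical: a job driver publishes a jar and an archive
// into the distributed-cache classpath, then reads the archive entries back.
public static void setupJobClasspath(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  addFileToClassPath(new Path("/apps/lib/helper.jar"), conf, fs);
  addArchiveToClassPath(new Path("/apps/lib/resources.zip"), conf, fs);

  Path[] archives = getArchiveClassPaths(conf);
  if (archives != null) {
    for (Path p : archives) {
      System.out.println("Archive on classpath: " + p);
    }
  }
}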
/**
 * Default class initialization
 *
 * @param fsuri path to Swift
 * @param conf Hadoop configuration
 * @throws IOException
 */
@Override
public void initialize(URI fsuri, Configuration conf) throws IOException {
  super.initialize(fsuri, conf);
  setConf(conf);
  if (store == null) {
    store = new SwiftNativeFileSystemStore();
  }
  this.uri = fsuri;
  this.workingDir =
      new Path("/user", System.getProperty("user.name"))
          .makeQualified(uri, new Path(System.getProperty("user.name")));
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "Initializing SwiftNativeFileSystem against URI "
            + uri
            + " and working dir "
            + workingDir);
  }
  store.initialize(uri, conf);
  LOG.debug("SwiftFileSystem initialized");
}
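// Hedged sketch of how initialize() above is typically reached: Hadoop
// instantiates the FileSystem implementation mapped to the URI scheme and
// calls initialize(uri, conf) itself. The "fs.swift.impl" binding and the
// container/service names are assumptions based on the usual hadoop-openstack
// setup, not confirmed by this snippet.
public static void openSwift(Configuration conf) throws IOException {
  conf.set("fs.swift.impl", SwiftNativeFileSystem.class.getName());
  // FileSystem.get() constructs the implementation and invokes initialize(uri, conf)
  FileSystem fs = FileSystem.get(URI.create("swift://container.service/"), conf);
  System.out.println("Working dir: " + fs.getWorkingDirectory());
}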
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
    throws IOException {
  String classpath = conf.get("mapred.job.classpath.archives");

  // The scheme/authority part uses ':' as a separator, so put the unqualified
  // path in the classpath.
  String archivePath = archive.toUri().getPath();

  conf.set(
      "mapred.job.classpath.archives",
      classpath == null
          ? archivePath
          : classpath + System.getProperty("path.separator") + archivePath);
  return archive.makeQualified(archive.getFileSystem(conf)).toUri();
}
/**
 * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not
 * checked in for some time.
 */
public void run() {
  while (shouldRun) {
    //
    // Thread runs periodically to check whether trackers should be expired.
    // The sleep interval must be no more than half the maximum expiry time
    // for a task tracker.
    //
    try {
      Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
    } catch (InterruptedException ie) {
    }

    //
    // Loop through all expired items in the queue
    //
    synchronized (taskTrackers) {
      synchronized (trackerExpiryQueue) {
        long now = System.currentTimeMillis();
        TaskTrackerStatus leastRecent = null;
        while ((trackerExpiryQueue.size() > 0)
            && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null)
            && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {

          // Remove profile from head of queue
          trackerExpiryQueue.remove(leastRecent);
          String trackerName = leastRecent.getTrackerName();

          // Figure out if last-seen time should be updated, or if tracker is dead
          TaskTrackerStatus newProfile =
              (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());

          // Items might leave the taskTracker set through other means; the
          // status stored in 'taskTrackers' might be null, which means the
          // tracker has already been destroyed.
          if (newProfile != null) {
            if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
              // Remove completely
              updateTaskTrackerStatus(trackerName, null);
              lostTaskTracker(leastRecent.getTrackerName());
            } else {
              // Update time by inserting latest profile
              trackerExpiryQueue.add(newProfile);
            }
          }
        }
      }
    }
  }
}
/** Prints out usage */
static void usage() {
  System.err.println(
      "Usage: hadoop com.tripadvisor.hadoop.BackupHdfs args\n"
          + "  --hdfs-path path/on/hdfs\n"
          + "  --local-path path/on/local/fs: path to hdfs backup\n"
          + "  --preserve-path path/on/local/fs: path to preserve old files\n"
          + "  [--no-preserve FILE]: list of file substrings to skip preserving\n"
          + "  [--ignore-tables FILE]: list of tables to ignore\n"
          + "  [--dry-run]: don't create any files on local fs\n"
          + "  --date yesterday|last-day|last-week|UNIX-time-T\n"
          + "  [--max-date UNIX-time-T]: don't backup any files newer than T\n"
          + "  [--sleep N]: sleep N seconds after each file copy\n"
          + "  [--max-bytes N]: don't back up more than N bytes\n");
  System.exit(1);
}
public int run(String[] args) throws Exception {
  Path tempDir = new Path("/user/akhfa/temp");

  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(AuthorCounter.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, tempDir);

  // Report success/failure through the Tool exit code and let the caller
  // (e.g. ToolRunner) decide when to terminate the JVM.
  return job.waitForCompletion(true) ? 0 : 1;
}
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      // Skip past "part-" to parse the partition number from names like "part-00000"
      partition = Integer.parseInt(inputFile.substring(inputFile.lastIndexOf("part") + 5));
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
// Transform a JSON-encoded feature into an OpenCV Mat
public static Mat json2mat(String json) {
  JsonParser parser = new JsonParser();
  JsonElement parseTree = parser.parse(json);

  // Verify the input is a JSON object
  if (!parseTree.isJsonObject()) {
    System.out.println("The input is not a JSON type...\nExiting...");
    System.exit(1);
  }
  JsonObject jobj = parseTree.getAsJsonObject();

  // Detect broken/null features
  if (jobj.get("rows") == null) {
    return null;
  }

  int rows = jobj.get("rows").getAsInt();
  int cols = jobj.get("cols").getAsInt();
  int type = jobj.get("type").getAsInt();
  String data = jobj.get("data").getAsString();

  // Each comma-separated entry is a "<row> <col> <value>" triple
  String[] pixs = data.split(",");
  Mat descriptor = new Mat(rows, cols, type);
  for (String pix : pixs) {
    String[] tmp = pix.split(" ");
    int r_pos = Integer.valueOf(tmp[0]);
    int c_pos = Integer.valueOf(tmp[1]);
    double rgb = Double.valueOf(tmp[2]);
    descriptor.put(r_pos, c_pos, rgb);
  }
  return descriptor;
}
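// Hedged usage sketch for json2mat above; the literal values are made up to
// show the expected input shape: a 2x2 double matrix encoded as rows/cols/type
// plus comma-separated "row col value" triples.
public static void json2matExample() {
  String json =
      "{\"rows\":2,\"cols\":2,\"type\":" + CvType.CV_64F + ","
          + "\"data\":\"0 0 1.0,0 1 2.0,1 0 3.0,1 1 4.0\"}";
  Mat m = json2mat(json);
  System.out.println(m.dump()); // prints the reconstructed 2x2 matrix
}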
public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
  Job job = Job.getInstance(new Configuration(), "testcheckOutputSpecsForbidRecordCompression");
  FileSystem fs = FileSystem.getLocal(job.getConfiguration());
  Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
  fs.delete(outputdir, true);

  // Without an output path, FileOutputFormat.checkOutputSpecs will throw
  // InvalidJobConfException
  FileOutputFormat.setOutputPath(job, outputdir);

  // SequenceFileAsBinaryOutputFormat doesn't support record compression;
  // it should throw an exception when checked by checkOutputSpecs
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  try {
    new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
  } catch (Exception e) {
    fail(
        "Block compression should be allowed for SequenceFileAsBinaryOutputFormat: Caught "
            + e.getClass().getName());
  }

  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
  try {
    new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    fail("Record compression should not be allowed for SequenceFileAsBinaryOutputFormat");
  } catch (InvalidJobConfException ie) {
    // expected
  } catch (Exception e) {
    fail(
        "Expected "
            + InvalidJobConfException.class.getName()
            + " but caught "
            + e.getClass().getName());
  }
}
// TODO: Can also collect #procs, time taken by each thread to measure fairness.
@Override
public Integer call() throws IOException {
  while (true) {
    if (workersFailed.get()) {
      return EXIT_FAILURE;
    }
    long procId = procIds.getAndIncrement();
    if (procId >= numProcs) {
      break;
    }
    if (procId != 0 && procId % 10000 == 0) {
      long ms = System.currentTimeMillis() - start;
      System.out.println("Wrote " + procId + " procedures in " + StringUtils.humanTimeDiff(ms));
    }
    try {
      if (procId > 0 && procId % numProcsPerWal == 0) {
        store.rollWriterForTesting();
        System.out.println(
            "Starting new log : " + store.getActiveLogs().get(store.getActiveLogs().size() - 1));
      }
    } catch (IOException ioe) {
      // Ask other threads to quit too.
      workersFailed.set(true);
      System.err.println("Exception when rolling log file. Current procId = " + procId);
      ioe.printStackTrace();
      return EXIT_FAILURE;
    }
    ProcedureTestingUtility.TestProcedure proc =
        new ProcedureTestingUtility.TestProcedure(procId);
    proc.setData(serializedState);
    store.insert(proc, null);
    store.update(proc);
  }
  return EXIT_SUCCESS;
}
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  FileSystem fs = FileSystem.getLocal(conf);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path file = new Path(dir, "test.seq");

  Reporter reporter = Reporter.NULL;

  int seed = new Random().nextInt();
  // LOG.info("seed = " + seed);
  Random random = new Random(seed);

  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);

  // for a variety of lengths
  for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
    // LOG.info("creating; entries = " + length);

    // create a file with length entries
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
    try {
      for (int i = 0; i < length; i++) {
        IntWritable key = new IntWritable(i);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }

    // try splitting the file in a variety of sizes
    InputFormat<IntWritable, BytesWritable> format =
        new SequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < 3; i++) {
      int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
      // LOG.info("splitting: requesting = " + numSplits);
      InputSplit[] splits = format.getSplits(job, numSplits);
      // LOG.info("splitting: got = " + splits.length);

      // check each split
      BitSet bits = new BitSet(length);
      for (int j = 0; j < splits.length; j++) {
        RecordReader<IntWritable, BytesWritable> reader =
            format.getRecordReader(splits[j], job, reporter);
        try {
          int count = 0;
          while (reader.next(key, value)) {
            // if (bits.get(key.get())) {
            //   LOG.info("splits[" + j + "]=" + splits[j] + " : " + key.get());
            //   LOG.info("@" + reader.getPos());
            // }
            assertFalse("Key in multiple partitions.", bits.get(key.get()));
            bits.set(key.get());
            count++;
          }
          // LOG.info("splits[" + j + "]=" + splits[j] + " count=" + count);
        } finally {
          reader.close();
        }
      }
      assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
  }
}