int start( Configuration conf, String traceIn, Path ioPath, long genbytes, UserResolver userResolver) throws IOException, InterruptedException { InputStream trace = null; try { Path scratchDir = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix")); final FileSystem scratchFs = scratchDir.getFileSystem(conf); scratchFs.mkdirs(scratchDir, new FsPermission((short) 0777)); scratchFs.setPermission(scratchDir, new FsPermission((short) 0777)); // add shutdown hook for SIGINT, etc. Runtime.getRuntime().addShutdownHook(sdh); CountDownLatch startFlag = new CountDownLatch(1); try { // Create, start job submission threads startThreads(conf, traceIn, ioPath, scratchDir, startFlag, userResolver); // Write input data if specified if (genbytes > 0) { writeInputData(genbytes, ioPath); } // scan input dir contents submitter.refreshFilePool(); factory.start(); statistics.start(); } catch (Throwable e) { LOG.error("Startup failed", e); if (factory != null) factory.abort(); // abort pipeline } finally { // signal for factory to start; sets start time startFlag.countDown(); } if (factory != null) { // wait for input exhaustion factory.join(Long.MAX_VALUE); final Throwable badTraceException = factory.error(); if (null != badTraceException) { LOG.error("Error in trace", badTraceException); throw new IOException("Error in trace", badTraceException); } // wait for pending tasks to be submitted submitter.shutdown(); submitter.join(Long.MAX_VALUE); // wait for running tasks to complete monitor.shutdown(); monitor.join(Long.MAX_VALUE); statistics.shutdown(); statistics.join(Long.MAX_VALUE); } } finally { IOUtils.cleanup(LOG, trace); } return 0; }
/** * Write random bytes at the path provided. * * @see org.apache.hadoop.mapred.gridmix.GenerateData */ protected void writeInputData(long genbytes, Path ioPath) throws IOException, InterruptedException { final Configuration conf = getConf(); final GridmixJob genData = new GenerateData(conf, ioPath, genbytes); submitter.add(genData); LOG.info("Generating " + StringUtils.humanReadableInt(genbytes) + " of test data..."); // TODO add listeners, use for job dependencies TimeUnit.SECONDS.sleep(10); try { genData.getJob().waitForCompletion(false); } catch (ClassNotFoundException e) { throw new IOException("Internal error", e); } if (!genData.getJob().isSuccessful()) { throw new IOException("Data generation failed!"); } FsShell shell = new FsShell(conf); try { LOG.info("Changing the permissions for inputPath " + ioPath.toString()); shell.run(new String[] {"-chmod", "-R", "777", ioPath.toString()}); } catch (Exception e) { LOG.error("Couldnt change the file permissions ", e); throw new IOException(e); } LOG.info("Done."); }
/** * Create each component in the pipeline and start it. * * @param conf Configuration data, no keys specific to this context * @param traceIn Either a Path to the trace data or "-" for stdin * @param ioPath <ioPath>/input/ is the dir from which input data is read and * <ioPath>/distributedCache/ is the gridmix distributed cache directory. * @param scratchDir Path into which job output is written * @param startFlag Semaphore for starting job trace pipeline */ private void startThreads( Configuration conf, String traceIn, Path ioPath, Path scratchDir, CountDownLatch startFlag, UserResolver userResolver) throws IOException { try { Path inputDir = getGridmixInputDataPath(ioPath); GridmixJobSubmissionPolicy policy = getJobSubmissionPolicy(conf); LOG.info(" Submission policy is " + policy.name()); statistics = new Statistics(conf, policy.getPollingInterval(), startFlag); monitor = createJobMonitor(statistics, conf); int noOfSubmitterThreads = (policy == GridmixJobSubmissionPolicy.SERIAL) ? 1 : Runtime.getRuntime().availableProcessors() + 1; int numThreads = conf.getInt(GRIDMIX_SUB_THR, noOfSubmitterThreads); int queueDep = conf.getInt(GRIDMIX_QUE_DEP, 5); submitter = createJobSubmitter( monitor, numThreads, queueDep, new FilePool(conf, inputDir), userResolver, statistics); distCacheEmulator = new DistributedCacheEmulator(conf, ioPath); factory = createJobFactory(submitter, traceIn, scratchDir, conf, startFlag, userResolver); factory.jobCreator.setDistCacheEmulator(distCacheEmulator); if (policy == GridmixJobSubmissionPolicy.SERIAL) { statistics.addJobStatsListeners(factory); } else { statistics.addClusterStatsObservers(factory); } // add the gridmix run summarizer to the statistics statistics.addJobStatsListeners(summarizer.getExecutionSummarizer()); statistics.addClusterStatsObservers(summarizer.getClusterSummarizer()); monitor.start(); submitter.start(); } catch (Exception e) { LOG.error(" Exception at start ", e); throw new IOException(e); } }
/** * Called when a new submission message is received. Unpacks the job data and sends the * information as a SubmissionData to a JobSubmitter for bridging into the Heritrix submission * system. */ public synchronized void onMessage(Message msg) { if (msg instanceof MapMessage) { UrlSubmissionMsg subMsg = new UrlSubmissionMsg((MapMessage) msg); try { if (!subMsg.validate()) { throw new JMSException("Invalid UrlSubmissionMsg"); } SubmissionData data = new SubmissionData(); data.setMetaName(subMsg.getJobID()); data.setSeeds(subMsg.getUrl()); data.setJobDescription(subMsg.getDescription()); CrawlJob job = jobSubmitter.submit(data); JobFinishedListener listener = new JobFinishedListener(data, msg, this); inProcess = true; handler.addJob(job); CrawlController controller = job.getController(); // This sucks, but an alternative would require heavy hacking. while (controller == null) { Thread.sleep(50); controller = job.getController(); } controller.addCrawlStatusListener(listener); sendJobStarted(subMsg.getJobID(), subMsg.getUrl()); // Only take one message at a time. while (inProcess) { try { this.wait(); if (!inProcess) { break; } } catch (InterruptedException e) { } } } catch (JMSException e) { e.printStackTrace(); } catch (Exception e) { // Submission went awry e.printStackTrace(); } } } // - onMessage
/**
 * Builds the pipeline components (statistics, monitor, submitter, job factory), registers
 * the factory with the statistics component, and starts the monitor and the submitter.
 *
 * @param conf Configuration data, no keys specific to this context
 * @param traceIn Either a Path to the trace data or "-" for stdin
 * @param ioPath Path from which input data is read
 * @param scratchDir Path into which job output is written
 * @param startFlag Latch used to start the job trace pipeline
 * @param userResolver resolver passed to the submitter and the job factory
 * @throws IOException wrapping any exception raised while creating or starting a component
 */
private void startThreads(
    Configuration conf,
    String traceIn,
    Path ioPath,
    Path scratchDir,
    CountDownLatch startFlag,
    UserResolver userResolver)
    throws IOException {
  try {
    // STRESS is the policy used when the configuration does not name one.
    final GridmixJobSubmissionPolicy policy =
        GridmixJobSubmissionPolicy.getPolicy(conf, GridmixJobSubmissionPolicy.STRESS);
    LOG.info(" Submission policy is " + policy.name());
    final boolean serial = (policy == GridmixJobSubmissionPolicy.SERIAL);

    statistics = new Statistics(conf, policy.getPollingInterval(), startFlag);
    monitor = createJobMonitor(statistics);

    // Default submitter thread count: one for SERIAL, otherwise processors + 1.
    final int defaultThreads;
    if (serial) {
      defaultThreads = 1;
    } else {
      defaultThreads = Runtime.getRuntime().availableProcessors() + 1;
    }
    final int submitterThreads = conf.getInt(GRIDMIX_SUB_THR, defaultThreads);
    final int queueDepth = conf.getInt(GRIDMIX_QUE_DEP, 5);
    final FilePool inputPool = new FilePool(conf, ioPath);
    submitter =
        createJobSubmitter(
            monitor, submitterThreads, queueDepth, inputPool, userResolver, statistics);

    factory = createJobFactory(submitter, traceIn, scratchDir, conf, startFlag, userResolver);

    // The factory listens to per-job stats under SERIAL, to cluster stats otherwise.
    if (serial) {
      statistics.addJobStatsListeners(factory);
    } else {
      statistics.addClusterStatsObservers(factory);
    }

    monitor.start();
    submitter.start();
  } catch (Exception startFailure) {
    LOG.error(" Exception at start ", startFailure);
    throw new IOException(startFailure);
  }
}
// Launch Input/DistCache Data Generation job and wait for completion void launchGridmixJob(GridmixJob job) throws IOException, InterruptedException { submitter.add(job); // TODO add listeners, use for job dependencies try { while (!job.isSubmitted()) { try { Thread.sleep(100); // sleep } catch (InterruptedException ie) { } } // wait for completion job.getJob().waitForCompletion(false); } catch (ClassNotFoundException e) { throw new IOException("Internal error", e); } if (!job.getJob().isSuccessful()) { throw new IOException(job.getJob().getJobName() + " job failed!"); } }
/** * @param conf gridmix configuration * @param traceIn trace file path(if it is '-', then trace comes from the stream stdin) * @param ioPath Working directory for gridmix. GenerateData job will generate data in the * directory <ioPath>/input/ and distributed cache data is generated in the directory * <ioPath>/distributedCache/, if -generate option is specified. * @param genbytes size of input data to be generated under the directory <ioPath>/input/ * @param userResolver gridmix user resolver * @param generate true if -generate option was specified * @return exit code * @throws IOException * @throws InterruptedException */ int start( Configuration conf, String traceIn, Path ioPath, long genbytes, UserResolver userResolver, boolean generate) throws IOException, InterruptedException { DataStatistics stats = null; InputStream trace = null; ioPath = ioPath.makeQualified(ioPath.getFileSystem(conf)); try { Path scratchDir = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix")); // add shutdown hook for SIGINT, etc. 
Runtime.getRuntime().addShutdownHook(sdh); CountDownLatch startFlag = new CountDownLatch(1); try { // Create, start job submission threads startThreads(conf, traceIn, ioPath, scratchDir, startFlag, userResolver); Path inputDir = getGridmixInputDataPath(ioPath); // Write input data if specified if (genbytes > 0) { writeInputData(genbytes, inputDir); } // publish the data statistics stats = GenerateData.publishDataStatistics(inputDir, genbytes, conf); // scan input dir contents submitter.refreshFilePool(); // set up the needed things for emulation of various loads int exitCode = setupEmulation(conf, traceIn, scratchDir, ioPath, generate); if (exitCode != 0) { return exitCode; } // start the summarizer summarizer.start(conf); factory.start(); statistics.start(); } catch (Throwable e) { LOG.error("Startup failed", e); if (factory != null) factory.abort(); // abort pipeline } finally { // signal for factory to start; sets start time startFlag.countDown(); } if (factory != null) { // wait for input exhaustion factory.join(Long.MAX_VALUE); final Throwable badTraceException = factory.error(); if (null != badTraceException) { LOG.error("Error in trace", badTraceException); throw new IOException("Error in trace", badTraceException); } // wait for pending tasks to be submitted submitter.shutdown(); submitter.join(Long.MAX_VALUE); // wait for running tasks to complete monitor.shutdown(); monitor.join(Long.MAX_VALUE); statistics.shutdown(); statistics.join(Long.MAX_VALUE); } } finally { if (factory != null) { summarizer.finalize(factory, traceIn, genbytes, userResolver, stats, conf); } IOUtils.cleanup(LOG, trace); } return 0; }