Beispiel #1
0
  /**
   * Runs the pipeline end to end: prepares the scratch directory, starts the pipeline threads,
   * optionally generates input data, and then blocks until the job factory, submitter, monitor
   * and statistics components have all finished.
   *
   * <p>A failure during start-up is logged and the pipeline is aborted, but it is not rethrown;
   * the method still proceeds to the wait/shutdown phase.
   *
   * @param conf configuration used to resolve the scratch directory and handed to the pipeline
   * @param traceIn trace location, forwarded to {@code startThreads}
   * @param ioPath root under which the scratch directory is created; also the target of data
   *     generation
   * @param genbytes number of bytes of input data to generate; nothing is generated unless the
   *     value is positive
   * @param userResolver resolver forwarded to {@code startThreads}
   * @return 0 whenever the method returns normally
   * @throws IOException if the job factory reports an error, or if a file system call fails
   * @throws InterruptedException declared by the blocking calls made here (presumably the
   *     {@code join} calls; confirm against the component classes)
   */
  int start(
      Configuration conf, String traceIn, Path ioPath, long genbytes, UserResolver userResolver)
      throws IOException, InterruptedException {
    // NOTE(review): never assigned in this method, so the cleanup in the finally block
    // receives null.
    InputStream trace = null;
    try {
      final Path outputRoot = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix"));
      final FileSystem outputFs = outputRoot.getFileSystem(conf);
      // Create the directory, then set the mode explicitly a second time.
      outputFs.mkdirs(outputRoot, new FsPermission((short) 0777));
      outputFs.setPermission(outputRoot, new FsPermission((short) 0777));

      // Register the hook that reacts to SIGINT and similar shutdown requests.
      Runtime.getRuntime().addShutdownHook(sdh);

      final CountDownLatch startSignal = new CountDownLatch(1);
      try {
        // Build and launch the submission threads.
        startThreads(conf, traceIn, ioPath, outputRoot, startSignal, userResolver);
        // Generate input data only when a positive size was requested.
        if (genbytes > 0) {
          writeInputData(genbytes, ioPath);
        }
        // Pick up the contents of the input directory.
        submitter.refreshFilePool();
        factory.start();
        statistics.start();
      } catch (Throwable startupFailure) {
        LOG.error("Startup failed", startupFailure);
        if (factory != null) {
          // Abort the pipeline.
          factory.abort();
        }
      } finally {
        // Release the factory; this also sets the start time.
        startSignal.countDown();
      }

      if (factory == null) {
        // The factory was never created, so there is nothing to wait for.
        return 0;
      }

      // Block until the input is exhausted.
      factory.join(Long.MAX_VALUE);
      final Throwable traceFailure = factory.error();
      if (traceFailure != null) {
        LOG.error("Error in trace", traceFailure);
        throw new IOException("Error in trace", traceFailure);
      }

      // Let pending tasks be submitted.
      submitter.shutdown();
      submitter.join(Long.MAX_VALUE);

      // Let running tasks complete.
      monitor.shutdown();
      monitor.join(Long.MAX_VALUE);

      statistics.shutdown();
      statistics.join(Long.MAX_VALUE);
      return 0;
    } finally {
      IOUtils.cleanup(LOG, trace);
    }
  }
Beispiel #2
0
  /**
   * Write random bytes at the path provided, then recursively open up the permissions of the
   * generated data.
   *
   * <p>The generation job is handed to the submitter, awaited, and checked for success. After
   * that, {@code chmod -R 777} is run on {@code ioPath} through {@link FsShell}; both an
   * exception and a non-zero exit status from the shell are reported as an {@link IOException}.
   *
   * @param genbytes number of bytes of data to generate
   * @param ioPath path under which the data is generated and whose permissions are changed
   * @throws IOException if the generation job fails, or if the permissions cannot be changed
   * @throws InterruptedException if interrupted while pausing or while waiting for the job
   * @see org.apache.hadoop.mapred.gridmix.GenerateData
   */
  protected void writeInputData(long genbytes, Path ioPath)
      throws IOException, InterruptedException {
    final Configuration conf = getConf();
    final GridmixJob genData = new GenerateData(conf, ioPath, genbytes);
    submitter.add(genData);
    LOG.info("Generating " + StringUtils.humanReadableInt(genbytes) + " of test data...");
    // TODO add listeners, use for job dependencies
    // NOTE(review): the fixed pause presumably gives the submitter time to submit the job
    // before getJob() is used below - confirm.
    TimeUnit.SECONDS.sleep(10);
    try {
      genData.getJob().waitForCompletion(false);
    } catch (ClassNotFoundException e) {
      throw new IOException("Internal error", e);
    }
    if (!genData.getJob().isSuccessful()) {
      throw new IOException("Data generation failed!");
    }

    final FsShell shell = new FsShell(conf);
    final int chmodStatus;
    try {
      LOG.info("Changing the permissions for inputPath " + ioPath.toString());
      chmodStatus = shell.run(new String[] {"-chmod", "-R", "777", ioPath.toString()});
    } catch (Exception e) {
      LOG.error("Couldnt change the file permissions ", e);
      throw new IOException(e);
    }
    // The shell reports command failures through its exit status rather than by throwing,
    // so a non-zero status has to be treated as a failure too.
    if (chmodStatus != 0) {
      final String failure =
          "Couldnt change the file permissions of " + ioPath + " (exit status " + chmodStatus + ")";
      LOG.error(failure);
      throw new IOException(failure);
    }
    LOG.info("Done.");
  }
Beispiel #3
0
  /**
   * Builds every component of the pipeline (statistics, job monitor, job submitter, distributed
   * cache emulator and job factory), wires the listeners together, and starts the monitor and
   * the submitter.
   *
   * @param conf Configuration data, no keys specific to this context
   * @param traceIn Either a Path to the trace data or "-" for stdin
   * @param ioPath {@code <ioPath>/input/} is the dir from which input data is read and
   *     {@code <ioPath>/distributedCache/} is the gridmix distributed cache directory.
   * @param scratchDir Path into which job output is written
   * @param startFlag Semaphore for starting job trace pipeline
   * @param userResolver resolver handed to both the job submitter and the job factory
   * @throws IOException wrapping any exception raised while the components are created or
   *     started
   */
  private void startThreads(
      Configuration conf,
      String traceIn,
      Path ioPath,
      Path scratchDir,
      CountDownLatch startFlag,
      UserResolver userResolver)
      throws IOException {
    try {
      final Path inputDir = getGridmixInputDataPath(ioPath);
      final GridmixJobSubmissionPolicy policy = getJobSubmissionPolicy(conf);
      LOG.info(" Submission policy is " + policy.name());

      statistics = new Statistics(conf, policy.getPollingInterval(), startFlag);
      monitor = createJobMonitor(statistics, conf);

      // Default thread count: one for SERIAL, otherwise one more than the processor count.
      final int defaultSubmitterThreads;
      if (policy == GridmixJobSubmissionPolicy.SERIAL) {
        defaultSubmitterThreads = 1;
      } else {
        defaultSubmitterThreads = Runtime.getRuntime().availableProcessors() + 1;
      }
      final int submitterThreads = conf.getInt(GRIDMIX_SUB_THR, defaultSubmitterThreads);
      final int queueDepth = conf.getInt(GRIDMIX_QUE_DEP, 5);
      final FilePool inputPool = new FilePool(conf, inputDir);

      submitter =
          createJobSubmitter(
              monitor, submitterThreads, queueDepth, inputPool, userResolver, statistics);
      distCacheEmulator = new DistributedCacheEmulator(conf, ioPath);

      factory = createJobFactory(submitter, traceIn, scratchDir, conf, startFlag, userResolver);
      factory.jobCreator.setDistCacheEmulator(distCacheEmulator);

      // The factory listens to per-job stats under SERIAL, and to cluster stats otherwise.
      if (policy != GridmixJobSubmissionPolicy.SERIAL) {
        statistics.addClusterStatsObservers(factory);
      } else {
        statistics.addJobStatsListeners(factory);
      }

      // Register the run summarizer with the statistics component.
      statistics.addJobStatsListeners(summarizer.getExecutionSummarizer());
      statistics.addClusterStatsObservers(summarizer.getClusterSummarizer());

      monitor.start();
      submitter.start();
    } catch (Exception startupError) {
      LOG.error(" Exception at start ", startupError);
      throw new IOException(startupError);
    }
  }
Beispiel #4
0
  /**
   * Called when a new submission message is received. Unpacks the job data and sends the
   * information as a SubmissionData to a JobSubmitter for bridging into the Heritrix submission
   * system.
   *
   * <p>Messages that are not {@code MapMessage}s are ignored. For a valid message the method
   * submits the job, attaches a {@code JobFinishedListener} to the job's controller once one is
   * available, announces the start, and then blocks on this object's monitor until
   * {@code inProcess} is cleared, so that only one message is handled at a time.
   *
   * <p>Failures are printed and not rethrown. If the thread is interrupted, its interrupt status
   * is restored before the method returns so that the caller can observe it.
   *
   * @param msg the incoming JMS message
   */
  public synchronized void onMessage(Message msg) {
    if (!(msg instanceof MapMessage)) {
      return;
    }
    UrlSubmissionMsg subMsg = new UrlSubmissionMsg((MapMessage) msg);
    try {
      if (!subMsg.validate()) {
        throw new JMSException("Invalid UrlSubmissionMsg");
      }
      SubmissionData data = new SubmissionData();
      data.setMetaName(subMsg.getJobID());
      data.setSeeds(subMsg.getUrl());
      data.setJobDescription(subMsg.getDescription());

      CrawlJob job = jobSubmitter.submit(data);
      JobFinishedListener listener = new JobFinishedListener(data, msg, this);
      inProcess = true;
      handler.addJob(job);

      CrawlController controller = job.getController();
      // This sucks, but an alternative would require heavy hacking.
      // Poll until the job exposes a controller to attach the listener to.
      while (controller == null) {
        Thread.sleep(50);
        controller = job.getController();
      }
      controller.addCrawlStatusListener(listener);

      sendJobStarted(subMsg.getJobID(), subMsg.getUrl());

      // Only take one message at a time.
      // NOTE(review): inProcess is presumably cleared, and this monitor notified, by the
      // JobFinishedListener created above - confirm.
      boolean interrupted = false;
      while (inProcess) {
        try {
          this.wait();
        } catch (InterruptedException e) {
          // Keep waiting for the job to finish, but remember the interrupt. Re-asserting it
          // here would make the next wait() throw immediately and spin.
          interrupted = true;
        }
      }
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    } catch (InterruptedException e) {
      // Interrupted while polling for the controller: give up on this message, but keep the
      // interrupt status visible to the caller.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (JMSException e) {
      e.printStackTrace();
    } catch (Exception e) {
      // Submission went awry
      e.printStackTrace();
    }
  } // - onMessage
Beispiel #5
0
  /**
   * Builds the statistics, job monitor, job submitter and job factory, registers the factory
   * with the statistics component, and starts the monitor and the submitter.
   *
   * @param conf Configuration data, no keys specific to this context
   * @param traceIn Either a Path to the trace data or "-" for stdin
   * @param ioPath Path from which input data is read
   * @param scratchDir Path into which job output is written
   * @param startFlag Semaphore for starting job trace pipeline
   * @param userResolver resolver handed to both the job submitter and the job factory
   * @throws IOException wrapping any exception raised while the components are created or
   *     started
   */
  private void startThreads(
      Configuration conf,
      String traceIn,
      Path ioPath,
      Path scratchDir,
      CountDownLatch startFlag,
      UserResolver userResolver)
      throws IOException {
    try {
      // STRESS is passed as the second argument of the policy lookup.
      final GridmixJobSubmissionPolicy policy =
          GridmixJobSubmissionPolicy.getPolicy(conf, GridmixJobSubmissionPolicy.STRESS);
      LOG.info(" Submission policy is " + policy.name());

      statistics = new Statistics(conf, policy.getPollingInterval(), startFlag);
      monitor = createJobMonitor(statistics);

      final boolean serial = policy == GridmixJobSubmissionPolicy.SERIAL;

      // Default thread count: one for SERIAL, otherwise one more than the processor count.
      int defaultSubmitterThreads = 1;
      if (!serial) {
        defaultSubmitterThreads = Runtime.getRuntime().availableProcessors() + 1;
      }
      final int submitterThreads = conf.getInt(GRIDMIX_SUB_THR, defaultSubmitterThreads);
      final int queueDepth = conf.getInt(GRIDMIX_QUE_DEP, 5);
      final FilePool inputPool = new FilePool(conf, ioPath);

      submitter =
          createJobSubmitter(
              monitor, submitterThreads, queueDepth, inputPool, userResolver, statistics);

      factory = createJobFactory(submitter, traceIn, scratchDir, conf, startFlag, userResolver);

      // The factory listens to per-job stats under SERIAL, and to cluster stats otherwise.
      if (serial) {
        statistics.addJobStatsListeners(factory);
      } else {
        statistics.addClusterStatsObservers(factory);
      }

      monitor.start();
      submitter.start();
    } catch (Exception startupError) {
      LOG.error(" Exception at start ", startupError);
      throw new IOException(startupError);
    }
  }
Beispiel #6
0
  /**
   * Launch Input/DistCache Data Generation job and wait for completion.
   *
   * <p>The job is handed to the submitter; the method then polls every 100 ms until the job
   * reports that it has been submitted, waits for it to complete, and verifies that it
   * succeeded.
   *
   * @param job the job to submit and wait for
   * @throws IOException if the job does not complete successfully, or if waiting for it fails
   *     with a {@code ClassNotFoundException}
   * @throws InterruptedException if the calling thread is interrupted while waiting for the job
   *     to be submitted or to complete
   */
  void launchGridmixJob(GridmixJob job) throws IOException, InterruptedException {
    submitter.add(job);

    // TODO add listeners, use for job dependencies
    // Poll until the submitter has actually submitted the job. An interrupt is allowed to
    // propagate so that the wait can be cancelled instead of being silently discarded.
    while (!job.isSubmitted()) {
      Thread.sleep(100);
    }

    try {
      // wait for completion
      job.getJob().waitForCompletion(false);
    } catch (ClassNotFoundException e) {
      throw new IOException("Internal error", e);
    }
    if (!job.getJob().isSuccessful()) {
      throw new IOException(job.getJob().getJobName() + " job failed!");
    }
  }
Beispiel #7
0
  /**
   * Runs the pipeline end to end: starts the pipeline threads, optionally generates input data,
   * publishes data statistics, sets up emulation, and then blocks until the job factory,
   * submitter, monitor and statistics components have all finished. The summarizer is finalized
   * on the way out whenever a factory exists.
   *
   * <p>A failure during start-up is logged and the pipeline is aborted, but it is not rethrown;
   * the method still proceeds to the wait/shutdown phase.
   *
   * @param conf gridmix configuration
   * @param traceIn trace file path (if it is '-', then trace comes from the stream stdin)
   * @param ioPath Working directory for gridmix. GenerateData job will generate data in the
   *     directory {@code <ioPath>/input/} and distributed cache data is generated in the
   *     directory {@code <ioPath>/distributedCache/}, if -generate option is specified.
   * @param genbytes size of input data to be generated under the directory
   *     {@code <ioPath>/input/}; nothing is generated unless the value is positive
   * @param userResolver gridmix user resolver
   * @param generate true if -generate option was specified
   * @return the non-zero code returned by {@code setupEmulation} if emulation set-up fails,
   *     otherwise 0
   * @throws IOException if the job factory reports an error, or if a file system call fails
   * @throws InterruptedException declared by the blocking calls made here (presumably the
   *     {@code join} calls; confirm against the component classes)
   */
  int start(
      Configuration conf,
      String traceIn,
      Path ioPath,
      long genbytes,
      UserResolver userResolver,
      boolean generate)
      throws IOException, InterruptedException {
    DataStatistics stats = null;
    // NOTE(review): never assigned in this method, so the cleanup in the finally block
    // receives null.
    InputStream trace = null;
    ioPath = ioPath.makeQualified(ioPath.getFileSystem(conf));

    try {
      final Path outputRoot = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix"));

      // Register the hook that reacts to SIGINT and similar shutdown requests.
      Runtime.getRuntime().addShutdownHook(sdh);

      final CountDownLatch startSignal = new CountDownLatch(1);
      try {
        // Build and launch the submission threads.
        startThreads(conf, traceIn, ioPath, outputRoot, startSignal, userResolver);

        final Path dataDir = getGridmixInputDataPath(ioPath);

        // Generate input data only when a positive size was requested.
        if (genbytes > 0) {
          writeInputData(genbytes, dataDir);
        }

        // Publish the data statistics; kept for the summarizer in the finally block.
        stats = GenerateData.publishDataStatistics(dataDir, genbytes, conf);

        // Pick up the contents of the input directory.
        submitter.refreshFilePool();

        // Prepare whatever the emulation of the various loads needs; bail out on failure.
        final int emulationStatus = setupEmulation(conf, traceIn, outputRoot, ioPath, generate);
        if (emulationStatus != 0) {
          return emulationStatus;
        }

        summarizer.start(conf);

        factory.start();
        statistics.start();
      } catch (Throwable startupFailure) {
        LOG.error("Startup failed", startupFailure);
        if (factory != null) {
          // Abort the pipeline.
          factory.abort();
        }
      } finally {
        // Release the factory; this also sets the start time.
        startSignal.countDown();
      }

      if (factory == null) {
        // The factory was never created, so there is nothing to wait for.
        return 0;
      }

      // Block until the input is exhausted.
      factory.join(Long.MAX_VALUE);
      final Throwable traceFailure = factory.error();
      if (traceFailure != null) {
        LOG.error("Error in trace", traceFailure);
        throw new IOException("Error in trace", traceFailure);
      }

      // Let pending tasks be submitted.
      submitter.shutdown();
      submitter.join(Long.MAX_VALUE);

      // Let running tasks complete.
      monitor.shutdown();
      monitor.join(Long.MAX_VALUE);

      statistics.shutdown();
      statistics.join(Long.MAX_VALUE);
      return 0;
    } finally {
      if (factory != null) {
        summarizer.finalize(factory, traceIn, genbytes, userResolver, stats, conf);
      }
      IOUtils.cleanup(LOG, trace);
    }
  }