/**
 * Implements the core execution: creates the file listing for the copy and launches the
 * Hadoop job that performs it.
 *
 * @return Job handle
 * @throws Exception on failure.
 */
public Job execute() throws Exception {
  assert inputOptions != null;
  assert getConf() != null;
  Job job = null;
  try {
    metaFolder = createMetaFolderPath();
    jobFS = metaFolder.getFileSystem(getConf());
    job = createJob();
    createInputFileListing(job);

    job.submit();
    submitted = true;
  } finally {
    if (!submitted) {
      cleanup();
    }
  }

  String jobID = getJobID(job);
  job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);

  LOG.info("DistCp job-id: " + jobID);
  LOG.info("DistCp job may be tracked at: " + job.getTrackingURL());
  LOG.info("To cancel, run the following command:\thadoop job -kill " + jobID);

  long jobStartTime = System.nanoTime();
  if (inputOptions.shouldBlock() && !job.waitForCompletion(true)) {
    updateJobTimeInNanos(jobStartTime);
    throw new IOException("DistCp failure: Job " + jobID + " has failed. ");
  }
  updateJobTimeInNanos(jobStartTime);
  return job;
}
@Override public int run(String[] args) throws Exception { if (args.length < 4) { writeUsage(); return 1; } Path secretsPath = new Path(args[0]); Path saltFilePath = new Path(args[1]); Path inputPath = new Path(args[2]); Path outputPath = new Path(args[3]); // Make sure the salt file exists generateSaltIfNeeded(saltFilePath, secretsPath); // Configure the job Job job = configureJob(secretsPath, saltFilePath, inputPath, outputPath); // Run it long startTime = System.currentTimeMillis(); job.submit(); if (job.waitForCompletion(true)) { System.out.printf( "Done obfuscating - took %d seconds.\n", (System.currentTimeMillis() - startTime) / 1000); } else { System.err.printf("Job finished with errors: %s\n", job.getStatus().getFailureInfo()); return 2; } return 0; }
/** Run a job. */ static void runJob(String name, Job job, Machine machine, String startmessage, Util.Timer timer) { JOB_SEMAPHORE.acquireUninterruptibly(); Long starttime = null; try { try { starttime = timer.tick("starting " + name + " ...\n " + startmessage); // initialize and submit a job machine.init(job); job.submit(); // Separate jobs final long sleeptime = 1000L * job.getConfiguration().getInt(JOB_SEPARATION_PROPERTY, 10); if (sleeptime > 0) { Util.out.println(name + "> sleep(" + Util.millis2String(sleeptime) + ")"); Thread.sleep(sleeptime); } } finally { JOB_SEMAPHORE.release(); } if (!job.waitForCompletion(false)) throw new RuntimeException(name + " failed."); } catch (Exception e) { throw e instanceof RuntimeException ? (RuntimeException) e : new RuntimeException(e); } finally { if (starttime != null) timer.tick(name + "> timetaken=" + Util.millis2String(timer.tick() - starttime)); } }
/**
 * Entry point to start the job.
 *
 * @param args Command line parameters.
 * @throws Exception If the job fails.
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.println("usage: [input] [output]");
    System.exit(-1);
  }

  Job job = getJob(args[0], args[1]);
  job.submit();
}
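The main() above returns as soon as submit() succeeds; it never learns whether the job ultimately passed or failed. A minimal sketch of a blocking helper, mirroring the exit-code convention used by the other drivers in this section (the helper name and class are hypothetical, not part of the snippet above):

import org.apache.hadoop.mapreduce.Job;

public final class JobRunner {
  private JobRunner() {}

  /**
   * Submits the given pre-configured job (if it has not been submitted yet) and
   * blocks until it finishes, returning a conventional process exit code.
   */
  public static int runBlocking(Job job) throws Exception {
    // waitForCompletion(true) submits the job when it is still in the DEFINE
    // state and prints progress to stdout while waiting.
    return job.waitForCompletion(true) ? 0 : 1;
  }
}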
static void waitForJob(Job job) throws Exception {
  job.submit();
  while (!job.isComplete()) {
    LOG.debug("waiting for job " + job.getJobName());
    sleep(50);
  }
  LOG.debug(
      "status for job " + job.getJobName() + ": " + (job.isSuccessful() ? "SUCCESS" : "FAILURE"));
  if (!job.isSuccessful()) {
    throw new RuntimeException("job failed " + job.getJobName());
  }
}
/**
 * Submit the job to the cluster and wait for it to finish.
 *
 * @param verbose print the progress to the user
 * @return true if the job succeeded
 * @throws IOException thrown if the communication with the <code>JobTracker</code> is lost
 */
public boolean waitForCompletion(boolean verbose)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (state == JobState.DEFINE) {
    submit();
  }
  if (verbose) {
    jobClient.monitorAndPrintJob(conf, info);
  } else {
    info.waitForCompletion();
  }
  return isSuccessful();
}
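Because waitForCompletion() only calls submit() while the job is still in the DEFINE state, it is safe to submit explicitly first and attach the blocking wait later, which is the pattern several snippets in this section use. A minimal sketch, assuming an already-configured Job:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;

public final class SubmitThenWait {
  private SubmitThenWait() {}

  /** Submits immediately, logs the tracking URL, then blocks for the result. */
  public static boolean submitThenWait(Job job, boolean verbose)
      throws IOException, InterruptedException, ClassNotFoundException {
    job.submit(); // moves the job out of JobState.DEFINE
    System.out.println("Job may be tracked at: " + job.getTrackingURL());
    // Safe: waitForCompletion() skips submit() for an already-submitted job.
    return job.waitForCompletion(verbose);
  }
}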
@SuppressWarnings({"unchecked", "rawtypes"})
public void start(
    Path outputDir, int numReducers, boolean concurrent, String accessKey, String secretKey)
    throws GoraException, IOException, Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

  // DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class,
  //     new Configuration());
  auth = new BasicAWSCredentials(accessKey, secretKey);
  DataStore<Long, cidynamonode> store =
      WSDataStoreFactory.createDataStore(
          DynamoDBStore.class, DynamoDBKey.class, cidynamonode.class, auth);

  job = new Job(getConf());

  if (!job.getConfiguration()
      .get("io.serializations")
      .contains("org.apache.hadoop.io.serializer.JavaSerialization")) {
    job.getConfiguration()
        .set(
            "io.serializations",
            job.getConfiguration().get("io.serializations")
                + ",org.apache.hadoop.io.serializer.JavaSerialization");
  }

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  Query query = store.newQuery();
  // if (!concurrent) {
  //   // no concurrency filtering, only need prev field
  //   query.setFields("prev");
  // } else {
  //   readFlushed(job.getConfiguration());
  // }

  GoraMapper.initMapperJob(
      job, query, store, DynamoDBKey.class, VLongWritable.class, VerifyMapper.class, true);
  job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  store.close();
  job.submit();
}
@Override public RunningJob submitJob(org.pentaho.hadoop.shim.api.Configuration c) throws IOException { ClassLoader cl = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); try { Job job = ((org.pentaho.hadoop.shim.cdh54.ConfigurationProxyV2) c).getJob(); job.submit(); return new RunningJobProxyV2(job); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } finally { Thread.currentThread().setContextClassLoader(cl); } }
public int run(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length != 2) { System.err.printf( "Usage: %s <comma separated paths> <output path>\n", this.getClass().getName()); return -1; } Job job = Job.getInstance(); job.setJobName("PasmJoin"); job.setJarByClass(PsamXY.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(AvroValue.class); job.setOutputKeyClass(AvroKey.class); job.setOutputValueClass(NullWritable.class); job.setMapperClass(PsamXYMapper.class); job.setReducerClass(PsamXYReducer.class); job.setInputFormatClass(AvroKeyInputFormat.class); job.setOutputFormatClass(AvroKeyOutputFormat.class); FileInputFormat.setInputPaths(job, args[0]); Path output = new Path(args[1]); FileOutputFormat.setOutputPath(job, output); FileSystem fs = FileSystem.get(conf); fs.delete(output, true); AvroJob.setOutputKeySchema(job, outputSchema); AvroJob.setMapOutputValueSchema(job, outputSchema); // DistributedCache.addCacheFile(new Path("BM_TERM_TYPE_DMT.avro").toUri(), // job.getConfiguration()); job.setNumReduceTasks(1); job.submit(); job.waitForCompletion(true); return 0; }
public static void run( Configuration conf, Path[] inputPaths, Path outputPath, int k, int p, long seed) throws ClassNotFoundException, InterruptedException, IOException { Job job = new Job(conf); job.setJobName("YtY-job"); job.setJarByClass(YtYJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); FileInputFormat.setInputPaths(job, inputPaths); FileOutputFormat.setOutputPath(job, outputPath); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(YtYMapper.class); job.getConfiguration().setLong(PROP_OMEGA_SEED, seed); job.getConfiguration().setInt(PROP_K, k); job.getConfiguration().setInt(PROP_P, p); /* * we must reduce to just one matrix which means we need only one reducer. * But it's ok since each mapper outputs only one vector (a packed * UpperTriangular) so even if there're thousands of mappers, one reducer * should cope just fine. */ job.setNumReduceTasks(1); job.submit(); job.waitForCompletion(false); if (!job.isSuccessful()) { throw new IOException("YtY job unsuccessful."); } }
/** Runs a GridMix data-generation job. */ private static void runDataGenJob(Configuration conf, Path tempDir) throws IOException, ClassNotFoundException, InterruptedException { JobClient client = new JobClient(conf); // get the local job runner conf.setInt(MRJobConfig.NUM_MAPS, 1); Job job = new Job(conf); CompressionEmulationUtil.configure(job); job.setInputFormatClass(CustomInputFormat.class); // set the output path FileOutputFormat.setOutputPath(job, tempDir); // submit and wait for completion job.submit(); int ret = job.waitForCompletion(true) ? 0 : 1; assertEquals("Job Failed", 0, ret); }
public Job call() throws IOException, InterruptedException, ClassNotFoundException { job.setMapperClass(GridmixMapper.class); job.setReducerClass(GridmixReducer.class); job.setNumReduceTasks(jobdesc.getNumberReduces()); job.setMapOutputKeyClass(GridmixKey.class); job.setMapOutputValueClass(GridmixRecord.class); job.setSortComparatorClass(GridmixKey.Comparator.class); job.setGroupingComparatorClass(SpecGroupingComparator.class); job.setInputFormatClass(GridmixInputFormat.class); job.setOutputFormatClass(RawBytesOutputFormat.class); job.setPartitionerClass(DraftPartitioner.class); job.setJarByClass(GridmixJob.class); job.getConfiguration().setInt("gridmix.job.seq", seq); job.getConfiguration() .set(ORIGNAME, null == jobdesc.getJobID() ? "<unknown>" : jobdesc.getJobID().toString()); job.getConfiguration().setBoolean("mapred.used.genericoptionsparser", true); FileInputFormat.addInputPath(job, new Path("ignored")); FileOutputFormat.setOutputPath(job, outdir); job.submit(); return job; }
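call() above submits the job and hands the live Job handle back to the caller. Since the method has the shape of a java.util.concurrent.Callable<Job>, one hypothetical way to drive it is through an ExecutorService, retrieving the submitted handle from the Future; this is only a sketch of that pattern, not GridMix's actual submission pipeline:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.mapreduce.Job;

public final class CallableSubmitExample {
  private CallableSubmitExample() {}

  /** Runs a Callable<Job> (such as the call() above) on a worker thread. */
  public static Job submitAsync(Callable<Job> jobFactory) throws Exception {
    ExecutorService pool = Executors.newSingleThreadExecutor();
    try {
      Future<Job> handle = pool.submit(jobFactory);
      // call() returns only after job.submit() has succeeded, so the Job
      // handle is already submitted when we receive it here.
      return handle.get();
    } finally {
      pool.shutdown();
    }
  }
}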
/**
 * Refer to {@link ReconstructionErrJob} for an explanation of the job.
 *
 * @param conf the configuration
 * @param yPath the path to input matrix Y
 * @param y2xPath the path to in-memory matrix Y2X, where X = Y * Y2X
 * @param yCols the number of columns in Y
 * @param xCols the number of columns in X
 * @param cPath the path to in-memory matrix C, where ReconY = Xc * C'
 * @param zmPath the path to vector Zm, where Zm = Ym * Y2X * C' - Ym
 * @param ymPath the path to the mean vector Ym
 * @param outPath the output path
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(
    Configuration conf,
    Path yPath,
    Path y2xPath,
    int yCols,
    int xCols,
    Path cPath,
    String zmPath,
    String ymPath,
    Path outPath,
    final float ERR_SAMPLE_RATE)
    throws IOException, InterruptedException, ClassNotFoundException {
  conf.set(MATRIXY2X, y2xPath.toString());
  conf.set(RECONSTRUCTIONMATRIX, cPath.toString());
  conf.set(ZMPATH, zmPath);
  conf.set(YMPATH, ymPath);
  conf.setInt(YCOLS, yCols);
  conf.setInt(XCOLS, xCols);
  conf.set(ERRSAMPLERATE, "" + ERR_SAMPLE_RATE);

  FileSystem fs = FileSystem.get(yPath.toUri(), conf);
  yPath = fs.makeQualified(yPath);
  outPath = fs.makeQualified(outPath);

  Job job = new Job(conf);
  FileInputFormat.addInputPath(job, yPath);
  FileOutputFormat.setOutputPath(job, outPath);
  job.setJobName("ReconErrJob-" + yPath.getName());
  job.setJarByClass(ReconstructionErrJob.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(VectorWritable.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.submit();
  job.waitForCompletion(true);
}
/*package*/ static Job sortOne( Configuration conf, Path inputFile, Path outputDir, String commandName, String samplingInfo) throws IOException, ClassNotFoundException, InterruptedException { conf.set(Utils.WORK_FILENAME_PROPERTY, inputFile.getName()); Utils.configureSampling(outputDir, inputFile.getName(), conf); final Job job = new Job(conf); job.setJarByClass(Summarize.class); job.setMapperClass(Mapper.class); job.setReducerClass(SortReducer.class); job.setMapOutputKeyClass(LongWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SortInputFormat.class); job.setOutputFormatClass(SortOutputFormat.class); FileInputFormat.setInputPaths(job, inputFile); FileOutputFormat.setOutputPath(job, outputDir); job.setPartitionerClass(TotalOrderPartitioner.class); final Timer t = new Timer(); System.out.printf("%s :: Sampling%s...\n", commandName, samplingInfo); t.start(); InputSampler.<LongWritable, Text>writePartitionFile( job, new InputSampler.SplitSampler<LongWritable, Text>( Math.max(1 << 16, conf.getInt("mapred.reduce.tasks", 1)), 10)); System.out.printf("%s :: Sampling complete in %d.%03d s.\n", commandName, t.stopS(), t.fms()); job.submit(); return job; }
@Override public int run(String[] arg) throws Exception { Job extractor = new Job(getConf()); extractor.setMapperClass(MapClass.class); // no reduce, just identity extractor.setJobName("x-trace indexer"); extractor.setJarByClass(this.getClass()); extractor.setMapOutputKeyClass(BytesWritable.class); extractor.setMapOutputValueClass(TextArrayWritable.class); extractor.setOutputKeyClass(BytesWritable.class); extractor.setOutputValueClass(TextArrayWritable.class); extractor.setInputFormatClass(SequenceFileInputFormat.class); extractor.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(extractor, new Path(arg[0])); FileOutputFormat.setOutputPath(extractor, new Path(arg[1])); System.out.println("looks OK. Submitting."); extractor.submit(); // extractor.waitForCompletion(false); return 0; }
@Override protected int run(CmdLineParser parser) { final List<String> args = parser.getRemainingArgs(); if (args.isEmpty()) { System.err.println("fixmate :: WORKDIR not given."); return 3; } if (args.size() == 1) { System.err.println("fixmate :: INPATH not given."); return 3; } if (!cacheAndSetProperties(parser)) return 3; final SAMFileReader.ValidationStringency stringency = Utils.toStringency( parser.getOptionValue( stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "fixmate"); if (stringency == null) return 3; Path wrkDir = new Path(args.get(0)); final List<String> strInputs = args.subList(1, args.size()); final List<Path> inputs = new ArrayList<Path>(strInputs.size()); for (final String in : strInputs) inputs.add(new Path(in)); final Configuration conf = getConf(); // Used by Utils.getMergeableWorkFile() to name the output files. final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName(); conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName); if (stringency != null) conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString()); final boolean globalSort = parser.getBoolean(sortOpt); if (globalSort) Utils.setHeaderMergerSortOrder(conf, SAMFileHeader.SortOrder.queryname); conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0])); final Timer t = new Timer(); try { // Required for path ".", for example. wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir); if (globalSort) Utils.configureSampling(wrkDir, intermediateOutName, conf); final Job job = new Job(conf); job.setJarByClass(FixMate.class); job.setMapperClass(FixMateMapper.class); job.setReducerClass(FixMateReducer.class); if (!parser.getBoolean(noCombinerOpt)) job.setCombinerClass(FixMateReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(SAMRecordWritable.class); job.setInputFormatClass(AnySAMInputFormat.class); job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class); for (final Path in : inputs) FileInputFormat.addInputPath(job, in); FileOutputFormat.setOutputPath(job, wrkDir); if (globalSort) { job.setPartitionerClass(TotalOrderPartitioner.class); System.out.println("fixmate :: Sampling..."); t.start(); InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile( job, new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>( 0.01, 10000, Math.max(100, reduceTasks))); System.out.printf("fixmate :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms()); } job.submit(); System.out.println("fixmate :: Waiting for job completion..."); t.start(); if (!job.waitForCompletion(verbose)) { System.err.println("fixmate :: Job failed."); return 4; } System.out.printf("fixmate :: Job complete in %d.%03d s.\n", t.stopS(), t.fms()); } catch (IOException e) { System.err.printf("fixmate :: Hadoop error: %s\n", e); return 4; } catch (ClassNotFoundException e) { throw new RuntimeException(e); } catch (InterruptedException e) { throw new RuntimeException(e); } if (outPath != null) try { Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "fixmate"); } catch (IOException e) { System.err.printf("fixmate :: Output merging failed: %s\n", e); return 5; } return 0; }
public List<DataSegment> run() throws IOException { final JobConf jobConf = new JobConf(); jobConf.setKeepFailedTaskFiles(false); for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) { jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()"); } final List<DataSegment> segments = converterConfig.getSegments(); if (segments.isEmpty()) { throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource()); } converterConfigIntoConfiguration(converterConfig, segments, jobConf); jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache())); setJobName(jobConf, segments); if (converterConfig.getJobPriority() != null) { jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority())); } final Job job = Job.getInstance(jobConf); job.setInputFormatClass(ConfigInputFormat.class); job.setMapperClass(ConvertingMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setMapSpeculativeExecution(false); job.setOutputFormatClass(ConvertingOutputFormat.class); JobHelper.setupClasspath( JobHelper.distributedClassPath(jobConf.getWorkingDirectory()), JobHelper.distributedClassPath( getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())), job); Throwable throwable = null; try { job.submit(); log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL()); final boolean success = job.waitForCompletion(true); if (!success) { final TaskReport[] reports = job.getTaskReports(TaskType.MAP); if (reports != null) { for (final TaskReport report : reports) { log.error( "Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics())); } } return null; } try { loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue(); writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue(); } catch (IOException ex) { log.error(ex, "Could not fetch counters"); } final JobID jobID = job.getJobID(); final Path jobDir = getJobPath(jobID, job.getWorkingDirectory()); final FileSystem fs = jobDir.getFileSystem(job.getConfiguration()); final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true); final List<Path> goodPaths = new ArrayList<>(); while (it.hasNext()) { final LocatedFileStatus locatedFileStatus = it.next(); if (locatedFileStatus.isFile()) { final Path myPath = locatedFileStatus.getPath(); if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) { goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY)); } } } if (goodPaths.isEmpty()) { log.warn("No good data found at [%s]", jobDir); return null; } final List<DataSegment> returnList = ImmutableList.copyOf( Lists.transform( goodPaths, new Function<Path, DataSegment>() { @Nullable @Override public DataSegment apply(final Path input) { try { if (!fs.exists(input)) { throw new ISE( "Somehow [%s] was found but [%s] is missing at [%s]", ConvertingOutputFormat.DATA_SUCCESS_KEY, ConvertingOutputFormat.DATA_FILE_KEY, jobDir); } } catch (final IOException e) { throw Throwables.propagate(e); } try (final InputStream stream = fs.open(input)) { return HadoopDruidConverterConfig.jsonMapper.readValue( stream, DataSegment.class); } catch (final IOException e) { throw Throwables.propagate(e); } } })); if (returnList.size() == segments.size()) { return returnList; } else { throw new 
ISE( "Tasks reported success but result length did not match! Expected %d found %d at path [%s]", segments.size(), returnList.size(), jobDir); } } catch (InterruptedException | ClassNotFoundException e) { RuntimeException exception = Throwables.propagate(e); throwable = exception; throw exception; } catch (Throwable t) { throwable = t; throw t; } finally { try { cleanup(job); } catch (IOException e) { if (throwable != null) { throwable.addSuppressed(e); } else { log.error(e, "Could not clean up job [%s]", job.getJobID()); } } } }
public boolean run() { try { /* * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear * in the final segment. */ if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) { throw new ISE( "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]", config.getPartitionsSpec()); } if (!config.getPartitionsSpec().isAssumeGrouped()) { final Job groupByJob = Job.getInstance( new Configuration(), String.format( "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())); JobHelper.injectSystemProperties(groupByJob); config.addJobProperties(groupByJob); groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class); groupByJob.setMapOutputKeyClass(BytesWritable.class); groupByJob.setMapOutputValueClass(NullWritable.class); groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class); groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class); groupByJob.setOutputKeyClass(BytesWritable.class); groupByJob.setOutputValueClass(NullWritable.class); groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class); JobHelper.setupClasspath( JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob); config.addInputPaths(groupByJob); config.intoConfiguration(groupByJob); FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir()); groupByJob.submit(); log.info( "Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL()); if (!groupByJob.waitForCompletion(true)) { log.error("Job failed: %s", groupByJob.getJobID()); return false; } } else { log.info("Skipping group-by job."); } /* * Read grouped data and determine appropriate partitions. */ final Job dimSelectionJob = Job.getInstance( new Configuration(), String.format( "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals())); dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19"); JobHelper.injectSystemProperties(dimSelectionJob); config.addJobProperties(dimSelectionJob); if (!config.getPartitionsSpec().isAssumeGrouped()) { // Read grouped data from the groupByJob. dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class); dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class); FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir()); } else { // Directly read the source data, since we assume it's already grouped. 
dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class); config.addInputPaths(dimSelectionJob); } SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob); dimSelectionJob.setMapOutputValueClass(Text.class); dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class); dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class); dimSelectionJob.setOutputKeyClass(BytesWritable.class); dimSelectionJob.setOutputValueClass(Text.class); dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class); dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class); dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size()); JobHelper.setupClasspath( JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob); config.intoConfiguration(dimSelectionJob); FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath()); dimSelectionJob.submit(); log.info( "Job %s submitted, status available at: %s", dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL()); if (!dimSelectionJob.waitForCompletion(true)) { log.error("Job failed: %s", dimSelectionJob.getJobID().toString()); return false; } /* * Load partitions determined by the previous job. */ log.info( "Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals()); FileSystem fileSystem = null; Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance()); int shardCount = 0; for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) { final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity); if (fileSystem == null) { fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration()); } if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) { List<ShardSpec> specs = config.JSON_MAPPER.readValue( Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference<List<ShardSpec>>() {}); List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size()); for (int i = 0; i < specs.size(); ++i) { actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++)); log.info( "DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i)); } shardSpecs.put(segmentGranularity.getStart(), actualSpecs); } else { log.info("Path[%s] didn't exist!?", partitionInfoPath); } } config.setShardSpecs(shardSpecs); return true; } catch (Exception e) { throw Throwables.propagate(e); } }
/** * start a vina hadoop job * * @param confLocalPath * @param receptorLocalPath * @param ligandPath * @param seed * @param topK * @param vinaJobID * @param node * @return */ public HashMap<String, String> startJob( String confLocalPath, String receptorLocalPath, ArrayList<String> ligandPath, String seed, int topK, String vinaJobID, int numPerNode, boolean verbose) { HashMap<String, String> hm = new HashMap<String, String>(); if (confLocalPath == null || receptorLocalPath == null || ligandPath == null || seed == null || vinaJobID == null || ligandPath.size() == 0 || topK < 0) { hm.put("flag", "false"); hm.put("hadoopID", "null"); hm.put("vinaJobID", vinaJobID); hm.put("log", "error arguments"); return hm; } GeneratePath gp = new GeneratePath(jobPath, srcDataPath); String confName = confLocalPath.substring(confLocalPath.lastIndexOf("/")); String confHDFSPath = jobPath + vinaJobID + confName; String receptorName = receptorLocalPath.substring(receptorLocalPath.lastIndexOf("/")); String receptorHDFSPATH = jobPath + vinaJobID + receptorName; HadoopFile hf; final String input = jobPath + vinaJobID + "/metadata"; final String output = jobPath + vinaJobID + "/order"; Path path = new Path(output); Configuration conf; FileSystem fs; Job job; try { gp.createMeta(ligandPath, vinaJobID, numPerNode); hf = new HadoopFile(); hf.mkdir(jobPath + "/" + vinaJobID + "/exception"); hf.mkdir(jobPath + "/" + vinaJobID + "/exceptionBackup"); hf.localToHadoop(confLocalPath, confHDFSPath); hf.localToHadoop(receptorLocalPath, receptorHDFSPATH); conf = (new HadoopConf()).getConf(); fs = FileSystem.get(conf); // set heart beat time 45min long milliSeconds = 45 * 60 * 1000; conf.setLong("mapred.task.timeout", milliSeconds); conf.set("vinaJobID", vinaJobID); conf.setInt("k", topK); conf.set("conf2HDFS", confHDFSPath); conf.set("receptorHDFS", receptorHDFSPATH); conf.set("seed", seed); if (fs.exists(path)) { fs.delete(path, true); } job = new Job(conf, vinaJobID); job.setNumReduceTasks(1); job.setJarByClass(VinaHadoop.class); job.setMapperClass(VinaMapper.class); job.setReducerClass(VinaReducer.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(DataPair.class); job.setOutputKeyClass(DoubleWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); } catch (IOException e) { // TODO Auto-generated catch block hm.put("flag", "false"); hm.put("hadoopID", "null"); hm.put("vinaJobID", vinaJobID); hm.put("log", e.getMessage()); return hm; } try { if (verbose) { // System.exit(job.waitForCompletion(true) ? 0 : 1); job.waitForCompletion(true); } else { job.submit(); } } catch (ClassNotFoundException | IOException | InterruptedException e) { // TODO Auto-generated catch block hm.put("flag", "false"); hm.put("hadoopID", "null"); hm.put("vinaJobID", vinaJobID); hm.put("log", e.getMessage()); return hm; } hm.put("flag", "true"); hm.put("hadoopID", job.getJobID().toString()); hm.put("vinaJobID", vinaJobID); hm.put("log", "null"); return hm; }
// Can be overridden by tests.
void submitJob(Job job, List<String> filesInJob, int priority)
    throws IOException, InterruptedException, ClassNotFoundException {
  job.submit();
  LOG.info("Job " + job.getID() + "(" + job.getJobName() + ") started");
  jobIndex.put(job, null);
}
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // Configure intermediate reduces conf.setInt(MRJobConfig.MRR_INTERMEDIATE_STAGES, 1); // Set reducer class for intermediate reduce conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduce.class"), MyGroupByReducer.class, Reducer.class); // Set reducer output key class conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage( 1, "mapreduce.map.output.key.class"), IntWritable.class, Object.class); // Set reducer output value class conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage( 1, "mapreduce.map.output.value.class"), Text.class, Object.class); conf.setInt( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduces"), 2); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: groupbyorderbymrrtest <in> <out>"); System.exit(2); } @SuppressWarnings("deprecation") Job job = new Job(conf, "groupbyorderbymrrtest"); job.setJarByClass(GroupByOrderByMRRTest.class); // Configure map job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // Configure reduce job.setReducerClass(MyOrderByNoOpReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); TezClient tezClient = new TezClient(new TezConfiguration(conf)); job.submit(); JobID jobId = job.getJobID(); ApplicationId appId = TypeConverter.toYarn(jobId).getAppId(); DAGClient dagClient = tezClient.getDAGClient(appId); DAGStatus dagStatus = null; while (true) { dagStatus = dagClient.getDAGStatus(); if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) { break; } try { Thread.sleep(500); } catch (InterruptedException e) { // continue; } } while (dagStatus.getState() == DAGStatus.State.RUNNING) { try { ExampleDriver.printMRRDAGStatus(dagStatus); try { Thread.sleep(1000); } catch (InterruptedException e) { // continue; } dagStatus = dagClient.getDAGStatus(); } catch (TezException e) { LOG.fatal("Failed to get application progress. Exiting"); System.exit(-1); } } ExampleDriver.printMRRDAGStatus(dagStatus); LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); System.exit(dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1); }