/**
 * Adds the given jars and resources to the job's classpath via Hadoop's GenericOptionsParser
 * ('-libjars' / '-files'), preserving any values already passed on the command line.
 *
 * @param job JobConf to which the classpath entries are added
 * @param jars comma-separated jars to be added to the classpath
 * @param resources comma-separated files to be added to the classpath
 * @throws IOException
 */
public static void addClassPath(final JobConf job, String jars, String resources)
    throws IOException {
  LOGGER.debug(
      "Libraries being added to the classpath: "
          + job.get(JOB_CONF_JARS)
          + " Resources: "
          + job.get(JOB_CONF_RESOURCES));

  // Take the libjars and files values passed from the console when the job was launched.
  StringBuilder oldJars = new StringBuilder().append(job.get(JOB_CONF_JARS));
  StringBuilder oldFiles = new StringBuilder().append(job.get(JOB_CONF_RESOURCES));
  String jarsTmp = jars;
  String resourcesTmp = resources;
  if (!oldJars.toString().equals(NULL) && oldJars.length() > 0) {
    oldJars.append(SEPARATOR_COMMA);
    oldJars.append(jarsTmp);
    jarsTmp = oldJars.toString();
  }
  if (resourcesTmp != null && resourcesTmp.length() > 0) {
    if (!oldFiles.toString().equals(NULL) && oldFiles.length() > 0) {
      oldFiles.append(SEPARATOR_COMMA);
      oldFiles.append(resourcesTmp);
      resourcesTmp = oldFiles.toString();
    }
    new GenericOptionsParser(
        job,
        new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp, GENERIC_PARSER_FILES, resourcesTmp});
  } else {
    new GenericOptionsParser(job, new String[] {GENERIC_PARSER_LIB_JARS, jarsTmp});
  }
}
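/*
 * Hedged usage sketch (not from the source): shows how a driver might invoke the addClassPath
 * helper above before submitting a job. The class names ClasspathDriverSketch and
 * HadoopClasspathUtils, and the example paths, are assumptions made purely for illustration.
 */
import org.apache.hadoop.mapred.JobConf;

public class ClasspathDriverSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    // Comma-separated HDFS paths; placeholder values.
    String jars = "/libs/app-deps.jar,/libs/extra.jar";
    String resources = "/conf/lookup.properties";
    // HadoopClasspathUtils stands in for whatever class declares addClassPath above.
    HadoopClasspathUtils.addClassPath(job, jars, resources);
    // ... set input/output formats, mapper, reducer, then submit the job.
  }
}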
@Override
public void map(
    WritableComparable key,
    CompactorInputSplit split,
    OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector,
    Reporter reporter)
    throws IOException {
  // This will only get called once, since CompactRecordReader only returns one record,
  // the input split.
  // Based on the split we're passed we go instantiate the real reader and then iterate on it
  // until it finishes.
  @SuppressWarnings("unchecked") // since there is no way to parametrize instance of Class
  AcidInputFormat<WritableComparable, V> aif =
      instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME));
  ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY));

  boolean isMajor = jobConf.getBoolean(IS_MAJOR, false);
  AcidInputFormat.RawReader<V> reader =
      aif.getRawReader(
          jobConf, isMajor, split.getBucket(), txnList, split.getBaseDir(), split.getDeltaDirs());
  RecordIdentifier identifier = reader.createKey();
  V value = reader.createValue();
  getWriter(reporter, reader.getObjectInspector(), split.getBucket());
  while (reader.next(identifier, value)) {
    if (isMajor && reader.isDelete(value)) {
      continue;
    }
    writer.write(value);
    reporter.progress();
  }
}
@Override
@SuppressWarnings("unchecked")
public void configure(JobConf conf) {
  super.configure(conf);
  keySerializerDefinition = getStoreDef().getKeySerializer();
  valueSerializerDefinition = getStoreDef().getValueSerializer();

  try {
    SerializerFactory factory = new DefaultSerializerFactory();
    if (conf.get("serializer.factory") != null) {
      factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
    }
    keySerializer = (Serializer<Object>) factory.getSerializer(keySerializerDefinition);
    valueSerializer = (Serializer<Object>) factory.getSerializer(valueSerializerDefinition);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
  valueCompressor =
      new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());
  routingStrategy =
      new ConsistentRoutingStrategy(
          getCluster().getNodes(), getStoreDef().getReplicationFactor());
}
@Test
public void testConfigureAccumuloInputFormatWithIterators() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  ColumnMapper columnMapper =
      new ColumnMapper(
          conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
          conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
          columnNames,
          columnTypes);
  Set<Pair<Text, Text>> cfCqPairs =
      inputformat.getPairCollection(columnMapper.getColumnMappings());
  List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
  Set<Range> ranges = Collections.singleton(new Range());
  String instanceName = "realInstance";
  String zookeepers = "host1:2181,host2:2181,host3:2181";

  IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
  iterators.add(cfg);

  cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
  iterators.add(cfg);

  ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
  HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);

  // Stub out the ZKI mock
  Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
  Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);

  // Call out to the real configure method
  Mockito.doCallRealMethod()
      .when(mockInputFormat)
      .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod()
      .when(mockInputFormat)
      .getPairCollection(columnMapper.getColumnMappings());

  mockInputFormat.configure(
      conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
  Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat)
      .setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
public void configure(JobConf conf) {
  numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
  centersDirectory = conf.get("centersReadDirectory");

  try {
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);
    // Read one center per SequenceFile under <centersReadDirectory>/centers/<index>.
    for (int index = 0; index < numberOfCenters; ++index) {
      SequenceFile.Reader reader =
          new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);
      LongWritable key = new LongWritable();
      Point center = new Point();
      reader.next(key, center);
      centers.add(center);
      reader.close();
    }
  } catch (IOException e) {
    // Log and continue with whatever centers were read; there is no way to recover here.
    System.err.println("Failed to read cluster centers from " + centersDirectory);
    e.printStackTrace();
  }
}
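/*
 * Hedged companion sketch (not from the source): writes one center per SequenceFile under
 * <centersReadDirectory>/centers/<index>, matching the layout that the configure() method above
 * reads. The class name is a placeholder and Point is assumed to be the same Writable value type
 * used above.
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;

public class CenterWriterSketch {
  public static void writeCenters(String centersDirectory, Point[] centers) throws Exception {
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);
    for (int index = 0; index < centers.length; ++index) {
      // One file per center: <centersDirectory>/centers/<index>
      SequenceFile.Writer writer =
          SequenceFile.createWriter(
              fs, c, new Path(centersDirectory + "/centers/" + index),
              LongWritable.class, Point.class);
      writer.append(new LongWritable(index), centers[index]);
      writer.close();
    }
  }
}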
public void configure(JobConf conf) {
  /*
   * Reads all the configuration values and the distributed cache files.
   */
  // Read the number of visible and hidden units from the configuration.
  inputNumdims = conf.get("numdims");
  inputNumhid = conf.get("numhid");

  // Read the weights from the distributed cache.
  Path[] pathwaysFiles = new Path[0];
  try {
    pathwaysFiles = DistributedCache.getLocalCacheFiles(conf);
    for (Path path : pathwaysFiles) {
      /*
       * This loop reads all the distributed cache files; in practice the driver ensures that
       * there is only one, so the last line read wins.
       */
      BufferedReader fis = new BufferedReader(new FileReader(path.toString()));
      weightline = fis.readLine();
      fis.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
@Override
public void close() {
  System.err.println(
      "Target: " + vocE.size() + " types. Writing to " + job_.get("root", null) + "/vocab.E");
  System.err.println(
      "Source: " + vocF.size() + " types. Writing to " + job_.get("root", null) + "/vocab.F");

  // Write out the vocabularies to file.
  try {
    FileSystem fs = FileSystem.get(job_);
    DataOutputStream dos =
        new DataOutputStream(
            new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.E"))));
    ((VocabularyWritable) vocE).write(dos);
    dos.close();
    DataOutputStream dos2 =
        new DataOutputStream(
            new BufferedOutputStream(fs.create(new Path(job_.get("root", null) + "/vocab.F"))));
    ((VocabularyWritable) vocF).write(dos2);
    dos2.close();
  } catch (IOException e) {
    throw new RuntimeException("Vocab couldn't be written to disk.\n" + e.toString());
  }
}
/**
 * Driver to copy srcPath to destPath depending on the required protocol.
 *
 * @param conf the job configuration
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args) throws IOException {
  LOG.info("srcPaths=" + args.srcs);
  LOG.info("destPath=" + args.dst);
  checkSrcPath(conf, args.srcs);

  JobConf job = createJobConf(conf);
  if (args.preservedAttributes != null) {
    job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
  }
  if (args.mapredSslConf != null) {
    job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
  }

  // Initialize the mapper
  try {
    setup(conf, job, args);
    JobClient.runJob(job);
    finalize(conf, job, args.dst, args.preservedAttributes);
  } finally {
    // delete tmp
    fullyDelete(job.get(TMP_DIR_LABEL), job);
    // delete jobDirectory
    fullyDelete(job.get(JOB_DIR_LABEL), job);
  }
}
public void configure(JobConf job) {
  sLogger.setLevel(Level.INFO);

  srcLang = job.get("fLang");
  mJob = job;
  pwsimMapping = new HMapIV<ArrayListOfIntsWritable>();
  valOut = new PairOfIntString();
  keyOut = new PairOfInts();

  // read doc ids of sample into vectors
  String samplesFile = job.get("Ivory.SampleFile");
  if (samplesFile != null) {
    try {
      samplesMap = readSamplesFromCache(getFilename(samplesFile), job);
    } catch (NumberFormatException e) {
      e.printStackTrace();
      throw new RuntimeException("Incorrect format in " + samplesFile);
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException("I/O error in " + samplesFile);
    } catch (Exception e) {
      e.printStackTrace();
      throw new RuntimeException("Error reading sample file: " + samplesFile);
    }
  }
}
private void init() {
  String nodes = conf.get(AnalysisProcessorConfiguration.nodes);
  this.nodelist = String2List(nodes, SEPERATOR_COMMA);

  String status = conf.get("status");
  this.statuslist = String2List(status, SEPERATOR_COMMA);

  parsePhase();
}
public List<JavaScriptSource> setUpSource(JobConf job) {
  List<JavaScriptSource> js = new ArrayList<JavaScriptSource>();
  js.add(
      new JavaScriptSource("emit.js", "function emit(k,v){$mapper.emit(k,v,$output_collector)}"));
  js.add(new JavaScriptSource("map.js", job.get("map.js")));
  js.add(new JavaScriptSource("reduce.js", job.get("reduce.js")));
  js.add(new JavaScriptSource("functions.js", job.get("functions.js")));
  js.add(new JavaScriptSource("filter.js", job.get("filter.js")));
  js.add(new JavaScriptSource("query_id", job.get("query_id")));
  try {
    Path[] files = DistributedCache.getLocalCacheFiles(job);
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        Path path = files[i];
        if (path.getName().endsWith(".js")) {
          String source = Files.toString(new File(path.toString()), Charset.forName("UTF-8"));
          js.add(new JavaScriptSource(path.getName(), source));
        }
      }
    }
  } catch (IOException ioe) {
    throw new RuntimeException("Couldn't read from DistributedCache", ioe);
  }
  return js;
}
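/*
 * Hedged sketch (not from the source): populating the JobConf keys that setUpSource() above reads
 * ("map.js", "reduce.js", "functions.js", "filter.js", "query_id"). The class name and the script
 * bodies are placeholders for illustration.
 */
import org.apache.hadoop.mapred.JobConf;

public class JsJobSetupSketch {
  public static JobConf buildConf() {
    JobConf job = new JobConf();
    job.set("map.js", "function map(doc){ emit(doc.type, 1); }");
    job.set("reduce.js", "function reduce(key, values){ return values.length; }");
    job.set("functions.js", ""); // optional shared helper functions
    job.set("filter.js", "");    // optional record filter
    job.set("query_id", "query-0001");
    return job;
  }
}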
/* (non-Javadoc)
 * @see org.apache.hadoop.chukwa.analysis.HiTune.AnalysisProcessor#run()
 */
@Override
public void run() {
  long timestamp = System.currentTimeMillis();
  JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
  try {
    conf.setJobName(this.getClass().getSimpleName() + timestamp);
    conf.setInputFormat(MultiSequenceFileInputFormat.class);
    conf.setMapperClass(InstrumentDataflow.MapClass.class);
    conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
    conf.setOutputKeyClass(Text.class);

    Class<? extends WritableComparable> outputKeyClass =
        Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
            .asSubclass(WritableComparable.class);
    Class<? extends Writable> outputValueClass =
        Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
            .asSubclass(Writable.class);
    conf.setMapOutputKeyClass(outputKeyClass);
    conf.setMapOutputValueClass(outputValueClass);

    conf.setOutputValueClass(TextArrayWritable.class);
    conf.setOutputFormat(CSVFileOutputFormat.class);

    String outputPaths =
        conf.get(AnalysisProcessorConfiguration.reportfolder)
            + "/"
            + conf.get(AnalysisProcessorConfiguration.reportfile);
    String temp_outputPaths = getTempOutputDir(outputPaths);

    if (this.inputfiles != null) {
      log.debug("inputPaths:" + inputfiles);
      FileInputFormat.setInputPaths(conf, inputfiles);
      FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
      // FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
      // conf.setNumReduceTasks(1);

      try {
        JobClient.runJob(conf);
        moveResults(conf, outputPaths, temp_outputPaths);
      } catch (IOException e) {
        log.warn("For " + getOutputFileName() + " :JOB fails!");
        log.warn(e);
        e.printStackTrace();
        this.MOVE_DONE = false;
      }
    } else {
      log.warn("For " + getOutputFileName() + " :No input path!");
    }
  } catch (Exception e) {
    log.warn("Job preparation failure!");
    log.warn(e);
    e.printStackTrace();
  }
}
/**
 * The main driver for the word count map/reduce program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException when there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), WordCountSeqOutput.class);
  conf.setJobName("wordcount_seqOF");

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  // the keys are words (strings)
  conf.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  // conf.setOutputValueClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(MapClass.class);
  conf.setCombinerClass(Combiner.class);
  conf.setReducerClass(Reduce.class);

  conf.setOutputFormat(SequenceFileOutputFormat.class);

  // // compress Mapper output
  // conf.setCompressMapOutput(true);
  // conf.setMapOutputCompressorClass(org.apache.hadoop.io.compress.GzipCodec.class);

  // compress final output
  conf.set("mapred.output.compress", conf.get("mapred.output.compress", "true"));
  conf.set("mapred.output.compression.type", conf.get("mapred.output.compression.type", "BLOCK"));
  conf.set(
      "mapred.output.compression.codec",
      conf.get("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"));

  List<String> other_args = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-m".equals(args[i])) {
        conf.setNumMapTasks(Integer.parseInt(args[++i]));
      } else if ("-r".equals(args[i])) {
        conf.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else {
        other_args.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage();
    }
  }
  // Make sure there are exactly 2 parameters left.
  if (other_args.size() != 2) {
    System.out.println(
        "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
    return printUsage();
  }
  FileInputFormat.setInputPaths(conf, other_args.get(0));
  FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

  JobClient.runJob(conf);
  return 0;
}
public void configure(JobConf job) {
  double fpLat = Double.parseDouble(job.get("fpLat"));
  double fpLong = Double.parseDouble(job.get("fpLong"));
  fp = new FocalPoint(fpLong, fpLat);
  k = Integer.parseInt(job.get("k"));

  Comparator<Tuple> comparer = new TupleAscComparer();
  queue = new PriorityQueue<Tuple>(50, comparer);
}
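/*
 * Hedged driver-side sketch (not from the source): shows how the "fpLat", "fpLong" and "k"
 * properties read in configure() above could be set before job submission. The driver class
 * name, job name, and coordinate values are assumptions.
 */
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class FocalPointDriverSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(FocalPointDriverSketch.class);
    job.setJobName("k-nearest-to-focal-point");
    // Values consumed by the mapper's configure() shown above.
    job.set("fpLat", "40.7128");
    job.set("fpLong", "-74.0060");
    job.set("k", "10");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    JobClient.runJob(job);
  }
}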
public void setLocalElasticSearchInstallation(JobConf conf) {
  String esConfigPath = conf.get(ES_CONFIG_OPT, ES_CONFIG);
  String esPluginsPath = conf.get(ES_PLUGINS_OPT, ES_PLUGINS);
  System.setProperty(ES_CONFIG_OPT, esConfigPath);
  System.setProperty(ES_PLUGINS_OPT, esPluginsPath);
  LOG.info(
      "Using Elasticsearch configuration file at "
          + esConfigPath
          + " and plugin directory "
          + esPluginsPath);
}
/**
 * Configures the language configuration field from a distributed cache file.
 *
 * @param path local path of the cached file to inspect
 * @param jobConf job configuration holding the language file name and language code
 * @throws MinerApplicationException if the language file is not present in the cache
 */
private void updateLanguageConfiguration(Path path, JobConf jobConf)
    throws MinerApplicationException {
  if (path.getName().equals(new Path(jobConf.get(AbstractStep.LANG_FILE)).getName())) {
    languageConfiguration = new LanguageConfiguration(jobConf.get(AbstractStep.LANG_CODE), path);
  }
  if (languageConfiguration == null) {
    throw new MinerApplicationException(
        String.format(NOT_IN_CACHE, jobConf.get(AbstractStep.LANG_FILE)));
  }
}
@Override
public void commitJob(JobContext context) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION));
  Path finalLocation = new Path(conf.get(FINAL_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Moving contents of " + tmpLocation.toString() + " to " + finalLocation.toString());

  FileStatus[] contents = fs.listStatus(tmpLocation);
  for (int i = 0; i < contents.length; i++) {
    Path newPath = new Path(finalLocation, contents[i].getPath().getName());
    fs.rename(contents[i].getPath(), newPath);
  }
  fs.delete(tmpLocation, true);
}
public void configure(JobConf job) {
  String inputFile = job.get("map.input.file");
  String deltaInputPath = job.get(DELTA_FILE_PATH_CONF);
  if (inputFile.contains(deltaInputPath)) {
    isDeltaChar = IS_DELTA_TRUE;
  }

  StringTokenizer st = new StringTokenizer(job.get(PRIMARY_KEYS_CONF), ",");
  primaryKeyIndexes = new int[st.countTokens()];
  for (int i = 0; st.hasMoreTokens(); i++) {
    primaryKeyIndexes[i] = Integer.parseInt(st.nextToken());
  }
}
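/*
 * Hedged driver-side sketch (not from the source): settings consumed by the configure() method
 * above. The literal key names and values below are placeholders standing in for the actual
 * values of the DELTA_FILE_PATH_CONF and PRIMARY_KEYS_CONF constants referenced above.
 */
import org.apache.hadoop.mapred.JobConf;

public class DeltaJoinDriverSketch {
  // Placeholder keys; the real names come from the constants referenced above.
  private static final String DELTA_FILE_PATH_CONF = "delta.file.path";
  private static final String PRIMARY_KEYS_CONF = "primary.key.indexes";

  public static void configureJob(JobConf job) {
    // Substring matched against "map.input.file" to detect delta-side records.
    job.set(DELTA_FILE_PATH_CONF, "/warehouse/table/delta");
    // Comma-separated column indexes that form the primary key.
    job.set(PRIMARY_KEYS_CONF, "0,2");
  }
}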
@Override
public String getTaskAttemptLogUrl(
    JobConf conf, String taskTrackerHttpAddress, String taskAttemptId)
    throws MalformedURLException {
  if (conf.get("mapreduce.framework.name") != null
      && conf.get("mapreduce.framework.name").equals("yarn")) {
    // If the cluster is running in MR2 mode, return null.
    LOG.warn("Can't fetch tasklog: TaskLogServlet is not supported in MR2 mode.");
    return null;
  } else {
    // Was using Hadoop-internal API to get tasklogs, disabled until MAPREDUCE-5857 is fixed.
    LOG.warn("Can't fetch tasklog: TaskLogServlet is not supported in MR1 mode.");
    return null;
  }
}
private RunningJob submitAction(Context context, Namespace ns) throws Exception {
  Hive2ActionExecutor ae = new Hive2ActionExecutor();

  WorkflowAction action = context.getAction();

  ae.prepareActionDir(getFileSystem(), context);
  ae.submitLauncher(getFileSystem(), context, action);

  String jobId = action.getExternalId();
  String jobTracker = action.getTrackerUri();
  String consoleUrl = action.getConsoleUrl();
  assertNotNull(jobId);
  assertNotNull(jobTracker);
  assertNotNull(consoleUrl);

  Element e = XmlUtils.parseXml(action.getConf());
  XConfiguration conf =
      new XConfiguration(
          new StringReader(XmlUtils.prettyPrint(e.getChild("configuration", ns)).toString()));
  conf.set("mapred.job.tracker", e.getChildTextTrim("job-tracker", ns));
  conf.set("fs.default.name", e.getChildTextTrim("name-node", ns));
  conf.set("user.name", context.getProtoActionConf().get("user.name"));
  conf.set("group.name", getTestGroup());

  JobConf jobConf = Services.get().get(HadoopAccessorService.class).createJobConf(jobTracker);
  XConfiguration.copy(conf, jobConf);
  String user = jobConf.get("user.name");
  JobClient jobClient =
      Services.get().get(HadoopAccessorService.class).createJobClient(user, jobConf);
  final RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
  assertNotNull(runningJob);
  return runningJob;
}
/**
 * Gets a set of input splits for a MapReduce job running over a Kiji table. One split is created
 * per region in the input Kiji table.
 *
 * @param configuration of the job using the splits. The configuration should specify the input
 *     Kiji table being used, through the configuration variable {@link
 *     KijiConfKeys#KIJI_INPUT_TABLE_URI}.
 * @param numSplits desired for the job. This framework hint is ignored by this method.
 * @return an array of input splits to be operated on in the MapReduce job.
 * @throws IOException if an I/O error occurs while communicating with HBase to determine the
 *     regions in the Kiji table.
 */
@Override
public InputSplit[] getSplits(JobConf configuration, int numSplits) throws IOException {
  final String uriString =
      Preconditions.checkNotNull(configuration.get(KijiConfKeys.KIJI_INPUT_TABLE_URI));
  final KijiURI inputTableURI = KijiURI.newBuilder(uriString).build();
  final Kiji kiji = Kiji.Factory.open(inputTableURI, configuration);
  try {
    final KijiTable table = kiji.openTable(inputTableURI.getTable());
    try {
      final HTableInterface htable = HBaseKijiTable.downcast(table).getHTable();

      final List<InputSplit> splits = Lists.newArrayList();
      for (KijiRegion region : table.getRegions()) {
        final byte[] startKey = region.getStartKey();
        // TODO(KIJIMR-65): For now pick the first available location (ie. region server), if any.
        final String location =
            region.getLocations().isEmpty() ? null : region.getLocations().iterator().next();
        final TableSplit tableSplit =
            new TableSplit(htable.getTableName(), startKey, region.getEndKey(), location);
        splits.add(new KijiTableSplit(tableSplit));
      }
      return splits.toArray(new InputSplit[0]);
    } finally {
      table.release();
    }
  } finally {
    kiji.release();
  }
}
private static void finalize(
    Configuration conf, JobConf jobconf, final Path destPath, String preservedAttributes)
    throws IOException {
  if (preservedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
  if (!preserved.contains(FileAttribute.USER)
      && !preserved.contains(FileAttribute.GROUP)
      && !preserved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  SequenceFile.Reader in = null;
  try {
    in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
    Text dsttext = new Text();
    FilePair pair = new FilePair();
    while (in.next(dsttext, pair)) {
      Path absdst = new Path(destPath, pair.output);
      updatePermissions(pair.input, dstfs.getFileStatus(absdst), preserved, dstfs);
    }
  } finally {
    checkAndClose(in);
  }
}
/**
 * Start the child process to handle the task for us.
 *
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(
    JobConf conf,
    RecordReader<FloatWritable, NullWritable> recordReader,
    OutputCollector<K2, V2> output,
    Reporter reporter,
    Class<? extends K2> outputKeyClass,
    Class<? extends V2> outputValueClass)
    throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  Map<String, String> env = new HashMap<String, String>();
  // add TMPDIR environment variable with the value of java.io.tmpdir
  env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
  env.put("hadoop.pipes.command.port", Integer.toString(serverSocket.getLocalPort()));
  List<String> cmd = new ArrayList<String>();
  String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
  FileUtil.chmod(executable, "a+x");
  cmd.add(executable);
  // wrap the command in a stdout/stderr capture
  TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
  File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
  File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
  long logLength = TaskLog.getTaskLogLength(conf);
  cmd = TaskLog.captureOutAndError(cmd, stdout, stderr, logLength);

  process = runClient(cmd, env);
  clientSocket = serverSocket.accept();
  handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
  K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
  V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
  downlink =
      new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);
  downlink.start();
  downlink.setJobConf(conf);
}
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
  List<String> result = newArrayList();

  String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
  Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

  try {
    Path partitionDirPath = new Path(part.getSd().getLocation());
    FileStatus[] partitionContent =
        partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
    for (FileStatus currStatus : partitionContent) {
      if (!currStatus.isDir()) {
        if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
          result.add(currStatus.getPath().toUri().getPath());
        } else {
          LOG.debug(
              "Ignoring path {} since matches ignore regex {}",
              currStatus.getPath().toUri().getPath(),
              ignoreFileRegex);
        }
      }
    }
  } catch (IOException e) {
    logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
  }

  return result;
}
private static IntWritable deduceInputFile(JobConf job) {
  Path[] inputPaths = FileInputFormat.getInputPaths(job);
  Path inputFile = new Path(job.get("map.input.file"));

  // value == one for sort-input; value == two for sort-output
  return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
}
public void testActionCheck() throws Exception {
  JPAService jpaService = Services.get().get(JPAService.class);
  WorkflowJobBean job =
      this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
  WorkflowActionBean action =
      this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
  WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

  new ActionStartXCommand(action.getId(), "map-reduce").call();
  action = jpaService.execute(wfActionGetCmd);

  ActionExecutorContext context =
      new ActionXCommand.ActionExecutorContext(job, action, false, false);
  MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
  JobConf conf =
      actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
  String user = conf.get("user.name");
  JobClient jobClient =
      Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

  String launcherId = action.getExternalId();

  final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

  waitFor(
      120 * 1000,
      new Predicate() {
        public boolean evaluate() throws Exception {
          return launcherJob.isComplete();
        }
      });
  assertTrue(launcherJob.isSuccessful());
  Map<String, String> actionData =
      LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
  assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

  new ActionCheckXCommand(action.getId()).call();
  action = jpaService.execute(wfActionGetCmd);
  String mapperId = action.getExternalId();
  String childId = action.getExternalChildIDs();

  assertTrue(launcherId.equals(mapperId));

  final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));

  waitFor(
      120 * 1000,
      new Predicate() {
        public boolean evaluate() throws Exception {
          return mrJob.isComplete();
        }
      });
  assertTrue(mrJob.isSuccessful());

  new ActionCheckXCommand(action.getId()).call();
  action = jpaService.execute(wfActionGetCmd);

  assertEquals("SUCCEEDED", action.getExternalStatus());
}
/**
 * Reads the dataset and population size from the job configuration so that they are available
 * before the mapper is called.
 *
 * @param job this map/reduce job
 */
@Override
public void configure(JobConf job) {
  popsize = job.getInt("popsize", popsize);
  data = job.get("dataset", data);
  new Population(data, popsize);
}
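/*
 * Hedged sketch (not from the source): how a driver might populate the "popsize" and "dataset"
 * properties read by configure() above. The class name and values are placeholders.
 */
import org.apache.hadoop.mapred.JobConf;

public class GaDriverSketch {
  public static JobConf buildConf() {
    JobConf job = new JobConf();
    job.setInt("popsize", 100);               // read back via job.getInt("popsize", ...)
    job.set("dataset", "/data/training.csv"); // read back via job.get("dataset", ...)
    // ... set mapper/reducer classes, input/output paths, then submit.
    return job;
  }
}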
@Test
public void testConfigureAccumuloInputFormatWithAuthorizations() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar");
  ColumnMapper columnMapper =
      new ColumnMapper(
          conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
          conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
          columnNames,
          columnTypes);
  Set<Pair<Text, Text>> cfCqPairs =
      inputformat.getPairCollection(columnMapper.getColumnMappings());
  List<IteratorSetting> iterators = Collections.emptyList();
  Set<Range> ranges = Collections.singleton(new Range());
  String instanceName = "realInstance";
  String zookeepers = "host1:2181,host2:2181,host3:2181";

  ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
  HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);

  // Stub out the ZKI mock
  Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
  Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);

  // Call out to the real configure method
  Mockito.doCallRealMethod()
      .when(mockInputFormat)
      .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod()
      .when(mockInputFormat)
      .getPairCollection(columnMapper.getColumnMappings());

  mockInputFormat.configure(
      conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);

  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
  Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat).setScanAuthorizations(conf, new Authorizations("foo,bar"));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
@Override
public void abortJob(JobContext context, int status) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Removing " + tmpLocation.toString());
  fs.delete(tmpLocation, true);
}
/**
 * Mapper configuration. Extracts source and destination file system, as well as top-level paths
 * on source and destination directories. Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job) {
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  this.job = job;
}