/* * Helper function to create Vertex from MapWork. */ private Vertex createVertex( JobConf conf, MapWork mapWork, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx, TezWork tezWork) throws Exception { Path tezDir = getTezDir(mrScratchDir); // set up the operator plan Utilities.setMapWork(conf, mapWork, mrScratchDir, false); // create the directories FileSinkOperators need Utilities.createTmpDirs(conf, mapWork); // Tez ask us to call this even if there's no preceding vertex MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null); // finally create the vertex Vertex map = null; // use tez to combine splits boolean useTezGroupedSplits = false; int numTasks = -1; Class amSplitGeneratorClass = null; InputSplitInfo inputSplitInfo = null; Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class); boolean vertexHasCustomInput = false; if (tezWork != null) { for (BaseWork baseWork : tezWork.getParents(mapWork)) { if (tezWork.getEdgeType(baseWork, mapWork) == EdgeType.CUSTOM_EDGE) { vertexHasCustomInput = true; } } } if (vertexHasCustomInput) { useTezGroupedSplits = false; // grouping happens in execution phase. Setting the class to TezGroupedSplitsInputFormat // here would cause pre-mature grouping which would be incorrect. inputFormatClass = HiveInputFormat.class; conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class); // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using // this plug-in to avoid getting a serialized event at run-time. 
conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false); } else { // we'll set up tez to combine spits for us iff the input format // is HiveInputFormat if (inputFormatClass == HiveInputFormat.class) { useTezGroupedSplits = true; conf.setClass( "mapred.input.format.class", TezGroupedSplitsInputFormat.class, InputFormat.class); } } if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) { // if we're generating the splits in the AM, we just need to set // the correct plugin. amSplitGeneratorClass = MRInputAMSplitGenerator.class; } else { // client side split generation means we have to compute them now inputSplitInfo = MRHelpers.generateInputSplits( conf, new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_"))); numTasks = inputSplitInfo.getNumTasks(); } byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf); map = new Vertex( mapWork.getName(), new ProcessorDescriptor(MapTezProcessor.class.getName()).setUserPayload(serializedConf), numTasks, getContainerResource(conf)); Map<String, String> environment = new HashMap<String, String>(); MRHelpers.updateEnvironmentForMRTasks(conf, environment, true); map.setTaskEnvironment(environment); map.setJavaOpts(getContainerJavaOpts(conf)); assert mapWork.getAliasToWork().keySet().size() == 1; String alias = mapWork.getAliasToWork().keySet().iterator().next(); byte[] mrInput = null; if (useTezGroupedSplits) { mrInput = MRHelpers.createMRInputPayloadWithGrouping( serializedConf, HiveInputFormat.class.getName()); } else { mrInput = MRHelpers.createMRInputPayload(serializedConf, null); } map.addInput( alias, new InputDescriptor(MRInputLegacy.class.getName()).setUserPayload(mrInput), amSplitGeneratorClass); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); localResources.put(getBaseName(appJarLr), appJarLr); for (LocalResource lr : additionalLr) { localResources.put(getBaseName(lr), lr); } if (inputSplitInfo != null) { // only relevant for 
client-side split generation map.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints()); MRHelpers.updateLocalResourcesForInputSplits( FileSystem.get(conf), inputSplitInfo, localResources); } map.setTaskLocalResources(localResources); return map; }
/**
 * Delegates to the superclass configuration and then sets the {@code ignoreKey} flag.
 *
 * <p>{@code ignoreKey} becomes {@code true} exactly when the canonical name of the class
 * configured under {@code mapred.input.format.class} (defaulting to {@code TextInputFormat}
 * when unset) equals the canonical name of {@code TextInputFormat}; otherwise it is
 * {@code false}.
 *
 * @param job the job configuration to read the input format class from
 */
public void configure(JobConf job) {
  super.configure(job);

  String textInputFormatName = TextInputFormat.class.getCanonicalName();
  String inputFormatClassName =
      job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();

  // Class.getCanonicalName() returns null for anonymous and local classes. Comparing from the
  // TextInputFormat side keeps such an input format from triggering a NullPointerException;
  // it simply counts as "not TextInputFormat".
  this.ignoreKey = textInputFormatName.equals(inputFormatClassName);
}