public SqoopOutputFormatLoadExecutorSpark(JobContext jobctx) {
  context = jobctx;
  loaderName = context.getConfiguration().get(MRJobConstants.JOB_ETL_LOADER);
  writer = new SqoopRecordWriter();
  // jackh: This must be conditional - extract the schema using credentials in the MR case, and
  // simply extract it from the credentials object in the Spark case (due to a known Hadoop/Spark
  // issue where the credentials are never added for serialization).
  // matcher = MatcherFactory.getMatcher(
  //     MRConfigurationUtils.getConnectorSchema(Direction.FROM, context.getConfiguration()),
  //     MRConfigurationUtils.getConnectorSchema(Direction.TO, context.getConfiguration()));
  matcher = MatcherFactory.getMatcher(
      MRConfigurationUtils.getConnectorSchemaUnsafe(Direction.FROM, context.getConfiguration()),
      MRConfigurationUtils.getConnectorSchemaUnsafe(Direction.TO, context.getConfiguration()));
  toDataFormat = (IntermediateDataFormat<?>) ClassUtils.instantiate(
      context.getConfiguration().get(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT));
  // Use the TO schema, since the SqoopDataWriter in the SqoopMapper encapsulates the toDataFormat.
  toDataFormat.setSchema(matcher.getToSchema());
}
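// Illustration (not Sqoop code): ClassUtils.instantiate(...) above resolves the configured
// intermediate-data-format class name reflectively. A minimal, hypothetical equivalent is
// sketched below, assuming the target class has a public no-arg constructor; ReflectiveFactory
// and newInstanceByName are made-up names, not Sqoop APIs.
final class ReflectiveFactory {
  static Object newInstanceByName(String className) {
    try {
      // Load the class by name and invoke its no-arg constructor.
      return Class.forName(className).getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException e) {
      throw new IllegalStateException("Cannot instantiate " + className, e);
    }
  }
}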
@Override
public void write(IntermediateDataFormat<?> key, Integer value) throws InterruptedException {
  free.acquire();
  checkIfConsumerThrew();
  // NOTE: this is the place where data written from the SqoopMapper writable is available to the
  // SqoopOutputFormat
  toDataFormat.setCSVTextData(key.toString());
  filled.release();
}
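// Illustration (not Sqoop code): the free/filled pair used in write() implements a one-slot
// producer/consumer handoff between the writer thread and the consumer (loader) thread. A
// minimal, self-contained sketch of that pattern follows; OneSlotHandoff, produce and consume
// are made-up names, not Sqoop APIs.
import java.util.concurrent.Semaphore;

class OneSlotHandoff {
  private final Semaphore free = new Semaphore(1);   // slot is empty, producer may write
  private final Semaphore filled = new Semaphore(0); // slot is full, consumer may read
  private volatile String slot;

  // Producer side, mirroring write(): wait for an empty slot, fill it, signal the consumer.
  void produce(String record) throws InterruptedException {
    free.acquire();
    slot = record;
    filled.release();
  }

  // Consumer side: wait for a filled slot, drain it, then hand the slot back to the producer.
  String consume() throws InterruptedException {
    filled.acquire();
    String record = slot;
    free.release();
    return record;
  }
}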
@SuppressWarnings({"unchecked", "rawtypes"})
@Override
public void run(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();

  String extractorName = conf.get(MRJobConstants.JOB_ETL_EXTRACTOR);
  Extractor extractor = (Extractor) ClassUtils.instantiate(extractorName);

  Schema fromSchema = MRConfigurationUtils.getConnectorSchema(Direction.FROM, conf);
  Schema toSchema = MRConfigurationUtils.getConnectorSchema(Direction.TO, conf);
  matcher = MatcherFactory.getMatcher(fromSchema, toSchema);

  String fromIDFClass = conf.get(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT);
  fromIDF = (IntermediateDataFormat<Object>) ClassUtils.instantiate(fromIDFClass);
  fromIDF.setSchema(matcher.getFromSchema());
  String toIDFClass = conf.get(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT);
  toIDF = (IntermediateDataFormat<Object>) ClassUtils.instantiate(toIDFClass);
  toIDF.setSchema(matcher.getToSchema());

  // Objects that should be passed to the Executor execution
  PrefixContext subContext = new PrefixContext(conf, MRJobConstants.PREFIX_CONNECTOR_FROM_CONTEXT);
  Object fromConfig = MRConfigurationUtils.getConnectorLinkConfig(Direction.FROM, conf);
  Object fromJob = MRConfigurationUtils.getConnectorJobConfig(Direction.FROM, conf);

  SqoopSplit split = context.getCurrentKey();
  ExtractorContext extractorContext = new ExtractorContext(
      subContext, new SqoopDataWriter(context, fromIDF, toIDF, matcher), fromSchema);

  try {
    LOG.info("Running extractor class " + extractorName);
    extractor.extract(extractorContext, fromConfig, fromJob, split.getPartition());
    LOG.info("Extractor has finished");
    context.getCounter(SqoopCounters.ROWS_READ).increment(extractor.getRowsRead());
  } catch (Exception e) {
    throw new SqoopException(MRExecutionError.MAPRED_EXEC_0017, e);
  } finally {
    LOG.info("Stopping progress service");
  }
}
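// Illustration (not Sqoop code): run() above follows a generic "pluggable extractor" shape -
// resolve an extractor from configuration, hand it a context that wraps a writer, and count the
// rows it produced. A self-contained, hypothetical sketch of that shape; RecordSource,
// runExtraction and the writer callback are made-up names, not Sqoop APIs.
import java.util.List;
import java.util.function.Consumer;

class PluggableExtractorSketch {
  interface RecordSource {
    List<String> readAll();
  }

  // Drives the source and forwards every record to the supplied writer, returning the row count
  // (analogous to getRowsRead() feeding the ROWS_READ counter above).
  static long runExtraction(RecordSource source, Consumer<String> writer) {
    long rowsRead = 0;
    for (String record : source.readAll()) {
      writer.accept(record); // analogous to the extractor pushing into the DataWriter
      rowsRead++;
    }
    return rowsRead;
  }
}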