public SqoopOutputFormatLoadExecutorSpark(JobContext jobctx) {
  context = jobctx;
  loaderName = context.getConfiguration().get(MRJobConstants.JOB_ETL_LOADER);
  writer = new SqoopRecordWriter();
  // jackh: This must be conditional - extract the schema using credentials in the MR case,
  // but read it straight from the configuration in the Spark case (due to a known
  // Hadoop/Spark issue where the credentials are never added for serialization).
  // matcher = MatcherFactory.getMatcher(
  //     MRConfigurationUtils.getConnectorSchema(Direction.FROM, context.getConfiguration()),
  //     MRConfigurationUtils.getConnectorSchema(Direction.TO, context.getConfiguration()));
  matcher = MatcherFactory.getMatcher(
      MRConfigurationUtils.getConnectorSchemaUnsafe(Direction.FROM, context.getConfiguration()),
      MRConfigurationUtils.getConnectorSchemaUnsafe(Direction.TO, context.getConfiguration()));
  toDataFormat = (IntermediateDataFormat<?>) ClassUtils.instantiate(
      context.getConfiguration().get(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT));
  // Using the TO schema, since the SqoopDataWriter in the SqoopMapper encapsulates the
  // toDataFormat.
  toDataFormat.setSchema(matcher.getToSchema());
}
@SuppressWarnings({"unchecked", "rawtypes"}) @Override public void run(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); String extractorName = conf.get(MRJobConstants.JOB_ETL_EXTRACTOR); Extractor extractor = (Extractor) ClassUtils.instantiate(extractorName); Schema fromSchema = MRConfigurationUtils.getConnectorSchema(Direction.FROM, conf); Schema toSchema = MRConfigurationUtils.getConnectorSchema(Direction.TO, conf); matcher = MatcherFactory.getMatcher(fromSchema, toSchema); String fromIDFClass = conf.get(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT); fromIDF = (IntermediateDataFormat<Object>) ClassUtils.instantiate(fromIDFClass); fromIDF.setSchema(matcher.getFromSchema()); String toIDFClass = conf.get(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT); toIDF = (IntermediateDataFormat<Object>) ClassUtils.instantiate(toIDFClass); toIDF.setSchema(matcher.getToSchema()); // Objects that should be passed to the Executor execution PrefixContext subContext = new PrefixContext(conf, MRJobConstants.PREFIX_CONNECTOR_FROM_CONTEXT); Object fromConfig = MRConfigurationUtils.getConnectorLinkConfig(Direction.FROM, conf); Object fromJob = MRConfigurationUtils.getConnectorJobConfig(Direction.FROM, conf); SqoopSplit split = context.getCurrentKey(); ExtractorContext extractorContext = new ExtractorContext( subContext, new SqoopDataWriter(context, fromIDF, toIDF, matcher), fromSchema); try { LOG.info("Running extractor class " + extractorName); extractor.extract(extractorContext, fromConfig, fromJob, split.getPartition()); LOG.info("Extractor has finished"); context.getCounter(SqoopCounters.ROWS_READ).increment(extractor.getRowsRead()); } catch (Exception e) { throw new SqoopException(MRExecutionError.MAPRED_EXEC_0017, e); } finally { LOG.info("Stopping progress service"); } }
@SuppressWarnings({"rawtypes", "unchecked"}) @Override public void run() { LOG.info("SqoopOutputFormatLoadExecutor consumer thread is starting"); try { DataReader reader = new SqoopOutputFormatDataReader(); Configuration conf = context.getConfiguration(); Loader loader = (Loader) ClassUtils.instantiate(loaderName); // Objects that should be passed to the Loader PrefixContext subContext = new PrefixContext(conf, MRJobConstants.PREFIX_CONNECTOR_TO_CONTEXT); Object connectorLinkConfig = MRConfigurationUtils.getConnectorLinkConfigUnsafe(Direction.TO, conf); Object connectorToJobConfig = MRConfigurationUtils.getConnectorJobConfigUnsafe(Direction.TO, conf); // Using the TO schema since the SqoopDataWriter in the SqoopMapper // encapsulates the toDataFormat // Create loader context LoaderContext loaderContext = new LoaderContext(subContext, reader, matcher.getToSchema()); LOG.info("Running loader class " + loaderName); loader.load(loaderContext, connectorLinkConfig, connectorToJobConfig); LOG.info("Loader has finished"); ((TaskAttemptContext) jobctx) .getCounter(SqoopCounters.ROWS_WRITTEN) .increment(loader.getRowsWritten()); } catch (Throwable t) { readerFinished = true; LOG.error("Error while loading data out of MR job.", t); // Release so that the writer can tell Sqoop something went // wrong. free.release(); throw new SqoopException(SparkExecutionError.SPARK_EXEC_0018, t); } // if no exception happens yet and reader finished before writer, // something went wrong if (!writerFinished) { // throw exception if data are not all consumed readerFinished = true; LOG.error("Reader terminated, but writer is still running!"); // Release so that the writer can tell Sqoop something went // wrong. free.release(); throw new SqoopException(SparkExecutionError.SPARK_EXEC_0019); } // inform writer that reader is finished readerFinished = true; }