public SqoopOutputFormatLoadExecutorSpark(JobContext jobctx) {
   context = jobctx;
   loaderName = context.getConfiguration().get(MRJobConstants.JOB_ETL_LOADER);
   writer = new SqoopRecordWriter();
   // jackh: This must be conditional - Extract schema using credentials in case of MR and simply
   // extract from the
   // credentials object in case of Spark (due to known issue with Hadoop/Spark that the
   // credentials are never added
   // for serialization)
   // matcher = MatcherFactory.getMatcher(
   // MRConfigurationUtils.getConnectorSchema(Direction.FROM, context.getConfiguration()),
   // MRConfigurationUtils.getConnectorSchema(Direction.TO, context.getConfiguration()));
   matcher =
       MatcherFactory.getMatcher(
           MRConfigurationUtils.getConnectorSchemaUnsafe(
               Direction.FROM, context.getConfiguration()),
           MRConfigurationUtils.getConnectorSchemaUnsafe(
               Direction.TO, context.getConfiguration()));
   toDataFormat =
       (IntermediateDataFormat<?>)
           ClassUtils.instantiate(
               context.getConfiguration().get(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT));
   // Using the TO schema since the SqoopDataWriter in the SqoopMapper encapsulates the
   // toDataFormat
   toDataFormat.setSchema(matcher.getToSchema());
 }
Exemplo n.º 2
0
  @SuppressWarnings({"unchecked", "rawtypes"})
  @Override
  public void run(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    String extractorName = conf.get(MRJobConstants.JOB_ETL_EXTRACTOR);
    Extractor extractor = (Extractor) ClassUtils.instantiate(extractorName);

    Schema fromSchema = MRConfigurationUtils.getConnectorSchema(Direction.FROM, conf);
    Schema toSchema = MRConfigurationUtils.getConnectorSchema(Direction.TO, conf);
    matcher = MatcherFactory.getMatcher(fromSchema, toSchema);

    String fromIDFClass = conf.get(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT);
    fromIDF = (IntermediateDataFormat<Object>) ClassUtils.instantiate(fromIDFClass);
    fromIDF.setSchema(matcher.getFromSchema());
    String toIDFClass = conf.get(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT);
    toIDF = (IntermediateDataFormat<Object>) ClassUtils.instantiate(toIDFClass);
    toIDF.setSchema(matcher.getToSchema());

    // Objects that should be passed to the Executor execution
    PrefixContext subContext =
        new PrefixContext(conf, MRJobConstants.PREFIX_CONNECTOR_FROM_CONTEXT);
    Object fromConfig = MRConfigurationUtils.getConnectorLinkConfig(Direction.FROM, conf);
    Object fromJob = MRConfigurationUtils.getConnectorJobConfig(Direction.FROM, conf);

    SqoopSplit split = context.getCurrentKey();
    ExtractorContext extractorContext =
        new ExtractorContext(
            subContext, new SqoopDataWriter(context, fromIDF, toIDF, matcher), fromSchema);

    try {
      LOG.info("Running extractor class " + extractorName);
      extractor.extract(extractorContext, fromConfig, fromJob, split.getPartition());
      LOG.info("Extractor has finished");
      context.getCounter(SqoopCounters.ROWS_READ).increment(extractor.getRowsRead());
    } catch (Exception e) {
      throw new SqoopException(MRExecutionError.MAPRED_EXEC_0017, e);
    } finally {
      LOG.info("Stopping progress service");
    }
  }
    @SuppressWarnings({"rawtypes", "unchecked"})
    @Override
    public void run() {
      LOG.info("SqoopOutputFormatLoadExecutor consumer thread is starting");
      try {
        DataReader reader = new SqoopOutputFormatDataReader();
        Configuration conf = context.getConfiguration();
        Loader loader = (Loader) ClassUtils.instantiate(loaderName);

        // Objects that should be passed to the Loader
        PrefixContext subContext =
            new PrefixContext(conf, MRJobConstants.PREFIX_CONNECTOR_TO_CONTEXT);
        Object connectorLinkConfig =
            MRConfigurationUtils.getConnectorLinkConfigUnsafe(Direction.TO, conf);
        Object connectorToJobConfig =
            MRConfigurationUtils.getConnectorJobConfigUnsafe(Direction.TO, conf);
        // Using the TO schema since the SqoopDataWriter in the SqoopMapper
        // encapsulates the toDataFormat

        // Create loader context
        LoaderContext loaderContext = new LoaderContext(subContext, reader, matcher.getToSchema());

        LOG.info("Running loader class " + loaderName);
        loader.load(loaderContext, connectorLinkConfig, connectorToJobConfig);
        LOG.info("Loader has finished");
        ((TaskAttemptContext) jobctx)
            .getCounter(SqoopCounters.ROWS_WRITTEN)
            .increment(loader.getRowsWritten());

      } catch (Throwable t) {
        readerFinished = true;
        LOG.error("Error while loading data out of MR job.", t);
        // Release so that the writer can tell Sqoop something went
        // wrong.
        free.release();
        throw new SqoopException(SparkExecutionError.SPARK_EXEC_0018, t);
      }

      // if no exception happens yet and reader finished before writer,
      // something went wrong
      if (!writerFinished) {
        // throw exception if data are not all consumed
        readerFinished = true;
        LOG.error("Reader terminated, but writer is still running!");
        // Release so that the writer can tell Sqoop something went
        // wrong.
        free.release();
        throw new SqoopException(SparkExecutionError.SPARK_EXEC_0019);
      }
      // inform writer that reader is finished
      readerFinished = true;
    }