Beispiel #1
0
    public static String runTask(String[] args) throws Exception {
      final String schema = args[0];
      String version = args[1];

      final HadoopIngestionSpec theSchema =
          HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
      final HadoopDruidIndexerConfig config =
          HadoopDruidIndexerConfig.fromSchema(
              theSchema.withTuningConfig(theSchema.getTuningConfig().withVersion(version)));

      HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);

      log.info("Starting a hadoop index generator job...");
      if (job.run()) {
        return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(job.getPublishedSegments());
      }

      return null;
    }
Beispiel #2
0
 @Override
 public boolean isReady(TaskActionClient taskActionClient) throws Exception {
   Optional<SortedSet<Interval>> intervals =
       spec.getDataSchema().getGranularitySpec().bucketIntervals();
   if (intervals.isPresent()) {
     Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(intervals.get()));
     return taskActionClient.submit(new LockTryAcquireAction(interval)).isPresent();
   } else {
     return true;
   }
 }
Beispiel #3
0
    public static String runTask(String[] args) throws Exception {
      final String schema = args[0];
      final String workingPath = args[1];
      final String segmentOutputPath = args[2];

      final HadoopIngestionSpec theSchema =
          HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
      final HadoopDruidIndexerConfig config =
          HadoopDruidIndexerConfig.fromSchema(
              theSchema
                  .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
                  .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)));

      Jobby job = new HadoopDruidDetermineConfigurationJob(config);

      log.info("Starting a hadoop determine configuration job...");
      if (job.run()) {
        return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(config.getSchema());
      }

      return null;
    }
Beispiel #4
0
 private static String getTheDataSource(HadoopIngestionSpec spec, HadoopIngestionSpec config) {
   if (spec != null) {
     return spec.getDataSchema().getDataSource();
   }
   return config.getDataSchema().getDataSource();
 }
Beispiel #5
0
  @SuppressWarnings("unchecked")
  @Override
  public TaskStatus run(TaskToolbox toolbox) throws Exception {
    final List<String> finalHadoopDependencyCoordinates =
        hadoopDependencyCoordinates != null
            ? hadoopDependencyCoordinates
            : toolbox.getConfig().getDefaultHadoopCoordinates();

    final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig);

    final List<URL> extensionURLs = Lists.newArrayList();
    for (String coordinate : extensionsConfig.getCoordinates()) {
      final ClassLoader coordinateLoader =
          Initialization.getClassLoaderForCoordinates(aetherClient, coordinate);
      extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs()));
    }

    final List<URL> nonHadoopURLs = Lists.newArrayList();
    nonHadoopURLs.addAll(
        Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));

    final List<URL> driverURLs = Lists.newArrayList();
    driverURLs.addAll(nonHadoopURLs);
    // put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
    for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) {
      final ClassLoader hadoopLoader =
          Initialization.getClassLoaderForCoordinates(aetherClient, hadoopDependencyCoordinate);
      driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
    }

    final URLClassLoader loader =
        new URLClassLoader(driverURLs.toArray(new URL[driverURLs.size()]), null);
    Thread.currentThread().setContextClassLoader(loader);

    final List<URL> jobUrls = Lists.newArrayList();
    jobUrls.addAll(nonHadoopURLs);
    jobUrls.addAll(extensionURLs);

    System.setProperty(
        "druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
    boolean determineIntervals =
        !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

    final Class<?> determineConfigurationMainClass =
        loader.loadClass(HadoopDetermineConfigInnerProcessing.class.getName());
    final Method determineConfigurationMainMethod =
        determineConfigurationMainClass.getMethod("runTask", String[].class);

    String[] determineConfigArgs =
        new String[] {
          toolbox.getObjectMapper().writeValueAsString(spec),
          toolbox.getConfig().getHadoopWorkingPath(),
          toolbox.getSegmentPusher().getPathForHadoop(getDataSource())
        };

    String config =
        (String) determineConfigurationMainMethod.invoke(null, new Object[] {determineConfigArgs});
    HadoopIngestionSpec indexerSchema =
        toolbox.getObjectMapper().readValue(config, HadoopIngestionSpec.class);

    // We should have a lock from before we started running only if interval was specified
    final String version;
    if (determineIntervals) {
      Interval interval =
          JodaUtils.umbrellaInterval(
              JodaUtils.condenseIntervals(
                  indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()));
      TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
      version = lock.getVersion();
    } else {
      Iterable<TaskLock> locks = getTaskLocks(toolbox);
      final TaskLock myLock = Iterables.getOnlyElement(locks);
      version = myLock.getVersion();
    }
    log.info("Setting version to: %s", version);

    final Class<?> indexGeneratorMainClass =
        loader.loadClass(HadoopIndexGeneratorInnerProcessing.class.getName());
    final Method indexGeneratorMainMethod =
        indexGeneratorMainClass.getMethod("runTask", String[].class);
    String[] indexGeneratorArgs =
        new String[] {toolbox.getObjectMapper().writeValueAsString(indexerSchema), version};
    String segments =
        (String) indexGeneratorMainMethod.invoke(null, new Object[] {indexGeneratorArgs});

    if (segments != null) {

      List<DataSegment> publishedSegments =
          toolbox.getObjectMapper().readValue(segments, new TypeReference<List<DataSegment>>() {});

      toolbox.pushSegments(publishedSegments);
      return TaskStatus.success(getId());
    } else {
      return TaskStatus.failure(getId());
    }
  }