Example #1
0
 /**
  * Decides whether this task may start running.
  *
  * <p>When the granularity spec declares no bucket intervals the task is reported as ready
  * without submitting any action. Otherwise a {@code LockTryAcquireAction} covering the umbrella
  * of the condensed bucket intervals is submitted, and the task is ready only if that action
  * yields a value.
  *
  * @param taskActionClient client used to submit the lock action
  * @return {@code true} if no intervals are declared or the lock action produced a result
  * @throws Exception if submitting the action fails
  */
 @Override
 public boolean isReady(TaskActionClient taskActionClient) throws Exception {
   final Optional<SortedSet<Interval>> bucketIntervals =
       spec.getDataSchema().getGranularitySpec().bucketIntervals();

   // Nothing to lock up front when the spec carries no explicit intervals.
   if (!bucketIntervals.isPresent()) {
     return true;
   }

   // A single interval spanning every (condensed) bucket interval is what gets locked.
   final Interval umbrella =
       JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(bucketIntervals.get()));
   return taskActionClient.submit(new LockTryAcquireAction(umbrella)).isPresent();
 }
 /**
  * Builds a copy of the given analysis whose interval list has been condensed.
  *
  * <p>Id, columns, size and row count are carried over unchanged. A {@code null} interval list
  * stays {@code null}.
  *
  * @param analysis the analysis to copy
  * @return a new {@code SegmentAnalysis} with condensed intervals
  */
 @Override
 public SegmentAnalysis apply(SegmentAnalysis analysis) {
   return new SegmentAnalysis(
       analysis.getId(),
       // Only condense when there is something to condense; null is passed through as-is.
       analysis.getIntervals() == null
           ? null
           : JodaUtils.condenseIntervals(analysis.getIntervals()),
       analysis.getColumns(),
       analysis.getSize(),
       analysis.getNumRows());
 }
 /**
  * Produces the finalized form of an analysis by condensing its interval list.
  *
  * <p>Id, columns, size, row count, aggregators and query granularity are copied from the input
  * unchanged. A {@code null} interval list stays {@code null}.
  *
  * @param analysis the analysis to finalize
  * @return a new {@code SegmentAnalysis} with condensed intervals
  */
 @VisibleForTesting
 public static SegmentAnalysis finalizeAnalysis(SegmentAnalysis analysis) {
   return new SegmentAnalysis(
       analysis.getId(),
       // Only condense when there is something to condense; null is passed through as-is.
       analysis.getIntervals() == null
           ? null
           : JodaUtils.condenseIntervals(analysis.getIntervals()),
       analysis.getColumns(),
       analysis.getSize(),
       analysis.getNumRows(),
       analysis.getAggregators(),
       analysis.getQueryGranularity());
 }
Example #4
0
  /**
   * Runs the Hadoop indexing task.
   *
   * <p>The method assembles a dedicated {@link URLClassLoader} from this class's own class path
   * plus the resolved Hadoop dependency coordinates, then uses it to reflectively invoke the
   * static {@code runTask(String[])} entry points of {@code HadoopDetermineConfigInnerProcessing}
   * and {@code HadoopIndexGeneratorInnerProcessing}. Between the two phases it determines the
   * segment version from a task lock: a lock is acquired here when the spec declared no bucket
   * intervals, otherwise the single lock already held by the task is used.
   *
   * <p>Side effects: the JVM-wide system property {@code druid.hadoop.internal.classpath} is set,
   * and the thread's context class loader is replaced for the duration of the call. The previous
   * context class loader is restored before this method returns or throws, so the isolated
   * loader does not stay attached to the calling thread.
   *
   * @param toolbox provides configuration, the object mapper, the segment pusher and the task
   *     action client
   * @return success if the index generator returned a segment list (which is then pushed),
   *     failure if it returned {@code null}
   * @throws Exception if dependency resolution, reflection, (de)serialization, locking or
   *     segment pushing fails
   */
  @SuppressWarnings("unchecked")
  @Override
  public TaskStatus run(TaskToolbox toolbox) throws Exception {
    // Task-specific coordinates win; otherwise fall back to the configured defaults.
    final List<String> finalHadoopDependencyCoordinates =
        hadoopDependencyCoordinates != null
            ? hadoopDependencyCoordinates
            : toolbox.getConfig().getDefaultHadoopCoordinates();

    final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig);

    final List<URL> extensionURLs = Lists.newArrayList();
    for (String coordinate : extensionsConfig.getCoordinates()) {
      final ClassLoader coordinateLoader =
          Initialization.getClassLoaderForCoordinates(aetherClient, coordinate);
      extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs()));
    }

    final List<URL> nonHadoopURLs = Lists.newArrayList();
    nonHadoopURLs.addAll(
        Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));

    final List<URL> driverURLs = Lists.newArrayList();
    driverURLs.addAll(nonHadoopURLs);
    // put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
    for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) {
      final ClassLoader hadoopLoader =
          Initialization.getClassLoaderForCoordinates(aetherClient, hadoopDependencyCoordinate);
      driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
    }

    // A null parent means classes are resolved only from driverURLs (and the bootstrap loader).
    final URLClassLoader loader =
        new URLClassLoader(driverURLs.toArray(new URL[driverURLs.size()]), null);

    // Remember the caller's context class loader so it can be put back afterwards; leaving the
    // isolated loader installed would leak it to whatever runs next on this thread.
    final Thread currentThread = Thread.currentThread();
    final ClassLoader previousContextLoader = currentThread.getContextClassLoader();
    currentThread.setContextClassLoader(loader);
    try {
      final List<URL> jobUrls = Lists.newArrayList();
      jobUrls.addAll(nonHadoopURLs);
      jobUrls.addAll(extensionURLs);

      System.setProperty(
          "druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
      boolean determineIntervals =
          !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

      final Class<?> determineConfigurationMainClass =
          loader.loadClass(HadoopDetermineConfigInnerProcessing.class.getName());
      final Method determineConfigurationMainMethod =
          determineConfigurationMainClass.getMethod("runTask", String[].class);

      String[] determineConfigArgs =
          new String[] {
            toolbox.getObjectMapper().writeValueAsString(spec),
            toolbox.getConfig().getHadoopWorkingPath(),
            toolbox.getSegmentPusher().getPathForHadoop(getDataSource())
          };

      // The String[] is wrapped in an Object[] so it is passed as one argument, not spread as
      // varargs. The target is static, hence the null receiver.
      String config =
          (String)
              determineConfigurationMainMethod.invoke(null, new Object[] {determineConfigArgs});
      HadoopIngestionSpec indexerSchema =
          toolbox.getObjectMapper().readValue(config, HadoopIngestionSpec.class);

      // We should have a lock from before we started running only if interval was specified
      final String version;
      if (determineIntervals) {
        Interval interval =
            JodaUtils.umbrellaInterval(
                JodaUtils.condenseIntervals(
                    indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()));
        TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
        version = lock.getVersion();
      } else {
        Iterable<TaskLock> locks = getTaskLocks(toolbox);
        final TaskLock myLock = Iterables.getOnlyElement(locks);
        version = myLock.getVersion();
      }
      log.info("Setting version to: %s", version);

      final Class<?> indexGeneratorMainClass =
          loader.loadClass(HadoopIndexGeneratorInnerProcessing.class.getName());
      final Method indexGeneratorMainMethod =
          indexGeneratorMainClass.getMethod("runTask", String[].class);
      String[] indexGeneratorArgs =
          new String[] {toolbox.getObjectMapper().writeValueAsString(indexerSchema), version};
      String segments =
          (String) indexGeneratorMainMethod.invoke(null, new Object[] {indexGeneratorArgs});

      if (segments != null) {

        List<DataSegment> publishedSegments =
            toolbox
                .getObjectMapper()
                .readValue(segments, new TypeReference<List<DataSegment>>() {});

        toolbox.pushSegments(publishedSegments);
        return TaskStatus.success(getId());
      } else {
        // A null result from the index generator is reported as a task failure.
        return TaskStatus.failure(getId());
      }
    } finally {
      currentThread.setContextClassLoader(previousContextLoader);
    }
  }
 @JsonCreator
 public MultipleIntervalSegmentSpec(@JsonProperty("intervals") List<Interval> intervals) {
   this.intervals = Collections.unmodifiableList(JodaUtils.condenseIntervals(intervals));
 }