Java TaskToolbox Examples

Programming Language: Java

Namespace/Package Name: io.druid.indexing.common

Class/Type: TaskToolbox

Examples at hotexamples.com: 4

Java TaskToolbox - 4 examples found. These are the top rated real world Java examples of io.druid.indexing.common.TaskToolbox extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getTaskActionClient(2)

getSegmentPusher(2)

getTaskWorkDir(2)

pushSegments(2)

getConfig(1)

getEmitter(1)

getMonitorScheduler(1)

getNewSegmentServerView(1)

getObjectMapper(1)

getQueryExecutorService(1)

getQueryRunnerFactoryConglomerate(1)

Example #1

Show file

File: ThreadPoolTaskRunner.java Project: phungleson/druid

    @Override
    public TaskStatus call() {
      final long startTime = System.currentTimeMillis();
      final File taskDir = toolbox.getTaskWorkDir();

      TaskStatus status;

      try {
        log.info("Running task: %s", task.getId());
        status = task.run(toolbox);
      } catch (InterruptedException e) {
        log.error(e, "Interrupted while running task[%s]", task);
        throw Throwables.propagate(e);
      } catch (Exception e) {
        log.error(e, "Exception while running task[%s]", task);
        status = TaskStatus.failure(task.getId());
      } catch (Throwable t) {
        log.error(t, "Uncaught Throwable while running task[%s]", task);
        throw Throwables.propagate(t);
      }

      try {
        if (taskDir.exists()) {
          log.info("Removing task directory: %s", taskDir);
          FileUtils.deleteDirectory(taskDir);
        }
      } catch (Exception e) {
        log.makeAlert(e, "Failed to delete task directory")
            .addData("taskDir", taskDir.toString())
            .addData("task", task.getId())
            .emit();
      }

      try {
        return status.withDuration(System.currentTimeMillis() - startTime);
      } catch (Exception e) {
        log.error(e, "Uncaught Exception during callback for task[%s]", task);
        throw Throwables.propagate(e);
      }
    }

Example #2

Show file

File: RealtimeIndexTask.java Project: kevinhsu/druid

 @Override
 public void publishSegment(DataSegment segment) throws IOException {
   taskToolbox.pushSegments(ImmutableList.of(segment));
 }

Example #3

Show file

File: RealtimeIndexTask.java Project: kevinhsu/druid

  @Override
  public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    if (this.plumber != null) {
      throw new IllegalStateException("WTF?!? run with non-null plumber??!");
    }

    // Shed any locks we might have (e.g. if we were uncleanly killed and restarted) since we'll
    // reacquire
    // them if we actually need them
    for (final TaskLock taskLock : getTaskLocks(toolbox)) {
      toolbox.getTaskActionClient().submit(new LockReleaseAction(taskLock.getInterval()));
    }

    boolean normalExit = true;

    // Set up firehose
    final Period intermediatePersistPeriod = fireDepartmentConfig.getIntermediatePersistPeriod();
    final Firehose firehose = firehoseFactory.connect();

    // It would be nice to get the PlumberSchool in the constructor.  Although that will need
    // jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been
    // introduced.
    final RealtimePlumberSchool realtimePlumberSchool =
        new RealtimePlumberSchool(
            windowPeriod, new File(toolbox.getTaskWorkDir(), "persist"), segmentGranularity);
    realtimePlumberSchool.setDefaultMaxPendingPersists(maxPendingPersists);

    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);

    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust
    // to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the
    // coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.

    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final DataSegmentAnnouncer lockingSegmentAnnouncer =
        new DataSegmentAnnouncer() {
          @Override
          public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            toolbox.getSegmentAnnouncer().announceSegment(segment);
          }

          @Override
          public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
              toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
              toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
          }

          @Override
          public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
              toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
          }

          @Override
          public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
              toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
              for (DataSegment segment : segments) {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
              }
            }
          }
        };

    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)

    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with
    // the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more
    // threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy =
        new VersioningPolicy() {
          @Override
          public String getVersion(final Interval interval) {
            try {
              // Side effect: Calling getVersion causes a lock to be acquired
              final TaskLock myLock =
                  toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));

              return myLock.getVersion();
            } catch (IOException e) {
              throw Throwables.propagate(e);
            }
          }
        };

    // NOTE: This pusher selects path based purely on global configuration and the DataSegment,
    // which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause
    // index.zip and
    // NOTE: descriptor.json to mismatch, or it can cause historical nodes to load different
    // instances of the
    // NOTE: "same" segment.
    realtimePlumberSchool.setDataSegmentPusher(toolbox.getSegmentPusher());
    realtimePlumberSchool.setConglomerate(toolbox.getQueryRunnerFactoryConglomerate());
    realtimePlumberSchool.setQueryExecutorService(toolbox.getQueryExecutorService());
    realtimePlumberSchool.setVersioningPolicy(versioningPolicy);
    realtimePlumberSchool.setSegmentAnnouncer(lockingSegmentAnnouncer);
    realtimePlumberSchool.setSegmentPublisher(segmentPublisher);
    realtimePlumberSchool.setServerView(toolbox.getNewSegmentServerView());
    realtimePlumberSchool.setEmitter(toolbox.getEmitter());

    if (this.rejectionPolicyFactory != null) {
      realtimePlumberSchool.setRejectionPolicyFactory(rejectionPolicyFactory);
    }

    final FireDepartment fireDepartment =
        new FireDepartment(schema, fireDepartmentConfig, null, null);
    final RealtimeMetricsMonitor metricsMonitor =
        new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment));
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    this.plumber = realtimePlumberSchool.findPlumber(schema, fireDepartment.getMetrics());

    try {
      plumber.startJob();

      // Set up metrics emission
      toolbox.getMonitorScheduler().addMonitor(metricsMonitor);

      // Time to read data!
      long nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
      while (firehose.hasMore()) {
        final InputRow inputRow;
        try {
          inputRow = firehose.nextRow();
          if (inputRow == null) {
            continue;
          }

          final Sink sink = plumber.getSink(inputRow.getTimestampFromEpoch());
          if (sink == null) {
            fireDepartment.getMetrics().incrementThrownAway();
            log.debug("Throwing away event[%s]", inputRow);

            if (System.currentTimeMillis() > nextFlush) {
              plumber.persist(firehose.commit());
              nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
            }

            continue;
          }

          if (sink.isEmpty()) {
            log.info("Task %s: New sink: %s", getId(), sink);
          }

          int currCount = sink.add(inputRow);
          fireDepartment.getMetrics().incrementProcessed();
          if (currCount >= fireDepartmentConfig.getMaxRowsInMemory()
              || System.currentTimeMillis() > nextFlush) {
            plumber.persist(firehose.commit());
            nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
          }
        } catch (FormattedException e) {
          log.warn(e, "unparseable line");
          fireDepartment.getMetrics().incrementUnparseable();
        }
      }
    } catch (Throwable e) {
      normalExit = false;
      log.makeAlert(e, "Exception aborted realtime processing[%s]", schema.getDataSource()).emit();
      throw e;
    } finally {
      if (normalExit) {
        try {
          plumber.persist(firehose.commit());
          plumber.finishJob();
        } catch (Exception e) {
          log.makeAlert(e, "Failed to finish realtime task").emit();
        } finally {
          Closeables.closeQuietly(firehose);
          toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
        }
      }
    }

    return TaskStatus.success(getId());
  }

Example #4

Show file

File: HadoopIndexTask.java Project: phungleson/druid

  @SuppressWarnings("unchecked")
  @Override
  public TaskStatus run(TaskToolbox toolbox) throws Exception {
    final List<String> finalHadoopDependencyCoordinates =
        hadoopDependencyCoordinates != null
            ? hadoopDependencyCoordinates
            : toolbox.getConfig().getDefaultHadoopCoordinates();

    final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig);

    final List<URL> extensionURLs = Lists.newArrayList();
    for (String coordinate : extensionsConfig.getCoordinates()) {
      final ClassLoader coordinateLoader =
          Initialization.getClassLoaderForCoordinates(aetherClient, coordinate);
      extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs()));
    }

    final List<URL> nonHadoopURLs = Lists.newArrayList();
    nonHadoopURLs.addAll(
        Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));

    final List<URL> driverURLs = Lists.newArrayList();
    driverURLs.addAll(nonHadoopURLs);
    // put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
    for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) {
      final ClassLoader hadoopLoader =
          Initialization.getClassLoaderForCoordinates(aetherClient, hadoopDependencyCoordinate);
      driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
    }

    final URLClassLoader loader =
        new URLClassLoader(driverURLs.toArray(new URL[driverURLs.size()]), null);
    Thread.currentThread().setContextClassLoader(loader);

    final List<URL> jobUrls = Lists.newArrayList();
    jobUrls.addAll(nonHadoopURLs);
    jobUrls.addAll(extensionURLs);

    System.setProperty(
        "druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
    boolean determineIntervals =
        !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

    final Class<?> determineConfigurationMainClass =
        loader.loadClass(HadoopDetermineConfigInnerProcessing.class.getName());
    final Method determineConfigurationMainMethod =
        determineConfigurationMainClass.getMethod("runTask", String[].class);

    String[] determineConfigArgs =
        new String[] {
          toolbox.getObjectMapper().writeValueAsString(spec),
          toolbox.getConfig().getHadoopWorkingPath(),
          toolbox.getSegmentPusher().getPathForHadoop(getDataSource())
        };

    String config =
        (String) determineConfigurationMainMethod.invoke(null, new Object[] {determineConfigArgs});
    HadoopIngestionSpec indexerSchema =
        toolbox.getObjectMapper().readValue(config, HadoopIngestionSpec.class);

    // We should have a lock from before we started running only if interval was specified
    final String version;
    if (determineIntervals) {
      Interval interval =
          JodaUtils.umbrellaInterval(
              JodaUtils.condenseIntervals(
                  indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()));
      TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
      version = lock.getVersion();
    } else {
      Iterable<TaskLock> locks = getTaskLocks(toolbox);
      final TaskLock myLock = Iterables.getOnlyElement(locks);
      version = myLock.getVersion();
    }
    log.info("Setting version to: %s", version);

    final Class<?> indexGeneratorMainClass =
        loader.loadClass(HadoopIndexGeneratorInnerProcessing.class.getName());
    final Method indexGeneratorMainMethod =
        indexGeneratorMainClass.getMethod("runTask", String[].class);
    String[] indexGeneratorArgs =
        new String[] {toolbox.getObjectMapper().writeValueAsString(indexerSchema), version};
    String segments =
        (String) indexGeneratorMainMethod.invoke(null, new Object[] {indexGeneratorArgs});

    if (segments != null) {

      List<DataSegment> publishedSegments =
          toolbox.getObjectMapper().readValue(segments, new TypeReference<List<DataSegment>>() {});

      toolbox.pushSegments(publishedSegments);
      return TaskStatus.success(getId());
    } else {
      return TaskStatus.failure(getId());
    }
  }