public static String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String version = args[1];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.jsonMapper
      .readValue(schema, HadoopIngestionSpec.class);
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSchema(
      theSchema.withTuningConfig(theSchema.getTuningConfig().withVersion(version))
  );

  // Run the index generator job and, on success, report the segments it published.
  HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);
  log.info("Starting a hadoop index generator job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(job.getPublishedSegments());
  }

  return null;
}
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  Optional<SortedSet<Interval>> intervals = spec.getDataSchema().getGranularitySpec().bucketIntervals();
  if (intervals.isPresent()) {
    // Try to lock one umbrella interval covering all of the configured bucket intervals.
    Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(intervals.get()));
    return taskActionClient.submit(new LockTryAcquireAction(interval)).isPresent();
  } else {
    // No intervals were specified; they will be determined (and locked) while the task runs.
    return true;
  }
}
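// Illustrative sketch only (hypothetical helper, not part of the task): the lock requested above
// spans a single "umbrella" interval covering every configured bucket interval. Assuming plain
// Joda-Time (org.joda.time.DateTime / Interval) and a set sorted by interval start, the covering
// interval runs from the earliest start to the latest end:
private static Interval umbrellaIntervalSketch(SortedSet<Interval> intervalsSortedByStart)
{
  DateTime start = intervalsSortedByStart.first().getStart();
  DateTime end = intervalsSortedByStart.first().getEnd();
  for (Interval interval : intervalsSortedByStart) {
    // A later-starting interval may still end last, so scan every element for the maximum end.
    if (interval.getEnd().isAfter(end)) {
      end = interval.getEnd();
    }
  }
  return new Interval(start, end);
}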
public static String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.jsonMapper
      .readValue(schema, HadoopIngestionSpec.class);
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSchema(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  // Run the determine-configuration job and, on success, return the resolved schema.
  Jobby job = new HadoopDruidDetermineConfigurationJob(config);
  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(config.getSchema());
  }

  return null;
}
private static String getTheDataSource(HadoopIngestionSpec spec, HadoopIngestionSpec config)
{
  if (spec != null) {
    return spec.getDataSchema().getDataSource();
  }
  return config.getDataSchema().getDataSource();
}
@SuppressWarnings("unchecked")
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception
{
  final List<String> finalHadoopDependencyCoordinates = hadoopDependencyCoordinates != null
                                                        ? hadoopDependencyCoordinates
                                                        : toolbox.getConfig().getDefaultHadoopCoordinates();

  final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig);

  // Collect the jar URLs of all configured extensions.
  final List<URL> extensionURLs = Lists.newArrayList();
  for (String coordinate : extensionsConfig.getCoordinates()) {
    final ClassLoader coordinateLoader = Initialization.getClassLoaderForCoordinates(aetherClient, coordinate);
    extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs()));
  }

  final List<URL> nonHadoopURLs = Lists.newArrayList();
  nonHadoopURLs.addAll(Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));

  final List<URL> driverURLs = Lists.newArrayList();
  driverURLs.addAll(nonHadoopURLs);
  // put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
  for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) {
    final ClassLoader hadoopLoader = Initialization.getClassLoaderForCoordinates(aetherClient, hadoopDependencyCoordinate);
    driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
  }

  // Run the Hadoop jobs through an isolated class loader (null parent) so their dependencies
  // cannot clash with whatever is already on this task's classpath.
  final URLClassLoader loader = new URLClassLoader(driverURLs.toArray(new URL[driverURLs.size()]), null);
  Thread.currentThread().setContextClassLoader(loader);

  // Expose the non-Hadoop and extension jars to the job via a system property.
  final List<URL> jobUrls = Lists.newArrayList();
  jobUrls.addAll(nonHadoopURLs);
  jobUrls.addAll(extensionURLs);
  System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));

  boolean determineIntervals = !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

  // Phase 1: determine the configuration (including intervals, if they were not specified).
  final Class<?> determineConfigurationMainClass = loader.loadClass(HadoopDetermineConfigInnerProcessing.class.getName());
  final Method determineConfigurationMainMethod = determineConfigurationMainClass.getMethod("runTask", String[].class);
  String[] determineConfigArgs = new String[]{
      toolbox.getObjectMapper().writeValueAsString(spec),
      toolbox.getConfig().getHadoopWorkingPath(),
      toolbox.getSegmentPusher().getPathForHadoop(getDataSource())
  };

  String config = (String) determineConfigurationMainMethod.invoke(null, new Object[]{determineConfigArgs});

  HadoopIngestionSpec indexerSchema = toolbox.getObjectMapper().readValue(config, HadoopIngestionSpec.class);

  // We should have a lock from before we started running only if interval was specified
  final String version;
  if (determineIntervals) {
    Interval interval = JodaUtils.umbrellaInterval(
        JodaUtils.condenseIntervals(
            indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()
        )
    );
    TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
    version = lock.getVersion();
  } else {
    Iterable<TaskLock> locks = getTaskLocks(toolbox);
    final TaskLock myLock = Iterables.getOnlyElement(locks);
    version = myLock.getVersion();
  }
  log.info("Setting version to: %s", version);

  // Phase 2: run the index generator job with the resolved schema and lock version.
  final Class<?> indexGeneratorMainClass = loader.loadClass(HadoopIndexGeneratorInnerProcessing.class.getName());
  final Method indexGeneratorMainMethod = indexGeneratorMainClass.getMethod("runTask", String[].class);
  String[] indexGeneratorArgs = new String[]{
      toolbox.getObjectMapper().writeValueAsString(indexerSchema),
      version
  };
  String segments = (String) indexGeneratorMainMethod.invoke(null, new Object[]{indexGeneratorArgs});

  if (segments != null) {
    List<DataSegment> publishedSegments = toolbox.getObjectMapper().readValue(
        segments,
        new TypeReference<List<DataSegment>>() {}
    );
    toolbox.pushSegments(publishedSegments);
    return TaskStatus.success(getId());
  } else {
    return TaskStatus.failure(getId());
  }
}
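// Illustrative sketch only (hypothetical helper, not part of HadoopIndexTask): the pattern used by
// run() above -- load a worker class through an isolated URLClassLoader (null parent, so the
// process classpath cannot leak in) and invoke its static runTask(String[]) reflectively. Note that
// the String[] must be wrapped in an Object[] so it is passed as one argument rather than being
// expanded into the varargs of invoke(). Assumes java.lang.reflect.Method, java.net.URL,
// java.net.URLClassLoader and java.util.List are imported.
private static String runTaskInIsolatedClassLoader(List<URL> jars, String className, String[] args)
    throws Exception
{
  final URLClassLoader isolated = new URLClassLoader(jars.toArray(new URL[jars.size()]), null);
  final ClassLoader previous = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader(isolated);
  try {
    final Class<?> workerClass = isolated.loadClass(className);
    final Method runTask = workerClass.getMethod("runTask", String[].class);
    return (String) runTask.invoke(null, new Object[]{args});
  }
  finally {
    // Restored here for tidiness; the actual task leaves the isolated loader in place.
    Thread.currentThread().setContextClassLoader(previous);
  }
}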