public static String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];

  // Deserialize the ingestion spec, then rewrite its working path and segment output
  // path before building the indexer config.
  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.jsonMapper
      .readValue(schema, HadoopIngestionSpec.class);
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSchema(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  Jobby job = new HadoopDruidDetermineConfigurationJob(config);

  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    // Return the (possibly updated) schema so the caller can hand it to the index generator job.
    return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(config.getSchema());
  }

  return null;
}
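// A minimal invocation sketch (not from the source): the spec file name and both paths
// below are placeholders, and it assumes the method above is reachable from the caller
// with the Druid and Hadoop classes on the classpath.
public static void main(String[] args) throws Exception
{
  final String specJson = new String(
      java.nio.file.Files.readAllBytes(java.nio.file.Paths.get("hadoop-ingestion-spec.json")),
      java.nio.charset.StandardCharsets.UTF_8
  );

  final String updatedSpec = runTask(new String[]{
      specJson,
      "/tmp/druid/hadoop-working",       // workingPath: scratch space for intermediate data
      "hdfs://namenode/druid/segments"   // segmentOutputPath: where finished segments are written
  });

  if (updatedSpec != null) {
    System.out.println(updatedSpec);     // feed this updated spec to the index generator job
  }
}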
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException
{
  // Expand each segment interval into data-granularity buckets.
  final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
  Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
  if (optionalIntervals.isPresent()) {
    for (Interval segmentInterval : optionalIntervals.get()) {
      for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
        intervals.add(dataInterval);
      }
    }
  }

  Path betaInput = new Path(inputPath);
  FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
  Set<String> paths = Sets.newTreeSet();
  Pattern fileMatcher = Pattern.compile(filePattern);

  // An explicit pathFormat overrides the default granularity-based directory layout.
  DateTimeFormatter customFormatter = null;
  if (pathFormat != null) {
    customFormatter = DateTimeFormat.forPattern(pathFormat);
  }

  for (Interval interval : intervals) {
    DateTime t = interval.getStart();
    String intervalPath;
    if (customFormatter != null) {
      intervalPath = customFormatter.print(t);
    } else {
      intervalPath = dataGranularity.toPath(t);
    }

    Path granularPath = new Path(betaInput, intervalPath);
    log.info("Checking path[%s]", granularPath);
    // Recursively walk the bucket directory and keep the files matching filePattern.
    for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
      final Path filePath = status.getPath();
      if (fileMatcher.matcher(filePath.toString()).matches()) {
        paths.add(filePath.toString());
      }
    }
  }

  for (String path : paths) {
    log.info("Appending path[%s]", path);
    FileInputFormat.addInputPath(job, new Path(path));
  }

  return job;
}
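// Illustrative sketch (not from the source): how a custom pathFormat maps a bucket's start
// time to a directory under the configured inputPath. The pattern, base path, and timestamp
// are placeholder assumptions; the Joda-Time and Hadoop classes are the same ones used above.
public static void printExampleGranularPath()
{
  DateTimeFormatter fmt = DateTimeFormat.forPattern("'y'=yyyy/'m'=MM/'d'=dd/'H'=HH");
  DateTime bucketStart = new DateTime("2014-10-22T05:00:00Z", DateTimeZone.UTC);

  // Mirrors: new Path(betaInput, customFormatter.print(t)) in addInputPaths above.
  Path granularPath = new Path(new Path("/base/events"), fmt.print(bucketStart));

  System.out.println(granularPath); // /base/events/y=2014/m=10/d=22/H=05
}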
public static String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String version = args[1];

  // Deserialize the ingestion spec and stamp it with the segment version to publish.
  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.jsonMapper
      .readValue(schema, HadoopIngestionSpec.class);
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSchema(
      theSchema.withTuningConfig(theSchema.getTuningConfig().withVersion(version))
  );

  HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);

  log.info("Starting a hadoop index generator job...");
  if (job.run()) {
    // On success, return the list of published segments as JSON.
    return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(job.getPublishedSegments());
  }

  return null;
}
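// A minimal invocation sketch (not from the source): the spec is typically the updated JSON
// returned by the determine-configuration job above, and the version string here is a
// placeholder chosen for the example.
public static void runExampleIndexJob(String updatedSpecJson) throws Exception
{
  final String version = "2014-10-22T00:00:00.000Z"; // placeholder segment version (typically an ISO timestamp)

  final String publishedSegmentsJson = runTask(new String[]{updatedSpecJson, version});
  if (publishedSegmentsJson != null) {
    System.out.println(publishedSegmentsJson);       // JSON list of segments published by the job
  } else {
    throw new RuntimeException("Hadoop index generator job failed");
  }
}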