Example No. 1
  public static String runTask(String[] args) throws Exception {
    final String schema = args[0];
    final String workingPath = args[1];
    final String segmentOutputPath = args[2];

    // Deserialize the ingestion spec, then rewrite it with the task-supplied
    // segment output path and Hadoop working path before building the config.
    final HadoopIngestionSpec theSchema =
        HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
    final HadoopDruidIndexerConfig config =
        HadoopDruidIndexerConfig.fromSchema(
            theSchema
                .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
                .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)));

    Jobby job = new HadoopDruidDetermineConfigurationJob(config);

    log.info("Starting a hadoop determine configuration job...");
    // On success, return the updated schema as JSON so a caller can hand it
    // to the index generator job; null signals failure.
    if (job.run()) {
      return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(config.getSchema());
    }

    return null;
  }
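
For context, a hedged sketch of how such an entry point can be invoked. In Druid, inner-processing methods like this one are typically called reflectively from a classloader that isolates the Hadoop dependencies; the class name and the buildIsolatedClassLoader helper below are placeholders, not Druid's actual identifiers.

  import java.lang.reflect.Method;

  // Placeholder invocation: "com.example.DetermineConfigRunner" stands in for
  // the real declaring class, and buildIsolatedClassLoader() is a hypothetical
  // helper that assembles a classloader with the Hadoop and Druid indexer jars.
  ClassLoader hadoopLoader = buildIsolatedClassLoader();
  Class<?> runner = hadoopLoader.loadClass("com.example.DetermineConfigRunner");
  Method runTask = runner.getMethod("runTask", String[].class);
  String updatedSpec = (String) runTask.invoke(
      null, new Object[]{new String[]{schemaJson, workingPath, segmentOutputPath}});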
Example No. 2
  @Override
  public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    // Expand each segment-granular interval into data-granular intervals,
    // deduplicated and ordered by the interval comparator.
    final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
    Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
    if (optionalIntervals.isPresent()) {
      for (Interval segmentInterval : optionalIntervals.get()) {
        for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
          intervals.add(dataInterval);
        }
      }
    }

    Path betaInput = new Path(inputPath);
    FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    Set<String> paths = Sets.newTreeSet();
    Pattern fileMatcher = Pattern.compile(filePattern);

    // An explicit pathFormat overrides the granularity's default path layout.
    DateTimeFormatter customFormatter = null;
    if (pathFormat != null) {
      customFormatter = DateTimeFormat.forPattern(pathFormat);
    }

    for (Interval interval : intervals) {
      DateTime t = interval.getStart();
      final String intervalPath;
      if (customFormatter != null) {
        intervalPath = customFormatter.print(t);
      } else {
        intervalPath = dataGranularity.toPath(t);
      }

      // Recursively walk the granular directory and keep only files whose
      // full path matches the configured pattern.
      Path granularPath = new Path(betaInput, intervalPath);
      log.info("Checking path[%s]", granularPath);
      for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
        final Path filePath = status.getPath();
        if (fileMatcher.matcher(filePath.toString()).matches()) {
          paths.add(filePath.toString());
        }
      }
    }

    for (String path : paths) {
      log.info("Appending path[%s]", path);
      FileInputFormat.addInputPath(job, new Path(path));
    }

    return job;
  }
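
When pathFormat is set, it is handed straight to Joda-Time's DateTimeFormat, so single-quoted literals and pattern letters behave as usual. A minimal self-contained demo; the pattern string here is an illustrative value of the kind used for granularity path specs, not taken from this code.

  import org.joda.time.DateTime;
  import org.joda.time.DateTimeZone;
  import org.joda.time.format.DateTimeFormat;
  import org.joda.time.format.DateTimeFormatter;

  public class PathFormatDemo {
    public static void main(String[] args) {
      // Single-quoted chunks are literals; yyyy/MM/dd/HH are pattern letters.
      DateTimeFormatter fmt = DateTimeFormat.forPattern("'y'=yyyy/'m'=MM/'d'=dd/'H'=HH");
      DateTime t = new DateTime(2014, 6, 27, 13, 0, DateTimeZone.UTC);
      System.out.println(fmt.print(t)); // prints: y=2014/m=06/d=27/H=13
    }
  }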
Example No. 3
  public static String runTask(String[] args) throws Exception {
    final String schema = args[0];
    final String version = args[1];

    // Deserialize the ingestion spec and pin the tuning config to the
    // task-supplied version before building the indexer config.
    final HadoopIngestionSpec theSchema =
        HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
    final HadoopDruidIndexerConfig config =
        HadoopDruidIndexerConfig.fromSchema(
            theSchema.withTuningConfig(theSchema.getTuningConfig().withVersion(version)));

    HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);

    log.info("Starting a hadoop index generator job...");
    // On success, return the published segments as JSON; null signals failure.
    if (job.run()) {
      return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(job.getPublishedSegments());
    }

    return null;
  }
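
Examples 1 and 3 are two halves of the same pipeline: the determine-configuration job returns an updated spec, which is then fed to the index generator along with a version string. A hypothetical driver sketch, assuming the two runTask methods above are visible as DetermineConfigRunner.runTask and IndexGeneratorRunner.runTask (placeholder names); the paths and version string are made-up values.

  // Hypothetical driver; DetermineConfigRunner and IndexGeneratorRunner are
  // placeholder names for the classes declaring the runTask methods shown in
  // Examples 1 and 3. Paths and the version string are illustrative only.
  public class HadoopIndexDriver {
    public static void main(String[] args) throws Exception {
      String schemaJson = args[0]; // serialized HadoopIngestionSpec

      // Step 1: determine partitioning; returns the updated spec as JSON.
      String determinedSchema = DetermineConfigRunner.runTask(new String[]{
          schemaJson, "/tmp/druid/working", "hdfs://namenode/druid/segments"});
      if (determinedSchema == null) {
        throw new RuntimeException("determine-configuration job failed");
      }

      // Step 2: build and publish segments under a caller-assigned version.
      String publishedSegments = IndexGeneratorRunner.runTask(new String[]{
          determinedSchema, "2014-06-27T00:00:00.000Z"});
      if (publishedSegments == null) {
        throw new RuntimeException("index-generator job failed");
      }
      System.out.println("Published segments: " + publishedSegments);
    }
  }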