Java HadoopDruidIndexerConfig.getGranularitySpec Examples

Programming Language: Java

Method/Function: getGranularitySpec

Examples at hotexamples.com: 7

Java HadoopDruidIndexerConfig.getGranularitySpec - 7 examples found. These are the top rated real world Java examples of HadoopDruidIndexerConfig.getGranularitySpec extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getGranularitySpec(7)

getPartitionsSpec(4)

fromConfiguration(3)

getPathSpec(3)

addInputPaths(2)

isCleanupOnFailure(2)

getDataSource(2)

getIntervals(2)

makeIntermediatePath(2)

getUpdaterJobSpec(2)

setIntervals(1)

makeSegmentPartitionInfoPath(1)

makeGroupedDataDir(1)

isOverwriteFiles(1)

isIgnoreInvalidRows(1)

isCombineText(1)

getSegmentGranularIntervals(1)

intoConfiguration(1)

getWorkingPath(1)

addJobProperties(1)

getSchema(1)

getParser(1)

getInputFormatClass(1)

fromString(1)

setShardSpecs(1)

Example #1

Show file

File: HadoopDruidIndexerConfigTest.java Project: claymodel/dtv-tvc-commerce

  @Test
  public void testGranularitySpec() {
    final HadoopDruidIndexerConfig cfg;

    try {
      cfg =
          jsonReadWriteRead(
              "{"
                  + " \"granularitySpec\":{"
                  + "   \"type\":\"uniform\","
                  + "   \"gran\":\"hour\","
                  + "   \"intervals\":[\"2012-01-01/P1D\"]"
                  + " }"
                  + "}",
              HadoopDruidIndexerConfig.class);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }

    final UniformGranularitySpec granularitySpec =
        (UniformGranularitySpec) cfg.getGranularitySpec();

    Assert.assertEquals(
        "getIntervals",
        Lists.newArrayList(new Interval("2012-01-01/P1D")),
        granularitySpec.getIntervals());

    Assert.assertEquals("getGranularity", "HOUR", granularitySpec.getGranularity().toString());
  }

Example #2

Show file

File: HadoopDruidIndexerConfigTest.java Project: claymodel/dtv-tvc-commerce

  @Test
  public void testGranularitySpecPostConstructorIntervals() {
    // Deprecated and replaced by granularitySpec, but still supported
    final HadoopDruidIndexerConfig cfg;

    try {
      cfg =
          jsonMapper.readValue(
              "{" + "\"segmentGranularity\":\"day\"" + "}", HadoopDruidIndexerConfig.class);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }

    cfg.setIntervals(Lists.newArrayList(new Interval("2012-03-01/P1D")));

    final UniformGranularitySpec granularitySpec =
        (UniformGranularitySpec) cfg.getGranularitySpec();

    Assert.assertEquals(
        "getIntervals",
        Lists.newArrayList(new Interval("2012-03-01/P1D")),
        granularitySpec.getIntervals());

    Assert.assertEquals("getGranularity", "DAY", granularitySpec.getGranularity().toString());
  }

Example #3

Show file

File: DeterminePartitionsJob.java Project: peidachang/druid-1

    public DeterminePartitionsDimSelectionMapperHelper(
        HadoopDruidIndexerConfig config, String partitionDimension) {
      this.config = config;
      this.partitionDimension = partitionDimension;

      final ImmutableMap.Builder<DateTime, Integer> timeIndexBuilder = ImmutableMap.builder();
      int idx = 0;
      for (final Interval bucketInterval : config.getGranularitySpec().bucketIntervals().get()) {
        timeIndexBuilder.put(bucketInterval.getStart(), idx);
        idx++;
      }

      this.intervalIndexes = timeIndexBuilder.build();
    }

Example #4

Show file

File: DeterminePartitionsJob.java Project: peidachang/druid-1

    public void emitDimValueCounts(
        TaskInputOutputContext<?, ?, BytesWritable, Text> context,
        DateTime timestamp,
        Map<String, Iterable<String>> dims)
        throws IOException, InterruptedException {
      final Optional<Interval> maybeInterval =
          config.getGranularitySpec().bucketInterval(timestamp);

      if (!maybeInterval.isPresent()) {
        throw new ISE("WTF?! No bucket found for timestamp: %s", timestamp);
      }

      final Interval interval = maybeInterval.get();
      final int intervalIndex = intervalIndexes.get(interval.getStart());

      final ByteBuffer buf = ByteBuffer.allocate(4 + 8);
      buf.putInt(intervalIndex);
      buf.putLong(interval.getStartMillis());
      final byte[] groupKey = buf.array();

      // Emit row-counter value.
      write(context, groupKey, new DimValueCount("", "", 1));

      for (final Map.Entry<String, Iterable<String>> dimAndValues : dims.entrySet()) {
        final String dim = dimAndValues.getKey();

        if (partitionDimension == null || partitionDimension.equals(dim)) {
          final Iterable<String> dimValues = dimAndValues.getValue();

          if (Iterables.size(dimValues) == 1) {
            // Emit this value.
            write(
                context, groupKey, new DimValueCount(dim, Iterables.getOnlyElement(dimValues), 1));
          } else {
            // This dimension is unsuitable for partitioning. Poison it by emitting a negative
            // value.
            write(context, groupKey, new DimValueCount(dim, "", -1));
          }
        }
      }
    }

Example #5

Show file

File: HadoopDruidIndexerConfigTest.java Project: claymodel/dtv-tvc-commerce

  @Test
  public void testGranularitySpecLegacy() {
    // Deprecated and replaced by granularitySpec, but still supported
    final HadoopDruidIndexerConfig cfg;

    try {
      cfg =
          jsonReadWriteRead(
              "{" + "\"segmentGranularity\":\"day\"," + "\"intervals\":[\"2012-02-01/P1D\"]" + "}",
              HadoopDruidIndexerConfig.class);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }

    final UniformGranularitySpec granularitySpec =
        (UniformGranularitySpec) cfg.getGranularitySpec();

    Assert.assertEquals(
        "getIntervals",
        Lists.newArrayList(new Interval("2012-02-01/P1D")),
        granularitySpec.getIntervals());

    Assert.assertEquals("getGranularity", "DAY", granularitySpec.getGranularity().toString());
  }

Example #6

Show file

File: DeterminePartitionsJob.java Project: peidachang/druid-1

  public boolean run() {
    try {
      /*
       * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
       * in the final segment.
       */

      if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
        throw new ISE(
            "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
            config.getPartitionsSpec());
      }

      if (!config.getPartitionsSpec().isAssumeGrouped()) {
        final Job groupByJob =
            Job.getInstance(
                new Configuration(),
                String.format(
                    "%s-determine_partitions_groupby-%s",
                    config.getDataSource(), config.getIntervals()));

        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);

        groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
        groupByJob.setMapOutputKeyClass(BytesWritable.class);
        groupByJob.setMapOutputValueClass(NullWritable.class);
        groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
        groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
        groupByJob.setOutputKeyClass(BytesWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        JobHelper.setupClasspath(
            JobHelper.distributedClassPath(config.getWorkingPath()),
            JobHelper.distributedClassPath(config.makeIntermediatePath()),
            groupByJob);

        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

        groupByJob.submit();
        log.info(
            "Job %s submitted, status available at: %s",
            groupByJob.getJobName(), groupByJob.getTrackingURL());

        if (!groupByJob.waitForCompletion(true)) {
          log.error("Job failed: %s", groupByJob.getJobID());
          return false;
        }
      } else {
        log.info("Skipping group-by job.");
      }

      /*
       * Read grouped data and determine appropriate partitions.
       */
      final Job dimSelectionJob =
          Job.getInstance(
              new Configuration(),
              String.format(
                  "%s-determine_partitions_dimselection-%s",
                  config.getDataSource(), config.getIntervals()));

      dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

      JobHelper.injectSystemProperties(dimSelectionJob);
      config.addJobProperties(dimSelectionJob);

      if (!config.getPartitionsSpec().isAssumeGrouped()) {
        // Read grouped data from the groupByJob.
        dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
        dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
      } else {
        // Directly read the source data, since we assume it's already grouped.
        dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
        config.addInputPaths(dimSelectionJob);
      }

      SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
      dimSelectionJob.setMapOutputValueClass(Text.class);
      dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
      dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
      dimSelectionJob.setOutputKeyClass(BytesWritable.class);
      dimSelectionJob.setOutputValueClass(Text.class);
      dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
      dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
      dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
      JobHelper.setupClasspath(
          JobHelper.distributedClassPath(config.getWorkingPath()),
          JobHelper.distributedClassPath(config.makeIntermediatePath()),
          dimSelectionJob);

      config.intoConfiguration(dimSelectionJob);
      FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

      dimSelectionJob.submit();
      log.info(
          "Job %s submitted, status available at: %s",
          dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());

      if (!dimSelectionJob.waitForCompletion(true)) {
        log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
        return false;
      }

      /*
       * Load partitions determined by the previous job.
       */

      log.info(
          "Job completed, loading up partitions for intervals[%s].",
          config.getSegmentGranularIntervals());
      FileSystem fileSystem = null;
      Map<DateTime, List<HadoopyShardSpec>> shardSpecs =
          Maps.newTreeMap(DateTimeComparator.getInstance());
      int shardCount = 0;
      for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
        if (fileSystem == null) {
          fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
        }
        if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
          List<ShardSpec> specs =
              config.JSON_MAPPER.readValue(
                  Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                  new TypeReference<List<ShardSpec>>() {});

          List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
          for (int i = 0; i < specs.size(); ++i) {
            actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
            log.info(
                "DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
          }

          shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
        } else {
          log.info("Path[%s] didn't exist!?", partitionInfoPath);
        }
      }
      config.setShardSpecs(shardSpecs);

      return true;
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }

Example #7

Show file

File: HadoopDruidIndexerMapper.java Project: upton/druid

 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
   parser = config.getParser();
   granularitySpec = config.getGranularitySpec();
 }