예제 #1
0
  /**
   * Serializes a {@link DataSegment} with the JSON mapper and checks two things: the shape of the
   * emitted JSON when read back as a plain map, and that a segment read back as a
   * {@link DataSegment} matches the one that was written.
   */
  @Test
  public void testV1Serialization() throws Exception {
    final Interval interval = new Interval("2011-10-01/2011-10-02");
    final ImmutableMap<String, Object> loadSpec =
        ImmutableMap.<String, Object>of("something", "or_other");

    final DataSegment original =
        new DataSegment(
            "something",
            interval,
            "1",
            loadSpec,
            Arrays.asList("dim1", "dim2"),
            Arrays.asList("met1", "met2"),
            NoneShardSpec.instance(),
            IndexIO.CURRENT_VERSION_ID,
            1);

    // Step 1: read the serialized form as an untyped map and inspect each emitted field.
    final String rawJson = mapper.writeValueAsString(original);
    final Map<String, Object> fields =
        mapper.readValue(rawJson, new TypeReference<Map<String, Object>>() {});

    Assert.assertEquals(10, fields.size());
    Assert.assertEquals("something", fields.get("dataSource"));
    Assert.assertEquals(interval.toString(), fields.get("interval"));
    Assert.assertEquals("1", fields.get("version"));
    Assert.assertEquals(loadSpec, fields.get("loadSpec"));
    Assert.assertEquals("dim1,dim2", fields.get("dimensions"));
    Assert.assertEquals("met1,met2", fields.get("metrics"));
    Assert.assertEquals(ImmutableMap.of("type", "none"), fields.get("shardSpec"));
    Assert.assertEquals(IndexIO.CURRENT_VERSION_ID, fields.get("binaryVersion"));
    Assert.assertEquals(1, fields.get("size"));

    // Step 2: read the serialized form back as a DataSegment and compare getter by getter.
    final DataSegment roundTripped =
        mapper.readValue(mapper.writeValueAsString(original), DataSegment.class);

    Assert.assertEquals(original.getDataSource(), roundTripped.getDataSource());
    Assert.assertEquals(original.getInterval(), roundTripped.getInterval());
    Assert.assertEquals(original.getVersion(), roundTripped.getVersion());
    Assert.assertEquals(original.getLoadSpec(), roundTripped.getLoadSpec());
    Assert.assertEquals(original.getDimensions(), roundTripped.getDimensions());
    Assert.assertEquals(original.getMetrics(), roundTripped.getMetrics());
    Assert.assertEquals(original.getShardSpec(), roundTripped.getShardSpec());
    Assert.assertEquals(original.getSize(), roundTripped.getSize());
    Assert.assertEquals(original.getIdentifier(), roundTripped.getIdentifier());

    // Step 3: each ordering/hash check below works on its own freshly deserialized copy.
    final DataSegment copyForForwardCompare =
        mapper.readValue(mapper.writeValueAsString(original), DataSegment.class);
    Assert.assertEquals(0, original.compareTo(copyForForwardCompare));

    final DataSegment copyForReverseCompare =
        mapper.readValue(mapper.writeValueAsString(original), DataSegment.class);
    Assert.assertEquals(0, copyForReverseCompare.compareTo(original));

    final DataSegment copyForHash =
        mapper.readValue(mapper.writeValueAsString(original), DataSegment.class);
    Assert.assertEquals(original.hashCode(), copyForHash.hashCode());
  }
예제 #2
0
  /**
   * Records that {@code server} now holds {@code segment}.
   *
   * <p>While holding {@code lock}: if no selector is registered under the segment's identifier,
   * one is created, added to the timeline of the segment's data source (creating that timeline
   * when absent) and stored in {@code selectors}. The server is then looked up in
   * {@code clients} by name, added via {@code addServer} when missing, and attached to the
   * selector.
   *
   * @param server metadata of the server reporting the segment
   * @param segment the segment that was added
   */
  private void serverAddedSegment(final DruidServerMetadata server, final DataSegment segment) {
    final String id = segment.getIdentifier();

    synchronized (lock) {
      log.debug("Adding segment[%s] for server[%s]", segment, server);

      ServerSelector segmentSelector = selectors.get(id);
      if (segmentSelector == null) {
        // First sighting of this segment: build its selector and publish it in the timeline.
        segmentSelector = new ServerSelector(segment, tierSelectorStrategy);

        final String dataSource = segment.getDataSource();
        VersionedIntervalTimeline<String, ServerSelector> dataSourceTimeline =
            timelines.get(dataSource);
        if (dataSourceTimeline == null) {
          dataSourceTimeline = new VersionedIntervalTimeline<>(Ordering.natural());
          timelines.put(dataSource, dataSourceTimeline);
        }

        dataSourceTimeline.add(
            segment.getInterval(),
            segment.getVersion(),
            segment.getShardSpec().createChunk(segmentSelector));
        selectors.put(id, segmentSelector);
      }

      final String serverName = server.getName();
      QueryableDruidServer holder = clients.get(serverName);
      if (holder == null) {
        // Server not yet registered as a client: register it from the base view's inventory.
        holder = addServer(baseView.getInventoryValue(serverName));
      }
      segmentSelector.addServerAndUpdateSegment(holder, segment);
    }
  }
    /**
     * Returns the pushed segments that match one of the requested identifiers.
     *
     * <p>A timeline is built from every segment reported by
     * {@code appenderatorTester.getPushedSegments()}. For each requested identifier, the timeline
     * is queried with that identifier's interval, and every chunk found there whose segment maps
     * (via {@code SegmentIdentifier.fromDataSegment}) to a member of {@code identifiers} is
     * collected.
     *
     * @param identifiers the segment identifiers to look for
     * @return the matching segments, without duplicates
     */
    @Override
    public Set<DataSegment> findUsedSegments(Set<SegmentIdentifier> identifiers)
        throws IOException {
      final VersionedIntervalTimeline<String, DataSegment> pushedTimeline =
          new VersionedIntervalTimeline<>(Ordering.natural());
      for (DataSegment pushed : appenderatorTester.getPushedSegments()) {
        pushedTimeline.add(
            pushed.getInterval(), pushed.getVersion(), pushed.getShardSpec().createChunk(pushed));
      }

      final Set<DataSegment> matches = Sets.newHashSet();
      for (SegmentIdentifier wanted : identifiers) {
        for (TimelineObjectHolder<String, DataSegment> holder :
            pushedTimeline.lookup(wanted.getInterval())) {
          for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            final DataSegment candidate = chunk.getObject();
            if (!identifiers.contains(SegmentIdentifier.fromDataSegment(candidate))) {
              continue;
            }
            matches.add(candidate);
          }
        }
      }

      return matches;
    }
 /**
  * Sets a descriptive job name on {@code jobConf}.
  *
  * <p>For exactly one segment the name is built from its data source, interval and version.
  * For any other count the name lists the distinct data sources and the distinct versions
  * found across {@code segments}.
  *
  * @param jobConf the job configuration whose name is set
  * @param segments the segments the job will convert
  */
 private static void setJobName(JobConf jobConf, List<DataSegment> segments) {
   if (segments.size() == 1) {
     final DataSegment only = segments.get(0);
     final String singleName =
         String.format(
             "druid-convert-%s-%s-%s",
             only.getDataSource(), only.getInterval(), only.getVersion());
     jobConf.setJobName(singleName);
     return;
   }

   // Collect the distinct data sources and versions in one pass over the segments.
   final Set<String> distinctDataSources = Sets.newHashSet();
   final Set<String> distinctVersions = Sets.newHashSet();
   for (DataSegment each : segments) {
     distinctDataSources.add(each.getDataSource());
     distinctVersions.add(each.getVersion());
   }

   final String multiName =
       String.format(
           "druid-convert-%s-%s",
           Arrays.toString(distinctDataSources.toArray()),
           Arrays.toString(distinctVersions.toArray()));
   jobConf.setJobName(multiName);
 }
예제 #5
0
  /**
   * Records that {@code server} no longer holds {@code segment}.
   *
   * <p>While holding {@code lock}: the selector registered under the segment's identifier is
   * looked up (a warning is logged and the method returns if there is none), and the server is
   * detached from it. If the selector is left empty, it is dropped from {@code selectors} and
   * its chunk is removed from the timeline of the segment's data source. A warning is logged
   * when either the server association or the timeline entry was not present.
   *
   * @param server metadata of the server reporting the removal
   * @param segment the segment that was removed
   */
  private void serverRemovedSegment(DruidServerMetadata server, DataSegment segment) {
    final String id = segment.getIdentifier();

    synchronized (lock) {
      log.debug("Removing segment[%s] from server[%s].", id, server);

      final ServerSelector segmentSelector = selectors.get(id);
      if (segmentSelector == null) {
        log.warn("Told to remove non-existant segment[%s]", id);
        return;
      }

      final QueryableDruidServer holder = clients.get(server.getName());
      final boolean wasAssociated = segmentSelector.removeServer(holder);
      if (!wasAssociated) {
        log.warn(
            "Asked to disassociate non-existant association between server[%s] and segment[%s]",
            server, id);
      }

      if (!segmentSelector.isEmpty()) {
        // Other servers still serve this segment; keep the selector and timeline entry.
        return;
      }

      final VersionedIntervalTimeline<String, ServerSelector> dataSourceTimeline =
          timelines.get(segment.getDataSource());
      selectors.remove(id);

      final PartitionChunk<ServerSelector> removedChunk =
          dataSourceTimeline.remove(
              segment.getInterval(),
              segment.getVersion(),
              segment.getShardSpec().createChunk(segmentSelector));
      if (removedChunk == null) {
        log.warn(
            "Asked to remove timeline entry[interval: %s, version: %s] that doesn't exist",
            segment.getInterval(), segment.getVersion());
      }
    }
  }
예제 #6
0
  /**
   * Runs a {@link HadoopConverterJob} over the segments obtained from {@code getDataSegments}
   * and checks the result: segment count, byte counters, the {@code _converted} version/path
   * suffix, that the temporary directory holds only plain files, and that each converted
   * segment corresponds to its source segment.
   */
  @Test
  public void testSimpleJob() throws IOException, InterruptedException {
    final Supplier<MetadataSegmentManagerConfig> managerConfigSupplier =
        new Supplier<MetadataSegmentManagerConfig>() {
          @Override
          public MetadataSegmentManagerConfig get() {
            return new MetadataSegmentManagerConfig();
          }
        };
    final SQLMetadataSegmentManager manager =
        new SQLMetadataSegmentManager(
            HadoopDruidConverterConfig.jsonMapper,
            managerConfigSupplier,
            metadataStorageTablesConfigSupplier,
            connector);

    final List<DataSegment> sourceSegments = getDataSegments(manager);
    final File tmpDir = temporaryFolder.newFolder();
    final IndexSpec indexSpec =
        new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed");
    final HadoopDruidConverterConfig converterConfig =
        new HadoopDruidConverterConfig(
            DATASOURCE,
            interval,
            indexSpec,
            sourceSegments,
            true,
            tmpDir.toURI(),
            ImmutableMap.<String, String>of(),
            null,
            tmpSegmentDir.toURI().toString());
    final HadoopConverterJob converterJob = new HadoopConverterJob(converterConfig);

    final List<DataSegment> convertedSegments = Lists.newArrayList(converterJob.run());
    Assert.assertNotNull("bad result", convertedSegments);
    Assert.assertEquals("wrong segment count", 4, convertedSegments.size());
    Assert.assertTrue(converterJob.getLoadedBytes() > 0);
    Assert.assertTrue(converterJob.getWrittenBytes() > 0);
    Assert.assertTrue(converterJob.getWrittenBytes() > converterJob.getLoadedBytes());

    Assert.assertEquals(sourceSegments.size(), convertedSegments.size());

    // Spot-check the first converted segment for interval containment and the naming suffix.
    final DataSegment firstConverted = convertedSegments.get(0);
    Assert.assertTrue(interval.contains(firstConverted.getInterval()));
    Assert.assertTrue(firstConverted.getVersion().endsWith("_converted"));
    Assert.assertTrue(
        firstConverted.getLoadSpec().get("path").toString().contains("_converted"));

    // Everything directly under the temporary directory must be a plain file.
    for (File entry : tmpDir.listFiles()) {
      Assert.assertFalse(entry.isDirectory());
      Assert.assertTrue(entry.isFile());
    }

    // Sort both lists by identifier so source and converted segments line up by index.
    final Comparator<DataSegment> byIdentifier =
        new Comparator<DataSegment>() {
          @Override
          public int compare(DataSegment left, DataSegment right) {
            return left.getIdentifier().compareTo(right.getIdentifier());
          }
        };
    Collections.sort(sourceSegments, byIdentifier);
    Collections.sort(convertedSegments, byIdentifier);

    final int sourceCount = sourceSegments.size();
    for (int idx = 0; idx < sourceCount; ++idx) {
      final DataSegment before = sourceSegments.get(idx);
      final DataSegment after = convertedSegments.get(idx);
      Assert.assertEquals(before.getDataSource(), after.getDataSource());
      Assert.assertEquals(before.getInterval(), after.getInterval());
      Assert.assertEquals(
          Sets.<String>newHashSet(before.getMetrics()),
          Sets.<String>newHashSet(after.getMetrics()));
      Assert.assertEquals(
          Sets.<String>newHashSet(before.getDimensions()),
          Sets.<String>newHashSet(after.getDimensions()));
      Assert.assertEquals(before.getVersion() + "_converted", after.getVersion());
      Assert.assertTrue(before.getSize() < after.getSize());
      Assert.assertEquals(before.getBinaryVersion(), after.getBinaryVersion());
    }
  }
    /**
     * Converts the single segment carried by the task's input split.
     *
     * <p>Steps, in order: unzip the segment found at {@code JobHelper.getURIFromSegment} into an
     * {@code in} directory under the configured temporary location; convert it into an
     * {@code out} directory using the configured index spec; validate the two directories
     * against each other when the config asks for it; push the result under the configured
     * segment output path with {@code "_converted"} appended to the version; finally publish the
     * serialized segment both in the job configuration and as a task output record. Loaded and
     * written byte counts are added to the job counters.
     *
     * @param key not read by this method
     * @param value not read by this method
     * @param context the task context supplying the split, configuration, counters and output
     * @throws IOException if the output directory cannot be created
     * @throws InterruptedException declared by the overridden signature
     */
    @Override
    protected void map(String key, String value, final Context context)
        throws IOException, InterruptedException {
      final InputSplit split = context.getInputSplit();
      if (!(split instanceof DatasourceInputSplit)) {
        throw new IAE(
            "Unexpected split type. Expected [%s] was [%s]",
            DatasourceInputSplit.class.getCanonicalName(), split.getClass().getCanonicalName());
      }

      final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
      final File tmpDir = Paths.get(tmpDirLoc).toFile();

      // The split is expected to carry exactly one segment; getOnlyElement enforces that.
      final DataSegment segment =
          Iterables.getOnlyElement(((DatasourceInputSplit) split).getSegments()).getSegment();

      final HadoopDruidConverterConfig config =
          converterConfigFromConfiguration(context.getConfiguration());

      context.setStatus("DOWNLOADING");
      context.progress();
      final Path inPath = new Path(JobHelper.getURIFromSegment(segment));
      final File inDir = new File(tmpDir, "in");

      if (inDir.exists() && !inDir.delete()) {
        log.warn("Could not delete [%s]", inDir);
      }

      // mkdir() returns false when the directory already exists, so only warn when there is
      // really no usable directory afterwards (missing, or the path is not a directory).
      // This mirrors the check applied to the output directory below.
      if (!inDir.mkdir() && (!inDir.exists() || !inDir.isDirectory())) {
        log.warn("Unable to make directory");
      }

      final long inSize =
          JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context);
      log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
      context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);

      context.setStatus("CONVERTING");
      context.progress();
      final File outDir = new File(tmpDir, "out");
      if (!outDir.mkdir() && (!outDir.exists() || !outDir.isDirectory())) {
        throw new IOException(String.format("Could not create output directory [%s]", outDir));
      }
      HadoopDruidConverterConfig.INDEX_MERGER.convert(
          inDir, outDir, config.getIndexSpec(), JobHelper.progressIndicatorForContext(context));
      if (config.isValidate()) {
        context.setStatus("Validating");
        HadoopDruidConverterConfig.INDEX_IO.validateTwoSegments(inDir, outDir);
      }
      context.progress();
      context.setStatus("Starting PUSH");
      final Path baseOutputPath = new Path(config.getSegmentOutputPath());
      final FileSystem outputFS = baseOutputPath.getFileSystem(context.getConfiguration());
      // The converted segment keeps the source segment's metadata except for the version suffix.
      final DataSegment finalSegmentTemplate =
          segment.withVersion(segment.getVersion() + "_converted");
      final DataSegment finalSegment =
          JobHelper.serializeOutIndex(
              finalSegmentTemplate,
              context.getConfiguration(),
              context,
              context.getTaskAttemptID(),
              outDir,
              JobHelper.makeSegmentOutputPath(baseOutputPath, outputFS, finalSegmentTemplate));
      context.progress();
      context.setStatus("Finished PUSH");
      final String finalSegmentString =
          HadoopDruidConverterConfig.jsonMapper.writeValueAsString(finalSegment);
      // Publish the serialized segment twice: in the configuration and as an output record.
      context
          .getConfiguration()
          .set(ConvertingOutputFormat.PUBLISHED_SEGMENT_KEY, finalSegmentString);
      context.write(new Text("dataSegment"), new Text(finalSegmentString));

      context.getCounter(COUNTER_GROUP, COUNTER_WRITTEN).increment(finalSegment.getSize());
      context.progress();
      context.setStatus("Ready To Commit");
    }