/**
 * Verifies the v1 JSON wire format of {@link DataSegment}: the serialized map must contain
 * exactly the expected ten fields with the expected values, and a serialize/deserialize
 * round trip must preserve every accessor, compareTo ordering, and hashCode.
 */
@Test
public void testV1Serialization() throws Exception
{
  final Interval interval = new Interval("2011-10-01/2011-10-02");
  final ImmutableMap<String, Object> loadSpec = ImmutableMap.<String, Object>of("something", "or_other");

  DataSegment segment = new DataSegment(
      "something",
      interval,
      "1",
      loadSpec,
      Arrays.asList("dim1", "dim2"),
      Arrays.asList("met1", "met2"),
      NoneShardSpec.instance(),
      IndexIO.CURRENT_VERSION_ID,
      1
  );

  // Serialize once and reuse the JSON. The original test re-serialized the same immutable
  // segment four times and re-deserialized it four times; the results are identical, so a
  // single round trip covers the same behavior.
  final String json = mapper.writeValueAsString(segment);

  final Map<String, Object> objectMap = mapper.readValue(
      json,
      new TypeReference<Map<String, Object>>() {}
  );

  // Raw JSON shape: ten fields, dimensions/metrics flattened to comma-joined strings.
  Assert.assertEquals(10, objectMap.size());
  Assert.assertEquals("something", objectMap.get("dataSource"));
  Assert.assertEquals(interval.toString(), objectMap.get("interval"));
  Assert.assertEquals("1", objectMap.get("version"));
  Assert.assertEquals(loadSpec, objectMap.get("loadSpec"));
  Assert.assertEquals("dim1,dim2", objectMap.get("dimensions"));
  Assert.assertEquals("met1,met2", objectMap.get("metrics"));
  Assert.assertEquals(ImmutableMap.of("type", "none"), objectMap.get("shardSpec"));
  Assert.assertEquals(IndexIO.CURRENT_VERSION_ID, objectMap.get("binaryVersion"));
  Assert.assertEquals(1, objectMap.get("size"));

  // Round trip: every field survives deserialization.
  final DataSegment deserializedSegment = mapper.readValue(json, DataSegment.class);

  Assert.assertEquals(segment.getDataSource(), deserializedSegment.getDataSource());
  Assert.assertEquals(segment.getInterval(), deserializedSegment.getInterval());
  Assert.assertEquals(segment.getVersion(), deserializedSegment.getVersion());
  Assert.assertEquals(segment.getLoadSpec(), deserializedSegment.getLoadSpec());
  Assert.assertEquals(segment.getDimensions(), deserializedSegment.getDimensions());
  Assert.assertEquals(segment.getMetrics(), deserializedSegment.getMetrics());
  Assert.assertEquals(segment.getShardSpec(), deserializedSegment.getShardSpec());
  Assert.assertEquals(segment.getSize(), deserializedSegment.getSize());
  Assert.assertEquals(segment.getIdentifier(), deserializedSegment.getIdentifier());

  // Ordering and hashing are consistent in both directions.
  Assert.assertEquals(0, segment.compareTo(deserializedSegment));
  Assert.assertEquals(0, deserializedSegment.compareTo(segment));
  Assert.assertEquals(segment.hashCode(), deserializedSegment.hashCode());
}
/**
 * Records that {@code server} now serves {@code segment}. On the first sighting of a
 * segment, a {@link ServerSelector} is created and inserted into the per-datasource
 * timeline; subsequent sightings only attach the server to the existing selector.
 * All mutation happens under {@code lock}.
 */
private void serverAddedSegment(final DruidServerMetadata server, final DataSegment segment)
{
  final String segmentId = segment.getIdentifier();

  synchronized (lock) {
    log.debug("Adding segment[%s] for server[%s]", segment, server);

    ServerSelector serverSelector = selectors.get(segmentId);
    if (serverSelector == null) {
      // Segment is new to this view: build its selector and index it in the timeline
      // for its datasource, creating the timeline lazily if needed.
      serverSelector = new ServerSelector(segment, tierSelectorStrategy);

      final String dataSource = segment.getDataSource();
      VersionedIntervalTimeline<String, ServerSelector> dataSourceTimeline = timelines.get(dataSource);
      if (dataSourceTimeline == null) {
        dataSourceTimeline = new VersionedIntervalTimeline<>(Ordering.natural());
        timelines.put(dataSource, dataSourceTimeline);
      }

      dataSourceTimeline.add(
          segment.getInterval(),
          segment.getVersion(),
          segment.getShardSpec().createChunk(serverSelector)
      );
      selectors.put(segmentId, serverSelector);
    }

    // Resolve (or lazily register) the client for this server, then attach it.
    QueryableDruidServer client = clients.get(server.getName());
    if (client == null) {
      client = addServer(baseView.getInventoryValue(server.getName()));
    }
    serverSelector.addServerAndUpdateSegment(client, segment);
  }
}
/**
 * Returns the subset of pushed segments whose identifiers appear in {@code identifiers}.
 * Builds a throwaway timeline over all pushed segments, then looks up each requested
 * identifier's interval and keeps only chunks whose identifier was actually requested.
 */
@Override
public Set<DataSegment> findUsedSegments(Set<SegmentIdentifier> identifiers) throws IOException
{
  // Index every pushed segment by interval/version so lookups below are cheap.
  final VersionedIntervalTimeline<String, DataSegment> pushedTimeline =
      new VersionedIntervalTimeline<>(Ordering.natural());
  for (DataSegment pushed : appenderatorTester.getPushedSegments()) {
    pushedTimeline.add(
        pushed.getInterval(),
        pushed.getVersion(),
        pushed.getShardSpec().createChunk(pushed)
    );
  }

  final Set<DataSegment> found = Sets.newHashSet();
  for (SegmentIdentifier requested : identifiers) {
    for (TimelineObjectHolder<String, DataSegment> holder : pushedTimeline.lookup(requested.getInterval())) {
      for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
        final DataSegment candidate = chunk.getObject();
        // An interval lookup can return overlapping segments that were not asked
        // for; keep only exact identifier matches.
        if (identifiers.contains(SegmentIdentifier.fromDataSegment(candidate))) {
          found.add(candidate);
        }
      }
    }
  }
  return found;
}
/**
 * Names the Hadoop job after the segments it converts: a single segment yields a
 * fully qualified name (datasource/interval/version), while multiple segments yield
 * a name built from the distinct datasources and versions involved.
 */
private static void setJobName(JobConf jobConf, List<DataSegment> segments)
{
  if (segments.size() == 1) {
    final DataSegment only = segments.get(0);
    jobConf.setJobName(
        String.format(
            "druid-convert-%s-%s-%s",
            only.getDataSource(),
            only.getInterval(),
            only.getVersion()
        )
    );
    return;
  }

  // Collect the distinct datasources and versions in one pass.
  final Set<String> dataSources = Sets.newHashSet();
  final Set<String> versions = Sets.newHashSet();
  for (DataSegment segment : segments) {
    dataSources.add(segment.getDataSource());
    versions.add(segment.getVersion());
  }

  jobConf.setJobName(
      String.format(
          "druid-convert-%s-%s",
          Arrays.toString(dataSources.toArray()),
          Arrays.toString(versions.toArray())
      )
  );
}
/**
 * Records that {@code server} no longer serves {@code segment}. Detaches the server from
 * the segment's {@link ServerSelector}; when no servers remain, the selector and its
 * timeline entry are removed as well. All mutation happens under {@code lock}.
 *
 * <p>Unknown segments or unknown server/segment associations are logged and tolerated,
 * since removal announcements can race with the inventory view.
 */
private void serverRemovedSegment(DruidServerMetadata server, DataSegment segment)
{
  String segmentId = segment.getIdentifier();
  final ServerSelector selector;

  synchronized (lock) {
    log.debug("Removing segment[%s] from server[%s].", segmentId, server);

    selector = selectors.get(segmentId);
    if (selector == null) {
      // Fixed typo in log message: "non-existant" -> "non-existent".
      log.warn("Told to remove non-existent segment[%s]", segmentId);
      return;
    }

    QueryableDruidServer queryableDruidServer = clients.get(server.getName());
    if (!selector.removeServer(queryableDruidServer)) {
      // Fixed typo in log message: "non-existant" -> "non-existent".
      log.warn(
          "Asked to disassociate non-existent association between server[%s] and segment[%s]",
          server,
          segmentId
      );
    }

    if (selector.isEmpty()) {
      // Last server serving this segment is gone: drop the selector and its
      // timeline entry so queries no longer route to it.
      VersionedIntervalTimeline<String, ServerSelector> timeline = timelines.get(segment.getDataSource());
      selectors.remove(segmentId);

      final PartitionChunk<ServerSelector> removedPartition = timeline.remove(
          segment.getInterval(),
          segment.getVersion(),
          segment.getShardSpec().createChunk(selector)
      );
      if (removedPartition == null) {
        log.warn(
            "Asked to remove timeline entry[interval: %s, version: %s] that doesn't exist",
            segment.getInterval(),
            segment.getVersion()
        );
      }
    }
  }
}
/**
 * End-to-end test of {@link HadoopConverterJob}: converts the previously loaded segments
 * to uncompressed roaring-bitmap indexes and verifies that the converted segments match
 * the originals field-for-field, carry a "_converted" version suffix, and grew in size
 * (uncompressed output is larger than the compressed input).
 */
@Test
public void testSimpleJob() throws IOException, InterruptedException
{
  final SQLMetadataSegmentManager manager = new SQLMetadataSegmentManager(
      HadoopDruidConverterConfig.jsonMapper,
      new Supplier<MetadataSegmentManagerConfig>()
      {
        @Override
        public MetadataSegmentManagerConfig get()
        {
          return new MetadataSegmentManagerConfig();
        }
      },
      metadataStorageTablesConfigSupplier,
      connector
  );

  // Fixed local-variable typo: "oldSemgments" -> "oldSegments".
  final List<DataSegment> oldSegments = getDataSegments(manager);
  final File tmpDir = temporaryFolder.newFolder();

  final HadoopConverterJob converterJob = new HadoopConverterJob(
      new HadoopDruidConverterConfig(
          DATASOURCE,
          interval,
          new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
          oldSegments,
          true,
          tmpDir.toURI(),
          ImmutableMap.<String, String>of(),
          null,
          tmpSegmentDir.toURI().toString()
      )
  );

  final List<DataSegment> segments = Lists.newArrayList(converterJob.run());
  Assert.assertNotNull("bad result", segments);
  Assert.assertEquals("wrong segment count", 4, segments.size());

  // Conversion to uncompressed columns must have both read and written data, and the
  // uncompressed output should be strictly larger than the compressed input.
  Assert.assertTrue(converterJob.getLoadedBytes() > 0);
  Assert.assertTrue(converterJob.getWrittenBytes() > 0);
  Assert.assertTrue(converterJob.getWrittenBytes() > converterJob.getLoadedBytes());
  Assert.assertEquals(oldSegments.size(), segments.size());

  final DataSegment segment = segments.get(0);
  Assert.assertTrue(interval.contains(segment.getInterval()));
  Assert.assertTrue(segment.getVersion().endsWith("_converted"));
  Assert.assertTrue(segment.getLoadSpec().get("path").toString().contains("_converted"));

  // The working directory must contain only files — no leftover subdirectories.
  for (File file : tmpDir.listFiles()) {
    Assert.assertFalse(file.isDirectory());
    Assert.assertTrue(file.isFile());
  }

  // Pair old and new segments by identifier order and compare them field-by-field.
  final Comparator<DataSegment> segmentComparator = new Comparator<DataSegment>()
  {
    @Override
    public int compare(DataSegment o1, DataSegment o2)
    {
      return o1.getIdentifier().compareTo(o2.getIdentifier());
    }
  };
  Collections.sort(oldSegments, segmentComparator);
  Collections.sort(segments, segmentComparator);

  for (int i = 0; i < oldSegments.size(); ++i) {
    final DataSegment oldSegment = oldSegments.get(i);
    final DataSegment newSegment = segments.get(i);
    Assert.assertEquals(oldSegment.getDataSource(), newSegment.getDataSource());
    Assert.assertEquals(oldSegment.getInterval(), newSegment.getInterval());
    Assert.assertEquals(
        Sets.<String>newHashSet(oldSegment.getMetrics()),
        Sets.<String>newHashSet(newSegment.getMetrics())
    );
    Assert.assertEquals(
        Sets.<String>newHashSet(oldSegment.getDimensions()),
        Sets.<String>newHashSet(newSegment.getDimensions())
    );
    Assert.assertEquals(oldSegment.getVersion() + "_converted", newSegment.getVersion());
    Assert.assertTrue(oldSegment.getSize() < newSegment.getSize());
    Assert.assertEquals(oldSegment.getBinaryVersion(), newSegment.getBinaryVersion());
  }
}
/**
 * Mapper body for segment conversion: downloads and unzips the single segment of this
 * split into a temp "in" directory, converts it into an "out" directory with the
 * configured {@link IndexSpec} (optionally validating the two against each other),
 * pushes the converted index under a "_converted"-suffixed version, and publishes the
 * resulting {@link DataSegment} via both the task configuration and the mapper output.
 *
 * @throws IAE         if the split is not a {@link DatasourceInputSplit}
 * @throws IOException if the output directory cannot be created or any I/O step fails
 */
@Override
protected void map(String key, String value, final Context context)
    throws IOException, InterruptedException
{
  final InputSplit split = context.getInputSplit();
  if (!(split instanceof DatasourceInputSplit)) {
    throw new IAE(
        "Unexpected split type. Expected [%s] was [%s]",
        DatasourceInputSplit.class.getCanonicalName(),
        split.getClass().getCanonicalName()
    );
  }

  final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
  final File tmpDir = Paths.get(tmpDirLoc).toFile();

  final DataSegment segment =
      Iterables.getOnlyElement(((DatasourceInputSplit) split).getSegments()).getSegment();

  final HadoopDruidConverterConfig config =
      converterConfigFromConfiguration(context.getConfiguration());

  context.setStatus("DOWNLOADING");
  context.progress();
  final Path inPath = new Path(JobHelper.getURIFromSegment(segment));
  final File inDir = new File(tmpDir, "in");
  if (inDir.exists() && !inDir.delete()) {
    log.warn("Could not delete [%s]", inDir);
  }
  // BUG FIX: the original condition was
  //   !inDir.mkdir() && (!inDir.exists() || inDir.isDirectory())
  // which warned when the directory DID exist as a directory (a success case) and
  // stayed silent when creation genuinely failed. Use the same correct form as the
  // outDir check below: warn only when mkdir failed and no usable directory exists.
  if (!inDir.mkdir() && (!inDir.exists() || !inDir.isDirectory())) {
    log.warn("Unable to make directory");
  }

  final long inSize = JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context);
  log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
  context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);

  context.setStatus("CONVERTING");
  context.progress();
  final File outDir = new File(tmpDir, "out");
  if (!outDir.mkdir() && (!outDir.exists() || !outDir.isDirectory())) {
    throw new IOException(String.format("Could not create output directory [%s]", outDir));
  }
  HadoopDruidConverterConfig.INDEX_MERGER.convert(
      inDir,
      outDir,
      config.getIndexSpec(),
      JobHelper.progressIndicatorForContext(context)
  );
  if (config.isValidate()) {
    context.setStatus("Validating");
    HadoopDruidConverterConfig.INDEX_IO.validateTwoSegments(inDir, outDir);
  }

  context.progress();
  context.setStatus("Starting PUSH");
  final Path baseOutputPath = new Path(config.getSegmentOutputPath());
  final FileSystem outputFS = baseOutputPath.getFileSystem(context.getConfiguration());
  final DataSegment finalSegmentTemplate = segment.withVersion(segment.getVersion() + "_converted");
  final DataSegment finalSegment = JobHelper.serializeOutIndex(
      finalSegmentTemplate,
      context.getConfiguration(),
      context,
      context.getTaskAttemptID(),
      outDir,
      JobHelper.makeSegmentOutputPath(baseOutputPath, outputFS, finalSegmentTemplate)
  );
  context.progress();
  context.setStatus("Finished PUSH");

  // Publish the converted segment both through the configuration (for the output
  // format's commit path) and as the mapper's key/value output.
  final String finalSegmentString =
      HadoopDruidConverterConfig.jsonMapper.writeValueAsString(finalSegment);
  context
      .getConfiguration()
      .set(ConvertingOutputFormat.PUBLISHED_SEGMENT_KEY, finalSegmentString);
  context.write(new Text("dataSegment"), new Text(finalSegmentString));

  context.getCounter(COUNTER_GROUP, COUNTER_WRITTEN).increment(finalSegment.getSize());
  context.progress();
  context.setStatus("Ready To Commit");
}