private void parseInterval(String startString, String endString) {
  try {
    interval = Interval.parse(startString + "T00:00:00/" + endString + "T23:59:59"); // XXX
  } catch (IllegalArgumentException e) {
    log.warn("{}", e.getMessage());
    // Fall back to a single-day interval based on the start date if the end date is unparseable.
    interval = Interval.parse(startString + "T00:00:00/" + startString + "T23:59:59");
  }
}
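// Sketch (not part of the original class): how Joda-Time's Interval.parse interprets the
// "start/end" ISO-8601 string assembled above. The dates below are hypothetical examples.
public class IntervalParseSketch {
  public static void main(String[] args) {
    org.joda.time.Interval interval =
        org.joda.time.Interval.parse("2014-01-01T00:00:00/2014-01-02T23:59:59");
    System.out.println(interval.getStart()); // 2014-01-01T00:00:00.000 in the default zone
    System.out.println(interval.getEnd());   // 2014-01-02T23:59:59.000 in the default zone
    // A malformed date string makes parse() throw IllegalArgumentException, which is the
    // case the catch block above handles by falling back to a single-day interval.
  }
}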
private void checkAgainstTimespanRestriction(String timespan) {
  Duration duration = Period.parse(requestIntervalRestriction).toDurationFrom(new DateTime());
  if (duration.getMillis() < Interval.parse(timespan).toDurationMillis()) {
    throw new BadRequestException(
        "Requested timespan is too long, please use a period shorter than '"
            + requestIntervalRestriction
            + "'");
  }
}
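// A self-contained sketch (hypothetical values) of the check above: the configured restriction
// is an ISO-8601 period, converted to a concrete Duration anchored at "now", and compared
// against the requested interval's length in milliseconds.
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.joda.time.Period;

public class TimespanRestrictionSketch {
  public static void main(String[] args) {
    String requestIntervalRestriction = "P6M"; // hypothetical configuration value
    Duration allowed = Period.parse(requestIntervalRestriction).toDurationFrom(new DateTime());
    long requested =
        Interval.parse("2013-01-01T00:00:00/2013-12-31T23:59:59").toDurationMillis();
    // One year is longer than six months, so this request would be rejected.
    System.out.println(allowed.getMillis() < requested); // true
  }
}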
private DataSegment getSegmentWithPath(String path) {
  return new DataSegment(
      "dataSource",
      Interval.parse("2000/3000"),
      "ver",
      ImmutableMap.<String, Object>of("type", "hdfs", "path", path),
      ImmutableList.of("product"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      new NoneShardSpec(),
      9,
      12334);
}
@Test
public void testSegmentConvertSerde() throws IOException {
  final DataSegment segment =
      new DataSegment(
          "dataSource",
          Interval.parse("1990-01-01/1999-12-31"),
          "version",
          ImmutableMap.<String, Object>of(),
          ImmutableList.of("dim1", "dim2"),
          ImmutableList.of("metric1", "metric2"),
          new NoneShardSpec(),
          0,
          12345L);
  final ConvertSegmentTask convertSegmentTaskOriginal =
      ConvertSegmentTask.create(
          segment,
          new IndexSpec(new RoaringBitmapSerdeFactory(), "lzf", "uncompressed"),
          false,
          true,
          null);
  final String json = jsonMapper.writeValueAsString(convertSegmentTaskOriginal);
  final Task task = jsonMapper.readValue(json, Task.class);
  Assert.assertTrue(task instanceof ConvertSegmentTask);
  final ConvertSegmentTask convertSegmentTask = (ConvertSegmentTask) task;
  Assert.assertEquals(
      convertSegmentTaskOriginal.getDataSource(), convertSegmentTask.getDataSource());
  Assert.assertEquals(convertSegmentTaskOriginal.getInterval(), convertSegmentTask.getInterval());
  Assert.assertEquals(
      convertSegmentTaskOriginal
          .getIndexSpec()
          .getBitmapSerdeFactory()
          .getClass()
          .getCanonicalName(),
      convertSegmentTask.getIndexSpec().getBitmapSerdeFactory().getClass().getCanonicalName());
  Assert.assertEquals(
      convertSegmentTaskOriginal.getIndexSpec().getDimensionCompression(),
      convertSegmentTask.getIndexSpec().getDimensionCompression());
  Assert.assertEquals(
      convertSegmentTaskOriginal.getIndexSpec().getMetricCompression(),
      convertSegmentTask.getIndexSpec().getMetricCompression());
  Assert.assertEquals(false, convertSegmentTask.isForce());
  Assert.assertEquals(segment, convertSegmentTask.getSegment());
}
@Test
public void testSegmentConvertSerdeReflection() throws IOException {
  final ConvertSegmentTask task =
      ConvertSegmentTask.create(
          new DataSegment(
              "dataSource",
              Interval.parse("1990-01-01/1999-12-31"),
              "version",
              ImmutableMap.<String, Object>of(),
              ImmutableList.of("dim1", "dim2"),
              ImmutableList.of("metric1", "metric2"),
              new NoneShardSpec(),
              0,
              12345L),
          indexSpec,
          false,
          true,
          null);
  final String json = jsonMapper.writeValueAsString(task);
  final ConvertSegmentTask taskFromJson = jsonMapper.readValue(json, ConvertSegmentTask.class);
  Assert.assertEquals(json, jsonMapper.writeValueAsString(taskFromJson));
}
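// The two tests above follow the same Jackson round-trip pattern. A minimal, standalone
// version of it, using a plain ObjectMapper and a hypothetical Pojo class, looks like this.
import com.fasterxml.jackson.databind.ObjectMapper;

public class SerdeRoundTripSketch {
  // Hypothetical value class; Jackson needs a no-arg constructor and accessible fields/getters.
  public static class Pojo {
    public String name;
    public int count;
  }

  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    Pojo original = new Pojo();
    original.name = "dataSource";
    original.count = 9;

    String json = mapper.writeValueAsString(original);
    Pojo fromJson = mapper.readValue(json, Pojo.class);

    // Re-serializing and comparing the JSON strings avoids relying on equals(),
    // which is the same trick the reflection-based serde test above uses.
    System.out.println(json.equals(mapper.writeValueAsString(fromJson))); // true
  }
}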
public class WindowedDataSegmentTest {
  private static final ObjectMapper MAPPER = new DefaultObjectMapper();
  private static final DataSegment SEGMENT =
      new DataSegment(
          "test1",
          Interval.parse("2000/3000"),
          "ver",
          ImmutableMap.<String, Object>of("type", "local", "path", "/tmp/index1.zip"),
          ImmutableList.of("host"),
          ImmutableList.of("visited_sum", "unique_hosts"),
          new NoneShardSpec(),
          9,
          2);

  @Test
  public void testSerdeFullWindow() throws IOException {
    final WindowedDataSegment windowedDataSegment = WindowedDataSegment.of(SEGMENT);
    final WindowedDataSegment roundTrip =
        MAPPER.readValue(MAPPER.writeValueAsBytes(windowedDataSegment), WindowedDataSegment.class);
    Assert.assertEquals(windowedDataSegment, roundTrip);
    Assert.assertEquals(SEGMENT, roundTrip.getSegment());
    Assert.assertEquals(SEGMENT.getInterval(), roundTrip.getInterval());
  }

  @Test
  public void testSerdePartialWindow() throws IOException {
    final Interval partialInterval = new Interval("2500/3000");
    final WindowedDataSegment windowedDataSegment =
        new WindowedDataSegment(SEGMENT, partialInterval);
    final WindowedDataSegment roundTrip =
        MAPPER.readValue(MAPPER.writeValueAsBytes(windowedDataSegment), WindowedDataSegment.class);
    Assert.assertEquals(windowedDataSegment, roundTrip);
    Assert.assertEquals(SEGMENT, roundTrip.getSegment());
    Assert.assertEquals(partialInterval, roundTrip.getInterval());
  }
}
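// Quick standalone illustration (not from the test above) of why "2500/3000" is a sensible
// partial window for a segment covering "2000/3000": the window lies inside the segment's
// interval, so the windowed view only narrows what the segment already covers.
import org.joda.time.Interval;

public class PartialWindowSketch {
  public static void main(String[] args) {
    Interval segmentInterval = Interval.parse("2000/3000");
    Interval partialWindow = Interval.parse("2500/3000");
    System.out.println(segmentInterval.contains(partialWindow)); // true
  }
}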
public class HadoopConverterJobTest {
  @Rule public final TemporaryFolder temporaryFolder = new TemporaryFolder();

  @Rule
  public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule =
      new TestDerbyConnector.DerbyConnectorRule();

  private String storageLocProperty = null;
  private File tmpSegmentDir = null;

  private static final String DATASOURCE = "testDatasource";
  private static final String STORAGE_PROPERTY_KEY = "druid.storage.storageDirectory";

  private Supplier<MetadataStorageTablesConfig> metadataStorageTablesConfigSupplier;
  private DerbyConnector connector;

  private final Interval interval =
      Interval.parse("2011-01-01T00:00:00.000Z/2011-05-01T00:00:00.000Z");

  @After
  public void tearDown() {
    if (storageLocProperty == null) {
      System.clearProperty(STORAGE_PROPERTY_KEY);
    } else {
      System.setProperty(STORAGE_PROPERTY_KEY, storageLocProperty);
    }
    tmpSegmentDir = null;
  }

  @Before
  public void setUp() throws Exception {
    final MetadataStorageUpdaterJobSpec metadataStorageUpdaterJobSpec =
        new MetadataStorageUpdaterJobSpec() {
          @Override
          public String getSegmentTable() {
            return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
          }

          @Override
          public MetadataStorageConnectorConfig get() {
            return derbyConnectorRule.getMetadataConnectorConfig();
          }
        };
    final File scratchFileDir = temporaryFolder.newFolder();
    storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
    tmpSegmentDir = temporaryFolder.newFolder();
    System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());

    final URL url =
        Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
    final File tmpInputFile = temporaryFolder.newFile();
    FileUtils.retryCopy(
        new ByteSource() {
          @Override
          public InputStream openStream() throws IOException {
            return url.openStream();
          }
        },
        tmpInputFile,
        FileUtils.IS_EXCEPTION,
        3);
    final HadoopDruidIndexerConfig hadoopDruidIndexerConfig =
        new HadoopDruidIndexerConfig(
            new HadoopIngestionSpec(
                new DataSchema(
                    DATASOURCE,
                    HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                        new StringInputRowParser(
                            new DelimitedParseSpec(
                                new TimestampSpec("ts", "iso", null),
                                new DimensionsSpec(Arrays.asList(TestIndex.DIMENSIONS), null, null),
                                "\t",
                                "\u0001",
                                Arrays.asList(TestIndex.COLUMNS))),
                        Map.class),
                    new AggregatorFactory[] {
                      new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
                      new HyperUniquesAggregatorFactory("quality_uniques", "quality")
                    },
                    new UniformGranularitySpec(
                        Granularity.MONTH,
                        QueryGranularity.DAY,
                        ImmutableList.<Interval>of(interval)),
                    HadoopDruidIndexerConfig.JSON_MAPPER),
                new HadoopIOConfig(
                    ImmutableMap.<String, Object>of(
                        "type", "static", "paths", tmpInputFile.getAbsolutePath()),
                    metadataStorageUpdaterJobSpec,
                    tmpSegmentDir.getAbsolutePath()),
                new HadoopTuningConfig(
                    scratchFileDir.getAbsolutePath(),
                    null, null, null, null, null,
                    false, false, false, false,
                    null,
                    false, false, false,
                    null, null,
                    false)));
    metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
    connector = derbyConnectorRule.getConnector();
    try {
      connector
          .getDBI()
          .withHandle(
              new HandleCallback<Void>() {
                @Override
                public Void withHandle(Handle handle) throws Exception {
                  handle.execute("DROP TABLE druid_segments");
                  return null;
                }
              });
    } catch (CallbackFailedException e) {
      // Who cares
    }
    List<Jobby> jobs =
        ImmutableList.of(
            new Jobby() {
              @Override
              public boolean run() {
                connector.createSegmentTable(metadataStorageUpdaterJobSpec.getSegmentTable());
                return true;
              }
            },
            new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig),
            new HadoopDruidIndexerJob(
                hadoopDruidIndexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector)));
    JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
  }

  private List<DataSegment> getDataSegments(SQLMetadataSegmentManager manager)
      throws InterruptedException {
    manager.start();
    while (!manager.isStarted()) {
      Thread.sleep(10);
    }
    manager.poll();
    final DruidDataSource druidDataSource = manager.getInventoryValue(DATASOURCE);
    manager.stop();
    return Lists.newArrayList(druidDataSource.getSegments());
  }

  @Test
  public void testSimpleJob() throws IOException, InterruptedException {
    final SQLMetadataSegmentManager manager =
        new SQLMetadataSegmentManager(
            HadoopDruidConverterConfig.jsonMapper,
            new Supplier<MetadataSegmentManagerConfig>() {
              @Override
              public MetadataSegmentManagerConfig get() {
                return new MetadataSegmentManagerConfig();
              }
            },
            metadataStorageTablesConfigSupplier,
            connector);

    final List<DataSegment> oldSegments = getDataSegments(manager);
    final File tmpDir = temporaryFolder.newFolder();
    final HadoopConverterJob converterJob =
        new HadoopConverterJob(
            new HadoopDruidConverterConfig(
                DATASOURCE,
                interval,
                new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
                oldSegments,
                true,
                tmpDir.toURI(),
                ImmutableMap.<String, String>of(),
                null,
                tmpSegmentDir.toURI().toString()));

    final List<DataSegment> segments = Lists.newArrayList(converterJob.run());
    Assert.assertNotNull("bad result", segments);
    Assert.assertEquals("wrong segment count", 4, segments.size());
    Assert.assertTrue(converterJob.getLoadedBytes() > 0);
    Assert.assertTrue(converterJob.getWrittenBytes() > 0);
    Assert.assertTrue(converterJob.getWrittenBytes() > converterJob.getLoadedBytes());

    Assert.assertEquals(oldSegments.size(), segments.size());

    final DataSegment segment = segments.get(0);
    Assert.assertTrue(interval.contains(segment.getInterval()));
    Assert.assertTrue(segment.getVersion().endsWith("_converted"));
    Assert.assertTrue(segment.getLoadSpec().get("path").toString().contains("_converted"));

    for (File file : tmpDir.listFiles()) {
      Assert.assertFalse(file.isDirectory());
      Assert.assertTrue(file.isFile());
    }

    final Comparator<DataSegment> segmentComparator =
        new Comparator<DataSegment>() {
          @Override
          public int compare(DataSegment o1, DataSegment o2) {
            return o1.getIdentifier().compareTo(o2.getIdentifier());
          }
        };
    Collections.sort(oldSegments, segmentComparator);
    Collections.sort(segments, segmentComparator);

    for (int i = 0; i < oldSegments.size(); ++i) {
      final DataSegment oldSegment = oldSegments.get(i);
      final DataSegment newSegment = segments.get(i);
      Assert.assertEquals(oldSegment.getDataSource(), newSegment.getDataSource());
      Assert.assertEquals(oldSegment.getInterval(), newSegment.getInterval());
      Assert.assertEquals(
          Sets.<String>newHashSet(oldSegment.getMetrics()),
          Sets.<String>newHashSet(newSegment.getMetrics()));
      Assert.assertEquals(
          Sets.<String>newHashSet(oldSegment.getDimensions()),
          Sets.<String>newHashSet(newSegment.getDimensions()));
      Assert.assertEquals(oldSegment.getVersion() + "_converted", newSegment.getVersion());
      Assert.assertTrue(oldSegment.getSize() < newSegment.getSize());
      Assert.assertEquals(oldSegment.getBinaryVersion(), newSegment.getBinaryVersion());
    }
  }

  private static void corrupt(DataSegment segment) throws IOException {
    final Map<String, Object> localLoadSpec = segment.getLoadSpec();
    final Path segmentPath = Paths.get(localLoadSpec.get("path").toString());
    final MappedByteBuffer buffer =
        Files.map(segmentPath.toFile(), FileChannel.MapMode.READ_WRITE);
    while (buffer.hasRemaining()) {
      buffer.put((byte) 0xFF);
    }
  }

  @Test
  @Ignore // This takes a long time due to retries
  public void testHadoopFailure() throws IOException, InterruptedException {
    final SQLMetadataSegmentManager manager =
        new SQLMetadataSegmentManager(
            HadoopDruidConverterConfig.jsonMapper,
            new Supplier<MetadataSegmentManagerConfig>() {
              @Override
              public MetadataSegmentManagerConfig get() {
                return new MetadataSegmentManagerConfig();
              }
            },
            metadataStorageTablesConfigSupplier,
            connector);

    final List<DataSegment> oldSegments = getDataSegments(manager);
    final File tmpDir = temporaryFolder.newFolder();
    final HadoopConverterJob converterJob =
        new HadoopConverterJob(
            new HadoopDruidConverterConfig(
                DATASOURCE,
                interval,
                new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
                oldSegments,
                true,
                tmpDir.toURI(),
                ImmutableMap.<String, String>of(),
                null,
                tmpSegmentDir.toURI().toString()));

    corrupt(oldSegments.get(0));

    final List<DataSegment> result = converterJob.run();
    Assert.assertNull("result should be null", result);

    final List<DataSegment> segments = getDataSegments(manager);
    Assert.assertEquals(oldSegments.size(), segments.size());
    Assert.assertEquals(oldSegments, segments);
  }
}
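// The corrupt() helper above memory-maps a segment file and overwrites every byte, so the
// converter job is guaranteed to fail on it. A standalone sketch of the same idea with
// Guava's Files.map; the file path is hypothetical.
import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

public class CorruptFileSketch {
  public static void main(String[] args) throws IOException {
    File target = new File("/tmp/segment-to-corrupt.zip"); // hypothetical path
    MappedByteBuffer buffer = Files.map(target, FileChannel.MapMode.READ_WRITE);
    while (buffer.hasRemaining()) {
      buffer.put((byte) 0xFF); // clobber every byte so any later read of the file fails
    }
  }
}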
@Before
public void setUp() throws Exception {
  segments =
      ImmutableList.of(
          WindowedDataSegment.of(
              new DataSegment(
                  "test1",
                  Interval.parse("2000/3000"),
                  "ver",
                  ImmutableMap.<String, Object>of("type", "local", "path", "/tmp/index1.zip"),
                  ImmutableList.of("host"),
                  ImmutableList.of("visited_sum", "unique_hosts"),
                  new NoneShardSpec(),
                  9,
                  2)),
          WindowedDataSegment.of(
              new DataSegment(
                  "test2",
                  Interval.parse("2050/3000"),
                  "ver",
                  ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp/index2.zip"),
                  ImmutableList.of("host"),
                  ImmutableList.of("visited_sum", "unique_hosts"),
                  new NoneShardSpec(),
                  9,
                  11)),
          WindowedDataSegment.of(
              new DataSegment(
                  "test3",
                  Interval.parse("2030/3000"),
                  "ver",
                  ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp/index3.zip"),
                  ImmutableList.of("host"),
                  ImmutableList.of("visited_sum", "unique_hosts"),
                  new NoneShardSpec(),
                  9,
                  4)));

  Path path1 = new Path(JobHelper.getURIFromSegment(segments.get(0).getSegment()));
  Path path2 = new Path(JobHelper.getURIFromSegment(segments.get(1).getSegment()));
  Path path3 = new Path(JobHelper.getURIFromSegment(segments.get(2).getSegment()));

  // dummy locations for test
  locations =
      ImmutableList.of(
          new LocatedFileStatus(
              1000, false, 0, 0, 0, 0, null, null, null, null, path1,
              new BlockLocation[] {
                new BlockLocation(null, new String[] {"s1", "s2"}, 0, 600),
                new BlockLocation(null, new String[] {"s2", "s3"}, 600, 400)
              }),
          new LocatedFileStatus(
              4000, false, 0, 0, 0, 0, null, null, null, null, path2,
              new BlockLocation[] {
                new BlockLocation(null, new String[] {"s1", "s2"}, 0, 1000),
                new BlockLocation(null, new String[] {"s1", "s3"}, 1000, 1200),
                new BlockLocation(null, new String[] {"s2", "s3"}, 2200, 1100),
                new BlockLocation(null, new String[] {"s1", "s2"}, 3300, 700),
              }),
          new LocatedFileStatus(
              500, false, 0, 0, 0, 0, null, null, null, null, path3,
              new BlockLocation[] {
                new BlockLocation(null, new String[] {"s2", "s3"}, 0, 500)
              }));

  config = new Configuration();
  config.set(
      DatasourceInputFormat.CONF_INPUT_SEGMENTS,
      new DefaultObjectMapper().writeValueAsString(segments));

  context = EasyMock.createMock(JobContext.class);
  EasyMock.expect(context.getConfiguration()).andReturn(config);
  EasyMock.replay(context);
}
public class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest {
  private final String testDatasource = "test";
  private final Interval testDatasourceInterval = new Interval("1970/3000");
  private final Interval testDatasourceIntervalPartial = new Interval("2050/3000");
  private final ObjectMapper jsonMapper;

  public HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest() {
    jsonMapper = new DefaultObjectMapper();
    jsonMapper.setInjectableValues(
        new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper));
  }

  private static final DataSegment SEGMENT =
      new DataSegment(
          "test1",
          Interval.parse("2000/3000"),
          "ver",
          ImmutableMap.<String, Object>of("type", "local", "path", "/tmp/index1.zip"),
          ImmutableList.of("host"),
          ImmutableList.of("visited_sum", "unique_hosts"),
          NoneShardSpec.instance(),
          9,
          2);

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecIsUsedWithNoDatasourcePathSpec()
      throws Exception {
    PathSpec pathSpec = new StaticPathSpec("/xyz", null);
    HadoopDruidIndexerConfig config =
        testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, null);
    Assert.assertTrue(config.getPathSpec() instanceof StaticPathSpec);
  }

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecIsUsedWithJustDatasourcePathSpec()
      throws Exception {
    PathSpec pathSpec =
        new DatasourcePathSpec(
            jsonMapper,
            null,
            new DatasourceIngestionSpec(
                testDatasource, testDatasourceInterval, null, null, null, null, null, null, false),
            null);
    HadoopDruidIndexerConfig config =
        testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, testDatasourceInterval);
    Assert.assertEquals(
        ImmutableList.of(WindowedDataSegment.of(SEGMENT)),
        ((DatasourcePathSpec) config.getPathSpec()).getSegments());
  }

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecWithMatchingUserSegments() throws Exception {
    PathSpec pathSpec =
        new DatasourcePathSpec(
            jsonMapper,
            null,
            new DatasourceIngestionSpec(
                testDatasource,
                testDatasourceInterval,
                null,
                ImmutableList.<DataSegment>of(SEGMENT),
                null,
                null,
                null,
                null,
                false),
            null);
    HadoopDruidIndexerConfig config =
        testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, testDatasourceInterval);
    Assert.assertEquals(
        ImmutableList.of(WindowedDataSegment.of(SEGMENT)),
        ((DatasourcePathSpec) config.getPathSpec()).getSegments());
  }

  @Test(expected = IOException.class)
  public void testupdateSegmentListThrowsExceptionWithUserSegmentsMismatch() throws Exception {
    PathSpec pathSpec =
        new DatasourcePathSpec(
            jsonMapper,
            null,
            new DatasourceIngestionSpec(
                testDatasource,
                testDatasourceInterval,
                null,
                ImmutableList.<DataSegment>of(SEGMENT.withVersion("v2")),
                null,
                null,
                null,
                null,
                false),
            null);
    testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, testDatasourceInterval);
  }

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecIsUsedWithJustDatasourcePathSpecAndPartialInterval()
      throws Exception {
    PathSpec pathSpec =
        new DatasourcePathSpec(
            jsonMapper,
            null,
            new DatasourceIngestionSpec(
                testDatasource,
                testDatasourceIntervalPartial,
                null,
                null,
                null,
                null,
                null,
                null,
                false),
            null);
    HadoopDruidIndexerConfig config =
        testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, testDatasourceIntervalPartial);
    Assert.assertEquals(
        ImmutableList.of(new WindowedDataSegment(SEGMENT, testDatasourceIntervalPartial)),
        ((DatasourcePathSpec) config.getPathSpec()).getSegments());
  }

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecIsUsedWithMultiplePathSpec() throws Exception {
    PathSpec pathSpec =
        new MultiplePathSpec(
            ImmutableList.of(
                new StaticPathSpec("/xyz", null),
                new DatasourcePathSpec(
                    jsonMapper,
                    null,
                    new DatasourceIngestionSpec(
                        testDatasource,
                        testDatasourceInterval,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        false),
                    null)));
    HadoopDruidIndexerConfig config =
        testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, testDatasourceInterval);
    Assert.assertEquals(
        ImmutableList.of(WindowedDataSegment.of(SEGMENT)),
        ((DatasourcePathSpec) ((MultiplePathSpec) config.getPathSpec()).getChildren().get(1))
            .getSegments());
  }

  private HadoopDruidIndexerConfig testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(
      PathSpec datasourcePathSpec, Interval jobInterval) throws Exception {
    HadoopIngestionSpec spec =
        new HadoopIngestionSpec(
            new DataSchema(
                "foo",
                null,
                new AggregatorFactory[0],
                new UniformGranularitySpec(
                    Granularity.DAY, null, ImmutableList.of(new Interval("2010-01-01/P1D"))),
                jsonMapper),
            new HadoopIOConfig(jsonMapper.convertValue(datasourcePathSpec, Map.class), null, null),
            null);

    spec = jsonMapper.readValue(jsonMapper.writeValueAsString(spec), HadoopIngestionSpec.class);

    UsedSegmentLister segmentLister = EasyMock.createMock(UsedSegmentLister.class);
    EasyMock.expect(
            segmentLister.getUsedSegmentsForIntervals(
                testDatasource, Lists.newArrayList(jobInterval)))
        .andReturn(ImmutableList.of(SEGMENT));
    EasyMock.replay(segmentLister);

    spec =
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            spec, jsonMapper, segmentLister);
    return HadoopDruidIndexerConfig.fromString(jsonMapper.writeValueAsString(spec));
  }
}
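// The helper above stubs UsedSegmentLister with EasyMock's expect/replay pattern. The same
// pattern, reduced to a self-contained example with a hypothetical collaborator interface:
import static org.easymock.EasyMock.createMock;
import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.replay;

import java.util.Arrays;
import java.util.List;

public class EasyMockSketch {
  // Hypothetical collaborator, standing in for UsedSegmentLister in the test above.
  interface SegmentLookup {
    List<String> segmentsFor(String dataSource);
  }

  public static void main(String[] args) {
    SegmentLookup lookup = createMock(SegmentLookup.class);
    // Record the expectation: a call with "test" should return one segment identifier.
    expect(lookup.segmentsFor("test")).andReturn(Arrays.asList("segment-1"));
    replay(lookup); // after replay(), the recorded expectation is served to callers
    System.out.println(lookup.segmentsFor("test")); // [segment-1]
  }
}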