public void testQueriesFromFile(String filePath, int timesToRun) throws Exception {
  LOG.info("Starting query tests for [%s]", filePath);
  List<QueryWithResults> queries = jsonMapper.readValue(
      FromFileTestQueryHelper.class.getResourceAsStream(filePath),
      new TypeReference<List<QueryWithResults>>() {}
  );
  for (int i = 0; i < timesToRun; i++) {
    LOG.info("Starting Iteration " + i);

    boolean failed = false;
    for (QueryWithResults queryWithResult : queries) {
      LOG.info("Running Query " + queryWithResult.getQuery().getType());
      List<Map<String, Object>> result = queryClient.query(queryWithResult.getQuery());
      if (!QueryResultVerifier.compareResults(result, queryWithResult.getExpectedResults())) {
        LOG.error(
            "Failed while executing %s actualResults : %s",
            queryWithResult,
            jsonMapper.writeValueAsString(result)
        );
        failed = true;
      } else {
        LOG.info("Results Verified for Query " + queryWithResult.getQuery().getType());
      }
    }

    if (failed) {
      throw new ISE("one or more queries failed");
    }
  }
}
public synchronized void close() {
  if (isOpen.getAndSet(false)) {
    LOGGER.info("Closing loading cache [%s]", id);
    loadingCache.close();
    reverseLoadingCache.close();
  } else {
    LOGGER.info("Closing already closed lookup");
    return;
  }
}
@Override
public void getSegmentFiles(DataSegment segment, File outDir) throws SegmentLoadingException {
  S3Coords s3Coords = new S3Coords(segment);

  log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);

  if (!isObjectInBucket(s3Coords)) {
    throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
  }

  if (!outDir.exists()) {
    outDir.mkdirs();
  }

  if (!outDir.isDirectory()) {
    throw new ISE("outDir[%s] must be a directory.", outDir);
  }

  long startTime = System.currentTimeMillis();
  S3Object s3Obj = null;

  try {
    s3Obj = s3Client.getObject(new S3Bucket(s3Coords.bucket), s3Coords.path);

    InputStream in = null;
    try {
      in = s3Obj.getDataInputStream();
      final String key = s3Obj.getKey();
      if (key.endsWith(".zip")) {
        CompressionUtils.unzip(in, outDir);
      } else if (key.endsWith(".gz")) {
        final File outFile = new File(outDir, toFilename(key, ".gz"));
        ByteStreams.copy(new GZIPInputStream(in), Files.newOutputStreamSupplier(outFile));
      } else {
        ByteStreams.copy(in, Files.newOutputStreamSupplier(new File(outDir, toFilename(key, ""))));
      }
      log.info("Pull of file[%s] completed in %,d millis", s3Obj, System.currentTimeMillis() - startTime);
    } catch (IOException e) {
      FileUtils.deleteDirectory(outDir);
      throw new SegmentLoadingException(e, "Problem decompressing object[%s]", s3Obj);
    } finally {
      Closeables.closeQuietly(in);
    }
  } catch (Exception e) {
    throw new SegmentLoadingException(e, e.getMessage());
  } finally {
    S3Utils.closeStreamsQuietly(s3Obj);
  }
}
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
  final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
  Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
  if (optionalIntervals.isPresent()) {
    for (Interval segmentInterval : optionalIntervals.get()) {
      for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
        intervals.add(dataInterval);
      }
    }
  }

  Path betaInput = new Path(inputPath);
  FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
  Set<String> paths = Sets.newTreeSet();
  Pattern fileMatcher = Pattern.compile(filePattern);

  DateTimeFormatter customFormatter = null;
  if (pathFormat != null) {
    customFormatter = DateTimeFormat.forPattern(pathFormat);
  }

  for (Interval interval : intervals) {
    DateTime t = interval.getStart();
    String intervalPath = null;
    if (customFormatter != null) {
      intervalPath = customFormatter.print(t);
    } else {
      intervalPath = dataGranularity.toPath(t);
    }

    Path granularPath = new Path(betaInput, intervalPath);
    log.info("Checking path[%s]", granularPath);
    for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
      final Path filePath = status.getPath();
      if (fileMatcher.matcher(filePath.toString()).matches()) {
        paths.add(filePath.toString());
      }
    }
  }

  for (String path : paths) {
    log.info("Appending path[%s]", path);
    FileInputFormat.addInputPath(job, new Path(path));
  }

  return job;
}
public void dropSegment(DataSegment segment, LoadPeonCallback callback) {
  synchronized (lock) {
    if ((currentlyLoading != null)
        && currentlyLoading.getSegmentIdentifier().equals(segment.getIdentifier())) {
      if (callback != null) {
        currentlyLoading.addCallback(callback);
      }
      return;
    }
  }

  SegmentHolder holder = new SegmentHolder(segment, DROP, Arrays.asList(callback));

  synchronized (lock) {
    if (segmentsToDrop.contains(holder)) {
      if (callback != null) {
        currentlyLoading.addCallback(callback);
      }
      return;
    }
  }

  log.info("Asking server peon[%s] to drop segment[%s]", basePath, segment);
  segmentsToDrop.add(holder);
  doNext();
}
public void shutdown() throws IOException {
  final long truncatedNow = segmentGranularity.truncate(new DateTime()).getMillis();
  final long end = segmentGranularity.increment(truncatedNow) + windowMillis;
  final Duration timeUntilShutdown = new Duration(System.currentTimeMillis(), end);

  log.info("Shutdown at approx. %s (in %s)", new DateTime(end), timeUntilShutdown);

  ScheduledExecutors.scheduleWithFixedDelay(
      scheduledExecutor,
      timeUntilShutdown,
      new Callable<ScheduledExecutors.Signal>() {
        @Override
        public ScheduledExecutors.Signal call() throws Exception {
          try {
            valveOn.set(false);
          } catch (Exception e) {
            throw Throwables.propagate(e);
          }
          return ScheduledExecutors.Signal.STOP;
        }
      }
  );

  beginRejectionPolicy = true;
}
public FileUtils.FileCopyResult getSegmentFiles(String region, String container, String path, File outDir)
    throws SegmentLoadingException {
  CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
  final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);

  try {
    final FileUtils.FileCopyResult result =
        CompressionUtils.unzip(byteSource, outDir, CloudFilesUtils.CLOUDFILESRETRY, true);
    log.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
    return result;
  } catch (Exception e) {
    try {
      org.apache.commons.io.FileUtils.deleteDirectory(outDir);
    } catch (IOException ioe) {
      log.warn(
          ioe,
          "Failed to remove output directory [%s] for segment pulled from [%s]",
          outDir.getAbsolutePath(),
          path
      );
    }
    throw new SegmentLoadingException(e, e.getMessage());
  } finally {
    try {
      byteSource.closeStream();
    } catch (IOException ioe) {
      log.warn(ioe, "Failed to close payload for segment pulled from [%s]", path);
    }
  }
}
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
  String failedMessage = null;
  for (Jobby job : jobs) {
    if (failedMessage == null) {
      if (!job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
      }
    }
  }

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        workingPath.getFileSystem(injectSystemProperties(new Configuration())).delete(workingPath, true);
      } catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  if (failedMessage != null) {
    throw new ISE(failedMessage);
  }

  return true;
}
public IncrementalIndexAdapter(Interval dataInterval, IncrementalIndex<?> index, BitmapFactory bitmapFactory) {
  this.dataInterval = dataInterval;
  this.index = index;

  /* Sometimes it's hard to tell whether a dimension contains a null value or not.
   * If a dimension has explicitly shown a null or empty value, then yes, it contains a null value.
   * But if all of a dimension's observed values are non-null, it is still too early to say the
   * dimension contains no null values. Consider a two-row case: the first row has "dimA=1" and
   * "dimB=2", while the second row only has "dimA=3". dimB's only observed value is "2" and it
   * never explicitly shows a null or empty value, yet when the two rows are combined, dimB is null
   * in row 2. So we have to iterate over all rows to determine whether a dimension contains a
   * null value.
   */
  this.hasNullValueDimensions = Sets.newHashSet();

  final List<IncrementalIndex.DimensionDesc> dimensions = index.getDimensions();

  indexers = Maps.newHashMapWithExpectedSize(dimensions.size());
  for (IncrementalIndex.DimensionDesc dimension : dimensions) {
    indexers.put(dimension.getName(), new DimensionIndexer(dimension));
  }

  int rowNum = 0;
  for (IncrementalIndex.TimeAndDims timeAndDims : index.getFacts().keySet()) {
    final int[][] dims = timeAndDims.getDims();

    for (IncrementalIndex.DimensionDesc dimension : dimensions) {
      final int dimIndex = dimension.getIndex();
      DimensionIndexer indexer = indexers.get(dimension.getName());
      if (dimIndex >= dims.length || dims[dimIndex] == null) {
        hasNullValueDimensions.add(dimension.getName());
        continue;
      }
      final IncrementalIndex.DimDim values = dimension.getValues();
      if (hasNullValue(values, dims[dimIndex])) {
        hasNullValueDimensions.add(dimension.getName());
      }

      final MutableBitmap[] bitmapIndexes = indexer.invertedIndexes;

      for (Comparable dimIdxComparable : dims[dimIndex]) {
        Integer dimIdx = (Integer) dimIdxComparable;
        if (bitmapIndexes[dimIdx] == null) {
          bitmapIndexes[dimIdx] = bitmapFactory.makeEmptyMutableBitmap();
        }
        try {
          bitmapIndexes[dimIdx].add(rowNum);
        } catch (Exception e) {
          log.info(e.toString());
        }
      }
    }

    ++rowNum;
  }
}
@Override
public void start() throws Exception {
  for (Method method : o.getClass().getMethods()) {
    if (method.getAnnotation(LifecycleStart.class) != null) {
      log.info("Invoking start method[%s] on object[%s].", method, o);
      method.invoke(o);
    }
  }
}
@Override
public void pushTaskLog(final String taskid, File file) throws IOException {
  if (!config.getDirectory().exists()) {
    config.getDirectory().mkdir();
  }
  final File outputFile = fileForTask(taskid);
  Files.copy(file, outputFile);
  log.info("Wrote task log to: %s", outputFile);
}
@Override
public void stop() {
  log.info("Stopping object[%s]", o);
  try {
    stopMethod.invoke(o);
  } catch (Exception e) {
    log.error(e, "Unable to invoke stopMethod() on %s", o.getClass());
  }
}
private void doNext() {
  synchronized (lock) {
    if (currentlyLoading == null) {
      if (!segmentsToDrop.isEmpty()) {
        currentlyLoading = segmentsToDrop.first();
        log.info("Server[%s] dropping [%s]", basePath, currentlyLoading);
      } else if (!segmentsToLoad.isEmpty()) {
        currentlyLoading = segmentsToLoad.first();
        log.info("Server[%s] loading [%s]", basePath, currentlyLoading);
      } else {
        return;
      }
      submitExecutable();
    } else {
      log.info(
          "Server[%s] skipping doNext() because something is currently loading[%s].",
          basePath,
          currentlyLoading
      );
    }
  }
}
@Override
public void getSegmentFiles(final DataSegment segment, final File outDir) throws SegmentLoadingException {
  final Map<String, Object> loadSpec = segment.getLoadSpec();
  final String region = MapUtils.getString(loadSpec, "region");
  final String container = MapUtils.getString(loadSpec, "container");
  final String path = MapUtils.getString(loadSpec, "path");
  log.info("Pulling index at path[%s] to outDir[%s]", path, outDir);
  prepareOutDir(outDir);
  getSegmentFiles(region, container, path, outDir);
}
/**
 * Clears out resources used by the namespace, such as threads. Implementations may override this
 * and call super.delete(...) if they have resources of their own which need to be cleared.
 *
 * <p>This particular method is NOT thread safe, and any implementation which is intended to be
 * thread safe should safeguard calls to this method.
 *
 * @param ns The namespace to be deleted
 * @return True if a deletion occurred, false if no deletion occurred.
 * @throws ISE if there is an error cancelling the namespace's future task
 */
public boolean delete(final String ns) {
  final NamespaceImplData implDatum = implData.get(ns);
  final boolean deleted = removeNamespaceLocalMetadata(implDatum);
  // At this point we have won leader election on canceling this implDatum
  if (deleted) {
    log.info("Deleting namespace [%s]", ns);
    lastVersion.remove(implDatum.name);
    return true;
  } else {
    log.debug("Did not delete namespace [%s]", ns);
    return false;
  }
}
@Override
public void stop() {
  for (Method method : o.getClass().getMethods()) {
    if (method.getAnnotation(LifecycleStop.class) != null) {
      log.info("Invoking stop method[%s] on object[%s].", method, o);
      try {
        method.invoke(o);
      } catch (Exception e) {
        log.error(e, "Exception when stopping method[%s] on object[%s]", method, o);
      }
    }
  }
}
@Inject
public DerbyConnector(
    Supplier<MetadataStorageConnectorConfig> config,
    Supplier<MetadataStorageTablesConfig> dbTables
) {
  super(config, dbTables);

  final BasicDataSource datasource = getDatasource();
  datasource.setDriverClassLoader(getClass().getClassLoader());
  datasource.setDriverClassName("org.apache.derby.jdbc.ClientDriver");

  this.dbi = new DBI(datasource);

  log.info("Configured Derby as metadata storage");
}
@Override
protected boolean removeEldestEntry(Map.Entry<ByteBuffer, byte[]> eldest) {
  if (numBytes > sizeInBytes) {
    ++evictionCount;
    if (logEvictions && evictionCount % logEvictionCount == 0) {
      log.info(
          "Evicting %,dth element. Size[%,d], numBytes[%,d], averageSize[%,d]",
          evictionCount,
          size(),
          numBytes,
          numBytes / size()
      );
    }

    numBytes -= eldest.getKey().remaining() + eldest.getValue().length;
    return true;
  }
  return false;
}
public static void main(String[] args) throws Exception {
  LogLevelAdjuster.register();

  Lifecycle lifecycle = new Lifecycle();

  lifecycle.addManagedInstance(WorkerNode.builder().build());

  try {
    lifecycle.start();
  } catch (Throwable t) {
    log.info(t, "Throwable caught at startup, committing seppuku");
    System.exit(2);
  }

  lifecycle.join();
}
@Inject
public SQLServerConnector(
    Supplier<MetadataStorageConnectorConfig> config,
    Supplier<MetadataStorageTablesConfig> dbTables
) {
  super(config, dbTables);

  final BasicDataSource datasource = getDatasource();
  datasource.setDriverClassLoader(getClass().getClassLoader());
  datasource.setDriverClassName("com.microsoft.sqlserver.jdbc.SQLServerDriver");

  this.dbi = new DBI(datasource);
  this.dbi.setStatementRewriter(new CustomStatementRewriter());

  log.info("Configured SQL Server as metadata storage");
}
@Inject
public OffHeapNamespaceExtractionCacheManager(
    Lifecycle lifecycle,
    @Named("namespaceExtractionFunctionCache")
        ConcurrentMap<String, Function<String, String>> fnCache,
    @Named("namespaceReverseExtractionFunctionCache")
        ConcurrentMap<String, Function<String, List<String>>> reverseFnCache,
    ServiceEmitter emitter,
    final Map<Class<? extends ExtractionNamespace>, ExtractionNamespaceFunctionFactory<?>> namespaceFunctionFactoryMap
) {
  super(lifecycle, fnCache, reverseFnCache, emitter, namespaceFunctionFactoryMap);
  try {
    tmpFile = File.createTempFile("druidMapDB", getClass().getCanonicalName());
    log.info("Using file [%s] for mapDB off heap namespace cache", tmpFile.getAbsolutePath());
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
  mmapDB = DBMaker
      .newFileDB(tmpFile)
      .closeOnJvmShutdown()
      .transactionDisable()
      .deleteFilesAfterClose()
      .strictDBGet()
      .asyncWriteEnable()
      .mmapFileEnable()
      .commitFileSyncDisable()
      .cacheSize(10_000_000)
      .make();
  lifecycle.addHandler(
      new Lifecycle.Handler() {
        @Override
        public void start() throws Exception {
          // NOOP
        }

        @Override
        public void stop() {
          if (!mmapDB.isClosed()) {
            mmapDB.close();
            if (!tmpFile.delete()) {
              log.warn("Unable to delete file at [%s]", tmpFile.getAbsolutePath());
            }
          }
        }
      }
  );
}
@JsonCreator
public RandomFirehoseFactory(
    @JsonProperty("sleepUsec") Long sleepUsec,
    @JsonProperty("maxGeneratedRows") Long maxGeneratedRows,
    @JsonProperty("seed") Long seed,
    @JsonProperty("nTokens") Integer nTokens,
    @JsonProperty("nPerSleep") Integer nPerSleep
) {
  long nsec = (sleepUsec > 0) ? sleepUsec * 1000L : 0;
  long msec = nsec / 1000000L;
  this.delayMsec = msec;
  this.delayNsec = (int) (nsec - (msec * 1000000L));
  this.maxGeneratedRows = maxGeneratedRows;
  this.seed = seed;
  this.nTokens = nTokens;
  this.nPerSleep = nPerSleep;
  if (nTokens <= 0) {
    log.warn("nTokens parameter " + nTokens + " ignored; must be greater than or equal to 1");
    nTokens = 1;
  }
  if (nPerSleep <= 0) {
    log.warn("nPerSleep parameter " + nPerSleep + " ignored; must be greater than or equal to 1");
    nPerSleep = 1;
  }
  log.info("maxGeneratedRows=" + maxGeneratedRows);
  log.info("seed=" + ((seed == 0L) ? "random value" : seed));
  log.info("nTokens=" + nTokens);
  log.info("nPerSleep=" + nPerSleep);
  double dmsec = (double) delayMsec + ((double) this.delayNsec) / 1000000.;
  if (dmsec > 0.0) {
    log.info("sleep period=" + dmsec + "msec");
    log.info(
        "approximate max rate of record generation="
            + (nPerSleep * 1000. / dmsec)
            + "/sec"
            + " or "
            + (60. * nPerSleep * 1000. / dmsec)
            + "/minute"
    );
  } else {
    log.info("sleep period= NONE");
    log.info("approximate max rate of record generation= as fast as possible");
  }
}
@Override
public void newEntry(String name, Map properties) {
  synchronized (lock) {
    if (currentlyLoading == null) {
      log.warn(
          "Server[%s] a new entry[%s] appeared, even though nothing is currently loading[%s]",
          basePath,
          name,
          currentlyLoading
      );
    } else {
      if (!name.equals(currentlyLoading.getSegmentIdentifier())) {
        log.warn(
            "Server[%s] a new entry[%s] appeared that is not the currently loading entry[%s]",
            basePath,
            name,
            currentlyLoading
        );
      } else {
        log.info("Server[%s]'s currently loading entry[%s] appeared.", basePath, name);
      }
    }
  }
}
public static File persist(
    final IncrementalIndex index,
    final Interval dataInterval,
    File outDir,
    ProgressIndicator progress
) throws IOException {
  final long firstTimestamp = index.facts.firstKey().getTimestamp();
  final long lastTimestamp = index.facts.lastKey().getTimestamp();
  if (!(dataInterval.contains(firstTimestamp) && dataInterval.contains(lastTimestamp))) {
    throw new IAE(
        "interval[%s] does not encapsulate the full range of timestamps[%s, %s]",
        dataInterval,
        new DateTime(firstTimestamp),
        new DateTime(lastTimestamp)
    );
  }

  if (!outDir.exists()) {
    outDir.mkdirs();
  }
  if (!outDir.isDirectory()) {
    throw new ISE("Can only persist to directories, [%s] wasn't a directory", outDir);
  }

  final List<String> dimensions = Lists.transform(
      Lists.newArrayList(index.dimensionOrder.keySet()),
      new Function<String, String>() {
        @Override
        public String apply(@Nullable String input) {
          return input.toLowerCase();
        }
      }
  );
  final List<String> metrics = Lists.newArrayListWithCapacity(index.metrics.length);
  for (int i = 0; i < index.metrics.length; ++i) {
    metrics.add(index.metrics[i].getName().toLowerCase());
  }

  log.info("Starting persist for interval[%s], rows[%,d]", dataInterval, index.size());
  return merge(
      Arrays.<IndexableAdapter>asList(new IncrementalIndexAdapter(dataInterval, index, dimensions, metrics)),
      index.metrics,
      outDir,
      progress
  );
}
public static String runTask(String[] args) throws Exception {
  final String schema = args[0];
  String version = args[1];

  final HadoopIngestionSpec theSchema =
      HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSchema(
      theSchema.withTuningConfig(theSchema.getTuningConfig().withVersion(version))
  );

  HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);

  log.info("Starting a hadoop index generator job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(job.getPublishedSegments());
  }

  return null;
}
@Override
public void entryRemoved(String name) {
  synchronized (lock) {
    if (currentlyLoading == null) {
      log.warn("Server[%s] an entry[%s] was removed even though it wasn't loading!?", basePath, name);
      return;
    }
    if (!name.equals(currentlyLoading.getSegmentIdentifier())) {
      log.warn(
          "Server[%s] entry [%s] was removed even though it's not what is currently loading[%s]",
          basePath,
          name,
          currentlyLoading
      );
      return;
    }
    actionCompleted();
    log.info("Server[%s] done processing [%s]", basePath, name);
  }

  doNext();
}
public static void setupClasspath(Path distributedClassPath, Job job) throws IOException {
  String classpathProperty = System.getProperty("druid.hadoop.internal.classpath");
  if (classpathProperty == null) {
    classpathProperty = System.getProperty("java.class.path");
  }

  String[] jarFiles = classpathProperty.split(File.pathSeparator);

  final Configuration conf = job.getConfiguration();
  final FileSystem fs = distributedClassPath.getFileSystem(conf);

  if (fs instanceof LocalFileSystem) {
    return;
  }

  for (String jarFilePath : jarFiles) {
    File jarFile = new File(jarFilePath);
    if (jarFile.getName().endsWith(".jar")) {
      final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());

      if (!existing.contains(hdfsPath)) {
        if (jarFile.getName().matches(".*SNAPSHOT(-selfcontained)?\\.jar$") || !fs.exists(hdfsPath)) {
          log.info("Uploading jar to path[%s]", hdfsPath);
          ByteStreams.copy(
              Files.newInputStreamSupplier(jarFile),
              new OutputSupplier<OutputStream>() {
                @Override
                public OutputStream getOutput() throws IOException {
                  return fs.create(hdfsPath);
                }
              }
          );
        }

        existing.add(hdfsPath);
      }

      DistributedCache.addFileToClassPath(hdfsPath, conf, fs);
    }
  }
}
public static String runTask(String[] args) throws Exception {
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];

  final HadoopIngestionSpec theSchema =
      HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSchema(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  Jobby job = new HadoopDruidDetermineConfigurationJob(config);

  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(config.getSchema());
  }

  return null;
}
@JsonCreator
public InlineSchemasAvroBytesDecoder(
    @JacksonInject @Json ObjectMapper mapper,
    @JsonProperty("schemas") Map<String, Map<String, Object>> schemas
) throws Exception {
  Preconditions.checkArgument(
      schemas != null && schemas.size() > 0,
      "at least one schema must be provided in schemas attribute"
  );

  this.schemas = schemas;

  schemaObjs = new HashMap<>(schemas.size());
  for (Map.Entry<String, Map<String, Object>> e : schemas.entrySet()) {
    int id = Integer.parseInt(e.getKey());

    Map<String, Object> schema = e.getValue();
    String schemaStr = mapper.writeValueAsString(schema);

    logger.info("Schema string [%s] = [%s]", id, schemaStr);
    schemaObjs.put(id, new Schema.Parser().parse(schemaStr));
  }
}
@Override
public void writeIndexes(List<IntBuffer> segmentRowNumConversions, Closer closer) throws IOException {
  final SerializerUtils serializerUtils = new SerializerUtils();
  long dimStartTime = System.currentTimeMillis();

  String bmpFilename = String.format("%s.inverted", dimensionName);
  bitmapWriter = new GenericIndexedWriter<>(
      ioPeon,
      bmpFilename,
      indexSpec.getBitmapSerdeFactory().getObjectStrategy()
  );
  bitmapWriter.open();

  final MappedByteBuffer dimValsMapped = Files.map(dictionaryFile);
  closer.register(
      new Closeable() {
        @Override
        public void close() throws IOException {
          ByteBufferUtils.unmap(dimValsMapped);
        }
      }
  );

  if (!dimensionName.equals(serializerUtils.readString(dimValsMapped))) {
    throw new ISE("dimensions[%s] didn't equate!? This is a major WTF moment.", dimensionName);
  }
  Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
  log.info("Starting dimension[%s] with cardinality[%,d]", dimensionName, dimVals.size());

  final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
  final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();

  RTree tree = null;
  spatialWriter = null;
  boolean hasSpatial = capabilities.hasSpatialIndexes();
  spatialIoPeon = new TmpFileIOPeon();
  if (hasSpatial) {
    BitmapFactory bmpFactory = bitmapSerdeFactory.getBitmapFactory();
    String spatialFilename = String.format("%s.spatial", dimensionName);
    spatialWriter = new ByteBufferWriter<ImmutableRTree>(
        spatialIoPeon,
        spatialFilename,
        new IndexedRTree.ImmutableRTreeObjectStrategy(bmpFactory)
    );
    spatialWriter.open();
    tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
  }

  IndexSeeker[] dictIdSeeker = toIndexSeekers(adapters, dimConversions, dimensionName);

  // Iterate over all dim values' dictionary ids in ascending order, which matches the dim values'
  // comparison order.
  for (int dictId = 0; dictId < dimVals.size(); dictId++) {
    progress.progress();
    List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
    for (int j = 0; j < adapters.size(); ++j) {
      int seekedDictId = dictIdSeeker[j].seek(dictId);
      if (seekedDictId != IndexSeeker.NOT_EXIST) {
        convertedInverteds.add(
            new ConvertingIndexedInts(
                adapters.get(j).getBitmapIndex(dimensionName, seekedDictId),
                segmentRowNumConversions.get(j)
            )
        );
      }
    }

    MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
    for (Integer row : CombiningIterable.createSplatted(
        convertedInverteds,
        Ordering.<Integer>natural().nullsFirst()
    )) {
      if (row != IndexMerger.INVALID_ROW) {
        bitset.add(row);
      }
    }

    if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
      bitset.or(nullRowsBitmap);
    }

    bitmapWriter.write(bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset));

    if (hasSpatial) {
      String dimVal = dimVals.get(dictId);
      if (dimVal != null) {
        List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
        float[] coords = new float[stringCoords.size()];
        for (int j = 0; j < coords.length; j++) {
          coords[j] = Float.valueOf(stringCoords.get(j));
        }
        tree.insert(coords, bitset);
      }
    }
  }

  log.info("Completed dimension[%s] in %,d millis.", dimensionName, System.currentTimeMillis() - dimStartTime);

  if (hasSpatial) {
    spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
  }
}