Example #1
0
  public void testQueriesFromFile(String filePath, int timesToRun) throws Exception {
    LOG.info("Starting query tests for [%s]", filePath);
    List<QueryWithResults> queries =
        jsonMapper.readValue(
            FromFileTestQueryHelper.class.getResourceAsStream(filePath),
            new TypeReference<List<QueryWithResults>>() {});
    for (int i = 0; i < timesToRun; i++) {
      LOG.info("Starting Iteration " + i);

      boolean failed = false;
      for (QueryWithResults queryWithResult : queries) {
        LOG.info("Running Query " + queryWithResult.getQuery().getType());
        List<Map<String, Object>> result = queryClient.query(queryWithResult.getQuery());
        if (!QueryResultVerifier.compareResults(result, queryWithResult.getExpectedResults())) {
          LOG.error(
              "Failed while executing %s actualResults : %s",
              queryWithResult, jsonMapper.writeValueAsString(result));
          failed = true;
        } else {
          LOG.info("Results Verified for Query " + queryWithResult.getQuery().getType());
        }
      }

      if (failed) {
        throw new ISE("one or more twitter  queries failed");
      }
    }
  }
Example #2
0
 public synchronized void close() {
   if (isOpen.getAndSet(false)) {
     LOGGER.info("Closing loading cache [%s]", id);
     loadingCache.close();
     reverseLoadingCache.close();
   } else {
     LOGGER.info("Closing already closed lookup");
     return;
   }
 }
  @Override
  public void getSegmentFiles(DataSegment segment, File outDir) throws SegmentLoadingException {
    S3Coords s3Coords = new S3Coords(segment);

    log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);

    if (!isObjectInBucket(s3Coords)) {
      throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    }

    if (!outDir.exists()) {
      outDir.mkdirs();
    }

    if (!outDir.isDirectory()) {
      throw new ISE("outDir[%s] must be a directory.", outDir);
    }

    long startTime = System.currentTimeMillis();
    S3Object s3Obj = null;

    try {
      s3Obj = s3Client.getObject(new S3Bucket(s3Coords.bucket), s3Coords.path);

      InputStream in = null;
      try {
        in = s3Obj.getDataInputStream();
        final String key = s3Obj.getKey();
        if (key.endsWith(".zip")) {
          CompressionUtils.unzip(in, outDir);
        } else if (key.endsWith(".gz")) {
          final File outFile = new File(outDir, toFilename(key, ".gz"));
          ByteStreams.copy(new GZIPInputStream(in), Files.newOutputStreamSupplier(outFile));
        } else {
          ByteStreams.copy(
              in, Files.newOutputStreamSupplier(new File(outDir, toFilename(key, ""))));
        }
        log.info(
            "Pull of file[%s] completed in %,d millis",
            s3Obj, System.currentTimeMillis() - startTime);
      } catch (IOException e) {
        FileUtils.deleteDirectory(outDir);
        throw new SegmentLoadingException(e, "Problem decompressing object[%s]", s3Obj);
      } finally {
        Closeables.closeQuietly(in);
      }
    } catch (Exception e) {
      throw new SegmentLoadingException(e, e.getMessage());
    } finally {
      S3Utils.closeStreamsQuietly(s3Obj);
    }
  }
Example #4
0
  @Override
  public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
    Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
    if (optionalIntervals.isPresent()) {
      for (Interval segmentInterval : optionalIntervals.get()) {
        for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
          intervals.add(dataInterval);
        }
      }
    }

    Path betaInput = new Path(inputPath);
    FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    Set<String> paths = Sets.newTreeSet();
    Pattern fileMatcher = Pattern.compile(filePattern);

    DateTimeFormatter customFormatter = null;
    if (pathFormat != null) {
      customFormatter = DateTimeFormat.forPattern(pathFormat);
    }

    for (Interval interval : intervals) {
      DateTime t = interval.getStart();
      String intervalPath = null;
      if (customFormatter != null) {
        intervalPath = customFormatter.print(t);
      } else {
        intervalPath = dataGranularity.toPath(t);
      }

      Path granularPath = new Path(betaInput, intervalPath);
      log.info("Checking path[%s]", granularPath);
      for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
        final Path filePath = status.getPath();
        if (fileMatcher.matcher(filePath.toString()).matches()) {
          paths.add(filePath.toString());
        }
      }
    }

    for (String path : paths) {
      log.info("Appending path[%s]", path);
      FileInputFormat.addInputPath(job, new Path(path));
    }

    return job;
  }
Example #5
0
  public void dropSegment(DataSegment segment, LoadPeonCallback callback) {
    synchronized (lock) {
      if ((currentlyLoading != null)
          && currentlyLoading.getSegmentIdentifier().equals(segment.getIdentifier())) {
        if (callback != null) {
          currentlyLoading.addCallback(callback);
        }
        return;
      }
    }

    SegmentHolder holder = new SegmentHolder(segment, DROP, Arrays.asList(callback));

    synchronized (lock) {
      if (segmentsToDrop.contains(holder)) {
        if (callback != null) {
          currentlyLoading.addCallback(callback);
        }
        return;
      }
    }

    log.info("Asking server peon[%s] to drop segment[%s]", basePath, segment);
    segmentsToDrop.add(holder);
    doNext();
  }
  public void shutdown() throws IOException {
    final long truncatedNow = segmentGranularity.truncate(new DateTime()).getMillis();
    final long end = segmentGranularity.increment(truncatedNow) + windowMillis;
    final Duration timeUntilShutdown = new Duration(System.currentTimeMillis(), end);

    log.info("Shutdown at approx. %s (in %s)", new DateTime(end), timeUntilShutdown);

    ScheduledExecutors.scheduleWithFixedDelay(
        scheduledExecutor,
        timeUntilShutdown,
        new Callable<ScheduledExecutors.Signal>() {
          @Override
          public ScheduledExecutors.Signal call() throws Exception {
            try {
              valveOn.set(false);
            } catch (Exception e) {
              throw Throwables.propagate(e);
            }

            return ScheduledExecutors.Signal.STOP;
          }
        });

    beginRejectionPolicy = true;
  }
  public FileUtils.FileCopyResult getSegmentFiles(
      String region, String container, String path, File outDir) throws SegmentLoadingException {
    CloudFilesObjectApiProxy objectApi =
        new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
    final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);

    try {
      final FileUtils.FileCopyResult result =
          CompressionUtils.unzip(byteSource, outDir, CloudFilesUtils.CLOUDFILESRETRY, true);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
      return result;
    } catch (Exception e) {
      try {
        org.apache.commons.io.FileUtils.deleteDirectory(outDir);
      } catch (IOException ioe) {
        log.warn(
            ioe,
            "Failed to remove output directory [%s] for segment pulled from [%s]",
            outDir.getAbsolutePath(),
            path);
      }
      throw new SegmentLoadingException(e, e.getMessage());
    } finally {
      try {
        byteSource.closeStream();
      } catch (IOException ioe) {
        log.warn(ioe, "Failed to close payload for segment pulled from [%s]", path);
      }
    }
  }
Example #8
0
  public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
    String failedMessage = null;
    for (Jobby job : jobs) {
      if (failedMessage == null) {
        if (!job.run()) {
          failedMessage = String.format("Job[%s] failed!", job.getClass());
        }
      }
    }

    if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
      if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
        Path workingPath = config.makeIntermediatePath();
        log.info("Deleting path[%s]", workingPath);
        try {
          workingPath
              .getFileSystem(injectSystemProperties(new Configuration()))
              .delete(workingPath, true);
        } catch (IOException e) {
          log.error(e, "Failed to cleanup path[%s]", workingPath);
        }
      }
    }

    if (failedMessage != null) {
      throw new ISE(failedMessage);
    }

    return true;
  }
Example #9
0
  public IncrementalIndexAdapter(
      Interval dataInterval, IncrementalIndex<?> index, BitmapFactory bitmapFactory) {
    this.dataInterval = dataInterval;
    this.index = index;

    /* Sometimes it's hard to tell whether a dimension contains a null value or not.
     * If a dimension has explicitly shown a null or empty value, then yes, it contains
     * a null value. But even if all of a dimension's observed values are non-null, it is
     * still too early to say that the dimension contains no null values. Consider a two-row
     * case: the first row has "dimA=1" and "dimB=2", while the second row has only "dimA=3".
     * dimB's only observed value is "2" and it never explicitly shows a null or empty value,
     * yet when the two rows are combined, dimB is null in row 2. So we must iterate over all
     * rows to determine whether a dimension contains a null value (see the sketch after this
     * example).
     */
    this.hasNullValueDimensions = Sets.newHashSet();

    final List<IncrementalIndex.DimensionDesc> dimensions = index.getDimensions();

    indexers = Maps.newHashMapWithExpectedSize(dimensions.size());
    for (IncrementalIndex.DimensionDesc dimension : dimensions) {
      indexers.put(dimension.getName(), new DimensionIndexer(dimension));
    }

    int rowNum = 0;
    for (IncrementalIndex.TimeAndDims timeAndDims : index.getFacts().keySet()) {
      final int[][] dims = timeAndDims.getDims();

      for (IncrementalIndex.DimensionDesc dimension : dimensions) {
        final int dimIndex = dimension.getIndex();
        DimensionIndexer indexer = indexers.get(dimension.getName());
        if (dimIndex >= dims.length || dims[dimIndex] == null) {
          hasNullValueDimensions.add(dimension.getName());
          continue;
        }
        final IncrementalIndex.DimDim values = dimension.getValues();
        if (hasNullValue(values, dims[dimIndex])) {
          hasNullValueDimensions.add(dimension.getName());
        }

        final MutableBitmap[] bitmapIndexes = indexer.invertedIndexes;

        for (int dimIdx : dims[dimIndex]) {
          if (bitmapIndexes[dimIdx] == null) {
            bitmapIndexes[dimIdx] = bitmapFactory.makeEmptyMutableBitmap();
          }
          try {
            bitmapIndexes[dimIdx].add(rowNum);
          } catch (Exception e) {
            log.info(e.toString());
          }
        }
      }

      ++rowNum;
    }
  }
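
The comment in the constructor above argues that a dimension can only be declared free of nulls after every row has been scanned, because a row may simply omit the dimension. Below is a minimal, self-contained sketch of that idea using plain Map-based rows instead of Druid's TimeAndDims/DimDim structures; the class and method names are hypothetical and exist only for illustration.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class NullDimensionScanSketch {
  // Returns the dimensions that are null, empty, or absent in at least one row.
  static Set<String> dimensionsWithNulls(List<Map<String, String>> rows) {
    // Collect every dimension name that appears in any row.
    Set<String> allDims = new HashSet<>();
    for (Map<String, String> row : rows) {
      allDims.addAll(row.keySet());
    }
    // A dimension "contains null" if some row omits it or maps it to null/empty.
    Set<String> hasNull = new HashSet<>();
    for (Map<String, String> row : rows) {
      for (String dim : allDims) {
        String value = row.get(dim);
        if (value == null || value.isEmpty()) {
          hasNull.add(dim);
        }
      }
    }
    return hasNull;
  }

  public static void main(String[] args) {
    // The two-row case from the comment: dimB never shows an explicit null,
    // but it is missing from the second row, so it still counts as nullable.
    List<Map<String, String>> rows = Arrays.asList(
        Map.of("dimA", "1", "dimB", "2"),
        Map.of("dimA", "3"));
    System.out.println(dimensionsWithNulls(rows)); // prints [dimB]
  }
}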
Example #10
0
 @Override
 public void start() throws Exception {
   for (Method method : o.getClass().getMethods()) {
     if (method.getAnnotation(LifecycleStart.class) != null) {
       log.info("Invoking start method[%s] on object[%s].", method, o);
       method.invoke(o);
     }
   }
 }
Example #11
0
 @Override
 public void pushTaskLog(final String taskid, File file) throws IOException {
   if (!config.getDirectory().exists()) {
     config.getDirectory().mkdir();
   }
   final File outputFile = fileForTask(taskid);
   Files.copy(file, outputFile);
   log.info("Wrote task log to: %s", outputFile);
 }
Example #12
0
 @Override
 public void stop() {
   log.info("Stopping object[%s]", o);
   try {
     stopMethod.invoke(o);
   } catch (Exception e) {
     log.error(e, "Unable to invoke stopMethod() on %s", o.getClass());
   }
 }
Example #13
0
  private void doNext() {
    synchronized (lock) {
      if (currentlyLoading == null) {
        if (!segmentsToDrop.isEmpty()) {
          currentlyLoading = segmentsToDrop.first();
          log.info("Server[%s] dropping [%s]", basePath, currentlyLoading);
        } else if (!segmentsToLoad.isEmpty()) {
          currentlyLoading = segmentsToLoad.first();
          log.info("Server[%s] loading [%s]", basePath, currentlyLoading);
        } else {
          return;
        }

        submitExecutable();
      } else {
        log.info(
            "Server[%s] skipping doNext() because something is currently loading[%s].",
            basePath, currentlyLoading);
      }
    }
  }
  @Override
  public void getSegmentFiles(final DataSegment segment, final File outDir)
      throws SegmentLoadingException {
    final Map<String, Object> loadSpec = segment.getLoadSpec();
    final String region = MapUtils.getString(loadSpec, "region");
    final String container = MapUtils.getString(loadSpec, "container");
    final String path = MapUtils.getString(loadSpec, "path");

    log.info("Pulling index at path[%s] to outDir[%s]", path, outDir);
    prepareOutDir(outDir);
    getSegmentFiles(region, container, path, outDir);
  }
 /**
  * Clears out resources used by the namespace, such as threads. Implementations may override this
  * and call super.delete(...) if they have resources of their own which need to be cleared.
  *
  * <p>This particular method is NOT thread safe, and any implementation intended to be thread safe
  * should guard calls to this method itself (see the sketch after this example).
  *
  * @param ns The namespace to be deleted
  * @return True if a deletion occurred, false if no deletion occurred.
  * @throws ISE if there is an error cancelling the namespace's future task
  */
 public boolean delete(final String ns) {
   final NamespaceImplData implDatum = implData.get(ns);
   final boolean deleted = removeNamespaceLocalMetadata(implDatum);
   // At this point we have won leader election on canceling this implDatum
   if (deleted) {
     log.info("Deleting namespace [%s]", ns);
     lastVersion.remove(implDatum.name);
     return true;
   } else {
     log.debug("Did not delete namespace [%s]", ns);
     return false;
   }
 }
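
Since the Javadoc above states that delete(...) itself is not thread safe and that thread-safe implementations must guard their own calls, a caller-side guard could look like the following sketch. The wrapper class, field names, and the Predicate-based hook are assumptions for illustration only; they are not part of the surrounding codebase.

import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Predicate;

// Hypothetical guard that serializes calls to a delete(ns)-style method with an
// explicit lock, as the Javadoc above asks thread-safe implementations to do.
public class GuardedNamespaceDeleter {
  private final ReentrantLock deleteLock = new ReentrantLock();
  private final Predicate<String> deleteFn; // e.g. cacheManager::delete

  public GuardedNamespaceDeleter(Predicate<String> deleteFn) {
    this.deleteFn = deleteFn;
  }

  // Returns whatever delete(ns) returns: true if a deletion occurred, false otherwise.
  public boolean deleteSafely(String ns) {
    deleteLock.lock();
    try {
      return deleteFn.test(ns);
    } finally {
      deleteLock.unlock();
    }
  }
}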
Example #16
0
 @Override
 public void stop() {
   for (Method method : o.getClass().getMethods()) {
     if (method.getAnnotation(LifecycleStop.class) != null) {
       log.info("Invoking stop method[%s] on object[%s].", method, o);
       try {
         method.invoke(o);
       } catch (Exception e) {
         log.error(e, "Exception when stopping method[%s] on object[%s]", method, o);
       }
     }
   }
 }
Example #17
0
  @Inject
  public DerbyConnector(
      Supplier<MetadataStorageConnectorConfig> config,
      Supplier<MetadataStorageTablesConfig> dbTables) {
    super(config, dbTables);

    final BasicDataSource datasource = getDatasource();
    datasource.setDriverClassLoader(getClass().getClassLoader());
    datasource.setDriverClassName("org.apache.derby.jdbc.ClientDriver");

    this.dbi = new DBI(datasource);

    log.info("Configured Derby as metadata storage");
  }
Example #18
0
  @Override
  protected boolean removeEldestEntry(Map.Entry<ByteBuffer, byte[]> eldest) {
    if (numBytes > sizeInBytes) {
      ++evictionCount;
      if (logEvictions && evictionCount % logEvictionCount == 0) {
        log.info(
            "Evicting %,dth element.  Size[%,d], numBytes[%,d], averageSize[%,d]",
            evictionCount, size(), numBytes, numBytes / size());
      }

      numBytes -= eldest.getKey().remaining() + eldest.getValue().length;
      return true;
    }
    return false;
  }
Example #19
0
  public static void main(String[] args) throws Exception {
    LogLevelAdjuster.register();

    Lifecycle lifecycle = new Lifecycle();

    lifecycle.addManagedInstance(WorkerNode.builder().build());

    try {
      lifecycle.start();
    } catch (Throwable t) {
      log.info(t, "Throwable caught at startup, committing seppuku");
      System.exit(2);
    }

    lifecycle.join();
  }
Example #20
0
  @Inject
  public SQLServerConnector(
      Supplier<MetadataStorageConnectorConfig> config,
      Supplier<MetadataStorageTablesConfig> dbTables) {
    super(config, dbTables);

    final BasicDataSource datasource = getDatasource();
    datasource.setDriverClassLoader(getClass().getClassLoader());
    datasource.setDriverClassName("com.microsoft.sqlserver.jdbc.SQLServerDriver");

    this.dbi = new DBI(datasource);

    this.dbi.setStatementRewriter(new CustomStatementRewriter());

    log.info("Configured Sql Server as metadata storage");
  }
  @Inject
  public OffHeapNamespaceExtractionCacheManager(
      Lifecycle lifecycle,
      @Named("namespaceExtractionFunctionCache")
          ConcurrentMap<String, Function<String, String>> fnCache,
      @Named("namespaceReverseExtractionFunctionCache")
          ConcurrentMap<String, Function<String, List<String>>> reverseFnCache,
      ServiceEmitter emitter,
      final Map<Class<? extends ExtractionNamespace>, ExtractionNamespaceFunctionFactory<?>>
          namespaceFunctionFactoryMap) {
    super(lifecycle, fnCache, reverseFnCache, emitter, namespaceFunctionFactoryMap);
    try {
      tmpFile = File.createTempFile("druidMapDB", getClass().getCanonicalName());
      log.info("Using file [%s] for mapDB off heap namespace cache", tmpFile.getAbsolutePath());
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
    mmapDB =
        DBMaker.newFileDB(tmpFile)
            .closeOnJvmShutdown()
            .transactionDisable()
            .deleteFilesAfterClose()
            .strictDBGet()
            .asyncWriteEnable()
            .mmapFileEnable()
            .commitFileSyncDisable()
            .cacheSize(10_000_000)
            .make();
    lifecycle.addHandler(
        new Lifecycle.Handler() {
          @Override
          public void start() throws Exception {
            // NOOP
          }

          @Override
          public void stop() {
            if (!mmapDB.isClosed()) {
              mmapDB.close();
              if (!tmpFile.delete()) {
                log.warn("Unable to delete file at [%s]", tmpFile.getAbsolutePath());
              }
            }
          }
        });
  }
Example #22
0
 @JsonCreator
 public RandomFirehoseFactory(
     @JsonProperty("sleepUsec") Long sleepUsec,
     @JsonProperty("maxGeneratedRows") Long maxGeneratedRows,
     @JsonProperty("seed") Long seed,
     @JsonProperty("nTokens") Integer nTokens,
     @JsonProperty("nPerSleep") Integer nPerSleep) {
   long nsec = (sleepUsec > 0) ? sleepUsec * 1000L : 0;
   long msec = nsec / 1000000L;
   this.delayMsec = msec;
   this.delayNsec = (int) (nsec - (msec * 1000000L));
   this.maxGeneratedRows = maxGeneratedRows;
   this.seed = seed;
   if (nTokens <= 0) {
     log.warn("nTokens parameter " + nTokens + " ignored; must be greater than or equal to 1");
     nTokens = 1;
   }
   if (nPerSleep <= 0) {
     log.warn("nPerSleep parameter " + nPerSleep + " ignored; must be greater than or equal to 1");
     nPerSleep = 1;
   }
   this.nTokens = nTokens;
   this.nPerSleep = nPerSleep;
   log.info("maxGeneratedRows=" + maxGeneratedRows);
   log.info("seed=" + ((seed == 0L) ? "random value" : seed));
   log.info("nTokens=" + nTokens);
   log.info("nPerSleep=" + nPerSleep);
   double dmsec = (double) delayMsec + ((double) this.delayNsec) / 1000000.;
   if (dmsec > 0.0) {
     log.info("sleep period=" + dmsec + "msec");
     log.info(
         "approximate max rate of record generation="
             + (nPerSleep * 1000. / dmsec)
             + "/sec"
             + "  or  "
             + (60. * nPerSleep * 1000. / dmsec)
             + "/minute");
   } else {
     log.info("sleep period= NONE");
     log.info("approximate max rate of record generation= as fast as possible");
   }
 }
Example #23
0
 @Override
 public void newEntry(String name, Map properties) {
   synchronized (lock) {
     if (currentlyLoading == null) {
       log.warn(
           "Server[%s] a new entry[%s] appeared, even though nothing is currently loading[%s]",
           basePath, name, currentlyLoading);
     } else {
       if (!name.equals(currentlyLoading.getSegmentIdentifier())) {
         log.warn(
             "Server[%s] a new entry[%s] appeared that is not the currently loading entry[%s]",
             basePath, name, currentlyLoading);
       } else {
         log.info("Server[%s]'s currently loading entry[%s] appeared.", basePath, name);
       }
     }
   }
 }
Example #24
0
  public static File persist(
      final IncrementalIndex index,
      final Interval dataInterval,
      File outDir,
      ProgressIndicator progress)
      throws IOException {
    final long firstTimestamp = index.facts.firstKey().getTimestamp();
    final long lastTimestamp = index.facts.lastKey().getTimestamp();
    if (!(dataInterval.contains(firstTimestamp) && dataInterval.contains(lastTimestamp))) {
      throw new IAE(
          "interval[%s] does not encapsulate the full range of timestamps[%s, %s]",
          dataInterval, new DateTime(firstTimestamp), new DateTime(lastTimestamp));
    }

    if (!outDir.exists()) {
      outDir.mkdirs();
    }
    if (!outDir.isDirectory()) {
      throw new ISE("Can only persist to directories, [%s] wasn't a directory", outDir);
    }

    final List<String> dimensions =
        Lists.transform(
            Lists.newArrayList(index.dimensionOrder.keySet()),
            new Function<String, String>() {
              @Override
              public String apply(@Nullable String input) {
                return input.toLowerCase();
              }
            });
    final List<String> metrics = Lists.newArrayListWithCapacity(index.metrics.length);
    for (int i = 0; i < index.metrics.length; ++i) {
      metrics.add(index.metrics[i].getName().toLowerCase());
    }

    log.info("Starting persist for interval[%s], rows[%,d]", dataInterval, index.size());
    return merge(
        Arrays.<IndexableAdapter>asList(
            new IncrementalIndexAdapter(dataInterval, index, dimensions, metrics)),
        index.metrics,
        outDir,
        progress);
  }
Example #25
0
    public static String runTask(String[] args) throws Exception {
      final String schema = args[0];
      String version = args[1];

      final HadoopIngestionSpec theSchema =
          HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
      final HadoopDruidIndexerConfig config =
          HadoopDruidIndexerConfig.fromSchema(
              theSchema.withTuningConfig(theSchema.getTuningConfig().withVersion(version)));

      HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);

      log.info("Starting a hadoop index generator job...");
      if (job.run()) {
        return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(job.getPublishedSegments());
      }

      return null;
    }
Example #26
0
  @Override
  public void entryRemoved(String name) {
    synchronized (lock) {
      if (currentlyLoading == null) {
        log.warn(
            "Server[%s] an entry[%s] was removed even though it wasn't loading!?", basePath, name);
        return;
      }
      if (!name.equals(currentlyLoading.getSegmentIdentifier())) {
        log.warn(
            "Server[%s] entry [%s] was removed even though it's not what is currently loading[%s]",
            basePath, name, currentlyLoading);
        return;
      }
      actionCompleted();
      log.info("Server[%s] done processing [%s]", basePath, name);
    }

    doNext();
  }
Example #27
0
  public static void setupClasspath(Path distributedClassPath, Job job) throws IOException {
    String classpathProperty = System.getProperty("druid.hadoop.internal.classpath");
    if (classpathProperty == null) {
      classpathProperty = System.getProperty("java.class.path");
    }

    String[] jarFiles = classpathProperty.split(File.pathSeparator);

    final Configuration conf = job.getConfiguration();
    final FileSystem fs = distributedClassPath.getFileSystem(conf);

    if (fs instanceof LocalFileSystem) {
      return;
    }

    for (String jarFilePath : jarFiles) {
      File jarFile = new File(jarFilePath);
      if (jarFile.getName().endsWith(".jar")) {
        final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());

        if (!existing.contains(hdfsPath)) {
          if (jarFile.getName().matches(".*SNAPSHOT(-selfcontained)?\\.jar$")
              || !fs.exists(hdfsPath)) {
            log.info("Uploading jar to path[%s]", hdfsPath);
            ByteStreams.copy(
                Files.newInputStreamSupplier(jarFile),
                new OutputSupplier<OutputStream>() {
                  @Override
                  public OutputStream getOutput() throws IOException {
                    return fs.create(hdfsPath);
                  }
                });
          }

          existing.add(hdfsPath);
        }

        DistributedCache.addFileToClassPath(hdfsPath, conf, fs);
      }
    }
  }
Example #28
0
    public static String runTask(String[] args) throws Exception {
      final String schema = args[0];
      final String workingPath = args[1];
      final String segmentOutputPath = args[2];

      final HadoopIngestionSpec theSchema =
          HadoopDruidIndexerConfig.jsonMapper.readValue(schema, HadoopIngestionSpec.class);
      final HadoopDruidIndexerConfig config =
          HadoopDruidIndexerConfig.fromSchema(
              theSchema
                  .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
                  .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)));

      Jobby job = new HadoopDruidDetermineConfigurationJob(config);

      log.info("Starting a hadoop determine configuration job...");
      if (job.run()) {
        return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(config.getSchema());
      }

      return null;
    }
  @JsonCreator
  public InlineSchemasAvroBytesDecoder(
      @JacksonInject @Json ObjectMapper mapper,
      @JsonProperty("schemas") Map<String, Map<String, Object>> schemas)
      throws Exception {
    Preconditions.checkArgument(
        schemas != null && schemas.size() > 0,
        "at least one schema must be provided in schemas attribute");

    this.schemas = schemas;

    schemaObjs = new HashMap<>(schemas.size());
    for (Map.Entry<String, Map<String, Object>> e : schemas.entrySet()) {

      int id = Integer.parseInt(e.getKey());

      Map<String, Object> schema = e.getValue();
      String schemaStr = mapper.writeValueAsString(schema);

      logger.info("Schema string [%s] = [%s]", id, schemaStr);
      schemaObjs.put(id, new Schema.Parser().parse(schemaStr));
    }
  }
Example #30
0
  @Override
  public void writeIndexes(List<IntBuffer> segmentRowNumConversions, Closer closer)
      throws IOException {
    final SerializerUtils serializerUtils = new SerializerUtils();
    long dimStartTime = System.currentTimeMillis();

    String bmpFilename = String.format("%s.inverted", dimensionName);
    bitmapWriter =
        new GenericIndexedWriter<>(
            ioPeon, bmpFilename, indexSpec.getBitmapSerdeFactory().getObjectStrategy());
    bitmapWriter.open();

    final MappedByteBuffer dimValsMapped = Files.map(dictionaryFile);
    closer.register(
        new Closeable() {
          @Override
          public void close() throws IOException {
            ByteBufferUtils.unmap(dimValsMapped);
          }
        });

    if (!dimensionName.equals(serializerUtils.readString(dimValsMapped))) {
      throw new ISE("dimensions[%s] didn't equate!?  This is a major WTF moment.", dimensionName);
    }
    Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
    log.info("Starting dimension[%s] with cardinality[%,d]", dimensionName, dimVals.size());

    final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
    final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();

    RTree tree = null;
    spatialWriter = null;
    boolean hasSpatial = capabilities.hasSpatialIndexes();
    spatialIoPeon = new TmpFileIOPeon();
    if (hasSpatial) {
      BitmapFactory bmpFactory = bitmapSerdeFactory.getBitmapFactory();
      String spatialFilename = String.format("%s.spatial", dimensionName);
      spatialWriter =
          new ByteBufferWriter<ImmutableRTree>(
              spatialIoPeon,
              spatialFilename,
              new IndexedRTree.ImmutableRTreeObjectStrategy(bmpFactory));
      spatialWriter.open();
      tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
    }

    IndexSeeker[] dictIdSeeker = toIndexSeekers(adapters, dimConversions, dimensionName);

    // Iterate over all dim values' dictionary IDs in ascending order, which matches the
    // dim values' comparison order (see the sketch after this example).
    for (int dictId = 0; dictId < dimVals.size(); dictId++) {
      progress.progress();
      List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
      for (int j = 0; j < adapters.size(); ++j) {
        int seekedDictId = dictIdSeeker[j].seek(dictId);
        if (seekedDictId != IndexSeeker.NOT_EXIST) {
          convertedInverteds.add(
              new ConvertingIndexedInts(
                  adapters.get(j).getBitmapIndex(dimensionName, seekedDictId),
                  segmentRowNumConversions.get(j)));
        }
      }

      MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
      for (Integer row :
          CombiningIterable.createSplatted(
              convertedInverteds, Ordering.<Integer>natural().nullsFirst())) {
        if (row != IndexMerger.INVALID_ROW) {
          bitset.add(row);
        }
      }
      if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
        bitset.or(nullRowsBitmap);
      }

      bitmapWriter.write(bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset));

      if (hasSpatial) {
        String dimVal = dimVals.get(dictId);
        if (dimVal != null) {
          List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
          float[] coords = new float[stringCoords.size()];
          for (int j = 0; j < coords.length; j++) {
            coords[j] = Float.valueOf(stringCoords.get(j));
          }
          tree.insert(coords, bitset);
        }
      }
    }

    log.info(
        "Completed dimension[%s] in %,d millis.",
        dimensionName, System.currentTimeMillis() - dimStartTime);

    if (hasSpatial) {
      spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
    }
  }
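
The loop above walks dictionary IDs in ascending order and, for each value, merges the per-segment inverted row lists (after row-number conversion) into a single bitmap. Below is a stripped-down sketch of that merge step, substituting java.util.BitSet and plain int arrays for Druid's MutableBitmap and ConvertingIndexedInts; those substitutions, and the class and method names, are assumptions for illustration only.

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

public class InvertedIndexMergeSketch {
  // rowsPerSegment.get(j) holds the segment-local row numbers where one dictionary
  // value occurs in segment j; conversions.get(j) maps those to merged row numbers.
  static BitSet mergeValueBitmap(List<int[]> rowsPerSegment, List<int[]> conversions) {
    BitSet merged = new BitSet();
    for (int j = 0; j < rowsPerSegment.size(); j++) {
      int[] conversion = conversions.get(j);
      for (int segmentRow : rowsPerSegment.get(j)) {
        merged.set(conversion[segmentRow]); // convert, then add to the merged bitmap
      }
    }
    return merged;
  }

  public static void main(String[] args) {
    // The value occurs in rows {0, 2} of segment 0 and row {1} of segment 1;
    // segment 1's rows start at offset 3 in the merged segment.
    List<int[]> rows = Arrays.asList(new int[] {0, 2}, new int[] {1});
    List<int[]> conv = Arrays.asList(new int[] {0, 1, 2}, new int[] {3, 4, 5});
    System.out.println(mergeValueBitmap(rows, conv)); // prints {0, 2, 4}
  }
}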