Example #1
 @Override
 public byte[] get(NamedKey key) {
   try (ResourceHolder<MemcachedClientIF> clientHolder = client.get()) {
     Future<Object> future;
     try {
       future = clientHolder.get().asyncGet(computeKeyHash(memcachedPrefix, key));
     } catch (IllegalStateException e) {
       // operation did not get queued in time (queue is full)
       errorCount.incrementAndGet();
       log.warn(e, "Unable to queue cache operation");
       return null;
     }
     try {
       byte[] bytes = (byte[]) future.get(timeout, TimeUnit.MILLISECONDS);
       if (bytes != null) {
         hitCount.incrementAndGet();
       } else {
         missCount.incrementAndGet();
       }
       return bytes == null ? null : deserializeValue(key, bytes);
     } catch (TimeoutException e) {
       timeoutCount.incrementAndGet();
       future.cancel(false);
       return null;
     } catch (InterruptedException e) {
       Thread.currentThread().interrupt();
       throw Throwables.propagate(e);
     } catch (ExecutionException e) {
       errorCount.incrementAndGet();
       log.warn(e, "Exception pulling item from cache");
       return null;
     }
   }
 }
Example #2
  @Override
  public void configure(Binder binder) {
    // Instantiate eagerly so that we get everything registered and put into the Lifecycle
    // This makes the shutdown run pretty darn near last.

    try {
      // Reflection to try to allow non-Log4j2 stuff to run. This acts as a gateway to stop errors
      // in the next few lines
      final Class<?> logManagerClazz = Class.forName("org.apache.logging.log4j.LogManager");

      final LoggerContextFactory contextFactory = LogManager.getFactory();
      if (!(contextFactory instanceof Log4jContextFactory)) {
        log.warn(
            "Expected [%s] found [%s]. Unknown class for context factory. Not logging shutdown",
            Log4jContextFactory.class.getCanonicalName(),
            contextFactory.getClass().getCanonicalName());
        return;
      }
      final ShutdownCallbackRegistry registry =
          ((Log4jContextFactory) contextFactory).getShutdownCallbackRegistry();
      if (!(registry instanceof Log4jShutdown)) {
        log.warn(
            "Shutdown callback registry expected class [%s] found [%s]. Skipping shutdown registry",
            Log4jShutdown.class.getCanonicalName(), registry.getClass().getCanonicalName());
        return;
      }
      binder.bind(Log4jShutdown.class).toInstance((Log4jShutdown) registry);
      binder
          .bind(Key.get(Log4jShutterDowner.class, Names.named("ForTheEagerness")))
          .to(Log4jShutterDowner.class)
          .asEagerSingleton();
    } catch (ClassNotFoundException | ClassCastException | LinkageError e) {
      log.warn(e, "Not registering log4j shutdown hooks. Not using log4j?");
    }
  }
Example #3
  public FileUtils.FileCopyResult getSegmentFiles(
      String region, String container, String path, File outDir) throws SegmentLoadingException {
    CloudFilesObjectApiProxy objectApi =
        new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
    final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);

    try {
      final FileUtils.FileCopyResult result =
          CompressionUtils.unzip(byteSource, outDir, CloudFilesUtils.CLOUDFILESRETRY, true);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
      return result;
    } catch (Exception e) {
      try {
        org.apache.commons.io.FileUtils.deleteDirectory(outDir);
      } catch (IOException ioe) {
        log.warn(
            ioe,
            "Failed to remove output directory [%s] for segment pulled from [%s]",
            outDir.getAbsolutePath(),
            path);
      }
      throw new SegmentLoadingException(e, e.getMessage());
    } finally {
      try {
        byteSource.closeStream();
      } catch (IOException ioe) {
        log.warn(ioe, "Failed to close payload for segment pulled from [%s]", path);
      }
    }
  }
Example #4
  @Override
  public Map<NamedKey, byte[]> getBulk(Iterable<NamedKey> keys) {
    try (ResourceHolder<MemcachedClientIF> clientHolder = client.get()) {
      Map<String, NamedKey> keyLookup =
          Maps.uniqueIndex(
              keys,
              new Function<NamedKey, String>() {
                @Override
                public String apply(@Nullable NamedKey input) {
                  return computeKeyHash(memcachedPrefix, input);
                }
              });

      Map<NamedKey, byte[]> results = Maps.newHashMap();

      BulkFuture<Map<String, Object>> future;
      try {
        future = clientHolder.get().asyncGetBulk(keyLookup.keySet());
      } catch (IllegalStateException e) {
        // operation did not get queued in time (queue is full)
        errorCount.incrementAndGet();
        log.warn(e, "Unable to queue cache operation");
        return results;
      }

      try {
        Map<String, Object> some = future.getSome(timeout, TimeUnit.MILLISECONDS);

        if (future.isTimeout()) {
          future.cancel(false);
          timeoutCount.incrementAndGet();
        }
        missCount.addAndGet(keyLookup.size() - some.size());
        hitCount.addAndGet(some.size());

        for (Map.Entry<String, Object> entry : some.entrySet()) {
          final NamedKey key = keyLookup.get(entry.getKey());
          final byte[] value = (byte[]) entry.getValue();
          if (value != null) {
            results.put(key, deserializeValue(key, value));
          }
        }

        return results;
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw Throwables.propagate(e);
      } catch (ExecutionException e) {
        errorCount.incrementAndGet();
        log.warn(e, "Exception pulling item from cache");
        return results;
      }
    }
  }
Example #5
  public boolean scheduleAndWait(
      final String id, ExtractionNamespace namespace, long waitForFirstRun) {
    if (scheduleOrUpdate(id, namespace)) {
      log.debug("Scheduled new namespace [%s]: %s", id, namespace);
    } else {
      log.debug("Namespace [%s] already running: %s", id, namespace);
    }

    final NamespaceImplData namespaceImplData = implData.get(id);
    if (namespaceImplData == null) {
      log.warn("NamespaceLookupExtractorFactory[%s] - deleted during start", id);
      return false;
    }

    boolean success = false;
    try {
      success = namespaceImplData.firstRun.await(waitForFirstRun, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
      log.error(e, "NamespaceLookupExtractorFactory[%s] - interrupted during start", id);
    }
    if (!success) {
      delete(id);
    }
    return success;
  }
Example #6
  private Map<String, Object> buildStringKeyMap(ByteBuffer input) {
    Map<String, Object> theMap = Maps.newHashMap();

    try {
      DynamicMessage message = DynamicMessage.parseFrom(descriptor, ByteString.copyFrom(input));
      Map<Descriptors.FieldDescriptor, Object> allFields = message.getAllFields();

      for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : allFields.entrySet()) {
        String name = entry.getKey().getName();
        if (theMap.containsKey(name)) {
          continue;
          // Perhaps throw an exception here?
          // throw new RuntimeException("duplicate key " + name + " in " + message);
        }
        Object value = entry.getValue();
        if (value instanceof Descriptors.EnumValueDescriptor) {
          Descriptors.EnumValueDescriptor desc = (Descriptors.EnumValueDescriptor) value;
          value = desc.getName();
        }

        theMap.put(name, value);
      }

    } catch (InvalidProtocolBufferException e) {
      log.warn(e, "Problem with protobuf something");
    }
    return theMap;
  }
Example #7
 @LifecycleStop
 public void stop() {
   if (log4jShutdown != null) {
     log.debug("Shutting down log4j");
     log4jShutdown.stop();
   } else {
     log.warn("Log4j shutdown was registered in lifecycle but no shutdown object exists!");
   }
 }
Example #8
 @Override
 public void newEntry(String name, Map properties) {
   synchronized (lock) {
     if (currentlyLoading == null) {
       log.warn(
           "Server[%s] a new entry[%s] appeared, even though nothing is currently loading[%s]",
           basePath, name, currentlyLoading);
     } else {
       if (!name.equals(currentlyLoading.getSegmentIdentifier())) {
         log.warn(
             "Server[%s] a new entry[%s] appeared that is not the currently loading entry[%s]",
             basePath, name, currentlyLoading);
       } else {
         log.info("Server[%s]'s currently loading entry[%s] appeared.", basePath, name);
       }
     }
   }
 }
Example #9
 private static void awaitNextRetry(final Throwable e, final int nTry)
     throws InterruptedException {
   final long baseSleepMillis = 1000;
   final long maxSleepMillis = 60000;
   final double fuzzyMultiplier = Math.min(Math.max(1 + 0.2 * new Random().nextGaussian(), 0), 2);
   final long sleepMillis =
       (long) (Math.min(maxSleepMillis, baseSleepMillis * Math.pow(2, nTry)) * fuzzyMultiplier);
   log.warn(e, "Failed on try %d, retrying in %,dms.", nTry, sleepMillis);
   Thread.sleep(sleepMillis);
 }
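The helper above implements capped exponential backoff with a randomized ("fuzzy") multiplier. A minimal sketch of a caller is shown below; the callWithRetries wrapper and its maxTries parameter are illustrative assumptions, not part of the original code.

 // Hypothetical retry wrapper (illustrative only): retries the task up to maxTries
 // attempts, sleeping with awaitNextRetry's capped, jittered backoff between tries.
 static <T> T callWithRetries(final java.util.concurrent.Callable<T> task, final int maxTries)
     throws Exception {
   for (int nTry = 0; ; nTry++) {
     try {
       return task.call();
     } catch (InterruptedException e) {
       throw e; // do not retry when interrupted
     } catch (Exception e) {
       if (nTry >= maxTries - 1) {
         throw e;
       }
       awaitNextRetry(e, nTry);
     }
   }
 }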
Example #10
 @JsonCreator
 public RandomFirehoseFactory(
     @JsonProperty("sleepUsec") Long sleepUsec,
     @JsonProperty("maxGeneratedRows") Long maxGeneratedRows,
     @JsonProperty("seed") Long seed,
     @JsonProperty("nTokens") Integer nTokens,
     @JsonProperty("nPerSleep") Integer nPerSleep) {
    // Validate the parameters before assigning the fields so the fallback values take effect.
    if (nTokens <= 0) {
      log.warn("nTokens parameter " + nTokens + " ignored; must be greater than or equal to 1");
      nTokens = 1;
    }
    if (nPerSleep <= 0) {
      log.warn("nPerSleep parameter " + nPerSleep + " ignored; must be greater than or equal to 1");
      nPerSleep = 1;
    }
    long nsec = (sleepUsec > 0) ? sleepUsec * 1000L : 0;
    long msec = nsec / 1000000L;
    this.delayMsec = msec;
    this.delayNsec = (int) (nsec - (msec * 1000000L));
    this.maxGeneratedRows = maxGeneratedRows;
    this.seed = seed;
    this.nTokens = nTokens;
    this.nPerSleep = nPerSleep;
   log.info("maxGeneratedRows=" + maxGeneratedRows);
   log.info("seed=" + ((seed == 0L) ? "random value" : seed));
   log.info("nTokens=" + nTokens);
   log.info("nPerSleep=" + nPerSleep);
   double dmsec = (double) delayMsec + ((double) this.delayNsec) / 1000000.;
   if (dmsec > 0.0) {
     log.info("sleep period=" + dmsec + "msec");
     log.info(
         "approximate max rate of record generation="
             + (nPerSleep * 1000. / dmsec)
             + "/sec"
             + "  or  "
             + (60. * nPerSleep * 1000. / dmsec)
             + "/minute");
   } else {
     log.info("sleep period= NONE");
     log.info("approximate max rate of record generation= as fast as possible");
   }
 }
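The split into delayMsec and delayNsec above matches the two-argument Thread.sleep(long millis, int nanos) overload. A minimal sketch of how the row-generating loop might consume the two fields follows; the surrounding loop and the error handling are assumptions, not shown in the original.

 // Hypothetical pacing step between generated rows (illustrative only).
 if (delayMsec > 0 || delayNsec > 0) {
   try {
     Thread.sleep(delayMsec, delayNsec);
   } catch (InterruptedException e) {
     Thread.currentThread().interrupt();
     throw new RuntimeException(e);
   }
 }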
Example #11
  @Override
  public void entryRemoved(String name) {
    synchronized (lock) {
      if (currentlyLoading == null) {
        log.warn(
            "Server[%s] an entry[%s] was removed even though it wasn't loading!?", basePath, name);
        return;
      }
      if (!name.equals(currentlyLoading.getSegmentIdentifier())) {
        log.warn(
            "Server[%s] entry [%s] was removed even though it's not what is currently loading[%s]",
            basePath, name, currentlyLoading);
        return;
      }
      actionCompleted();
      log.info("Server[%s] done processing [%s]", basePath, name);
    }

    doNext();
  }
Example #12
 @Override
 public void put(NamedKey key, byte[] value) {
   try (final ResourceHolder<MemcachedClientIF> clientHolder = client.get()) {
     clientHolder
         .get()
         .set(computeKeyHash(memcachedPrefix, key), expiration, serializeValue(key, value));
   } catch (IllegalStateException e) {
     // operation did not get queued in time (queue is full)
     errorCount.incrementAndGet();
     log.warn(e, "Unable to queue cache operation");
   }
 }
Example #13
 public static boolean isInstanceReady(ServerDiscoverySelector serviceProvider) {
   try {
     Server instance = serviceProvider.pick();
     if (instance == null) {
       LOG.warn("Unable to find a host");
       return false;
     }
   } catch (Exception e) {
     LOG.error(e, "Caught exception waiting for host");
     return false;
   }
   return true;
 }
Example #14
  private static void recursivelyDelete(final File dir) {
   if (dir != null) {
     if (dir.isDirectory()) {
       final File[] files = dir.listFiles();
       if (files != null) {
         for (File file : files) {
           recursivelyDelete(file);
         }
       }
     } else {
       if (!dir.delete()) {
         log.warn("Could not delete file at [%s]", dir.getAbsolutePath());
       }
     }
   }
 }
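Note that the helper above deletes only plain files and leaves the emptied directories in place. A variant that also removes each directory once its contents are gone could look like the following sketch; it is illustrative and not part of the original code.

 // Hypothetical variant that removes directories as well as files (illustrative only).
 private static void recursivelyDeleteIncludingDirs(final File dir) {
   if (dir == null) {
     return;
   }
   if (dir.isDirectory()) {
     final File[] files = dir.listFiles();
     if (files != null) {
       for (File file : files) {
         recursivelyDeleteIncludingDirs(file);
       }
     }
   }
   // Delete the file, or the directory once its children have been removed.
   if (!dir.delete()) {
     log.warn("Could not delete file at [%s]", dir.getAbsolutePath());
   }
 }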
Example #15
 public static long zipAndCopyDir(
     File baseDir, OutputStream baseOutputStream, Progressable progressable) throws IOException {
   long size = 0L;
   try (ZipOutputStream outputStream = new ZipOutputStream(baseOutputStream)) {
     List<String> filesToCopy = Arrays.asList(baseDir.list());
     for (String fileName : filesToCopy) {
       final File fileToCopy = new File(baseDir, fileName);
       if (java.nio.file.Files.isRegularFile(fileToCopy.toPath())) {
         size += copyFileToZipStream(fileToCopy, outputStream, progressable);
       } else {
         log.warn(
             "File at [%s] is not a regular file! skipping as part of zip", fileToCopy.getPath());
       }
     }
     outputStream.flush();
   }
   return size;
 }
Example #16
  public static void createTable(final DBI dbi, final String tableName, final String sql) {
    try {
      dbi.withHandle(
          new HandleCallback<Void>() {
            @Override
            public Void withHandle(Handle handle) throws Exception {
              List<Map<String, Object>> table =
                  handle.select(String.format("SHOW tables LIKE '%s'", tableName));

              if (table.isEmpty()) {
                log.info("Creating table[%s]", tableName);
                handle.createStatement(sql).execute();
              } else {
                log.info("Table[%s] existed: [%s]", tableName, table);
              }

              return null;
            }
          });
    } catch (Exception e) {
      log.warn(e, "Exception creating table");
    }
  }
Example #17
 public void createTable(final String tableName, final Iterable<String> sql) {
   try {
     retryWithHandle(
         new HandleCallback<Void>() {
           @Override
           public Void withHandle(Handle handle) throws Exception {
             if (!tableExists(handle, tableName)) {
               log.info("Creating table[%s]", tableName);
               final Batch batch = handle.createBatch();
               for (String s : sql) {
                 batch.add(s);
               }
               batch.execute();
             } else {
               log.info("Table[%s] already exists", tableName);
             }
             return null;
           }
         });
   } catch (Exception e) {
     log.warn(e, "Exception creating table");
   }
 }
Example #18
  public void stop() {
    synchronized (handlers) {
      List<Exception> exceptions = Lists.newArrayList();

      for (Stage stage : Lists.reverse(stagesOrdered())) {
        final CopyOnWriteArrayList<Handler> stageHandlers = handlers.get(stage);
        final ListIterator<Handler> iter = stageHandlers.listIterator(stageHandlers.size());
        while (iter.hasPrevious()) {
          final Handler handler = iter.previous();
          try {
            handler.stop();
          } catch (Exception e) {
            log.warn(e, "exception thrown when stopping %s", handler);
            exceptions.add(e);
          }
        }
      }
      started.set(false);

      if (!exceptions.isEmpty()) {
        throw Throwables.propagate(exceptions.get(0));
      }
    }
  }
Example #19
  // For testing purposes this is protected
  protected <T extends ExtractionNamespace> ListenableFuture<?> schedule(
      final String id,
      final T namespace,
      final ExtractionNamespaceCacheFactory<T> factory,
      final Runnable postRunnable,
      final String cacheId) {
    log.debug("Trying to update namespace [%s]", id);
    final NamespaceImplData implDatum = implData.get(id);
    if (implDatum != null) {
      synchronized (implDatum.enabled) {
        if (implDatum.enabled.get()) {
          // We also check at the end of the function, but fail fast here
          throw new IAE(
              "Namespace [%s] already exists! Leaving prior running", namespace.toString());
        }
      }
    }
    final long updateMs = namespace.getPollMs();
    final CountDownLatch startLatch = new CountDownLatch(1);

    final Runnable command =
        new Runnable() {
          @Override
          public void run() {
            try {
              startLatch.await(); // wait for "election" to leadership or cancellation
              if (!Thread.currentThread().isInterrupted()) {
                final Map<String, String> cache = getCacheMap(cacheId);
                final String preVersion = lastVersion.get(id);
                final Callable<String> runnable =
                    factory.getCachePopulator(id, namespace, preVersion, cache);

                tasksStarted.incrementAndGet();
                final String newVersion = runnable.call();
                if (preVersion != null && preVersion.equals(newVersion)) {
                  throw new CancellationException(
                      String.format("Version `%s` already exists", preVersion));
                }
                if (newVersion != null) {
                  lastVersion.put(id, newVersion);
                }
                postRunnable.run();
                log.debug("Namespace [%s] successfully updated", id);
              }
            } catch (Throwable t) {
              delete(cacheId);
              if (t instanceof CancellationException) {
                log.debug(t, "Namespace [%s] cancelled", id);
              } else {
                log.error(t, "Failed update namespace [%s]", namespace);
              }
              if (Thread.currentThread().isInterrupted()) {
                throw Throwables.propagate(t);
              }
            }
          }
        };

    ListenableFuture<?> future;
    try {
      if (updateMs > 0) {
        future =
            listeningScheduledExecutorService.scheduleAtFixedRate(
                command, 0, updateMs, TimeUnit.MILLISECONDS);
      } else {
        future = listeningScheduledExecutorService.schedule(command, 0, TimeUnit.MILLISECONDS);
      }

      final NamespaceImplData me = new NamespaceImplData(future, namespace, id);
      final NamespaceImplData other = implData.putIfAbsent(id, me);
      if (other != null) {
        if (!future.isDone() && !future.cancel(true)) {
          log.warn("Unable to cancel future for namespace[%s] on race loss", id);
        }
        throw new IAE("Namespace [%s] already exists! Leaving prior running", namespace);
      } else {
        if (!me.enabled.compareAndSet(false, true)) {
          log.wtf("How did someone enable this before ME?");
        }
        log.debug("I own namespace [%s]", id);
        return future;
      }
    } finally {
      startLatch.countDown();
    }
  }
Example #20
  public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
      jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
      throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
      jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
        JobHelper.distributedClassPath(
            getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
        job);

    Throwable throwable = null;
    try {
      job.submit();
      log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
      final boolean success = job.waitForCompletion(true);
      if (!success) {
        final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
        if (reports != null) {
          for (final TaskReport report : reports) {
            log.error(
                "Error in task [%s] : %s",
                report.getTaskId(), Arrays.toString(report.getDiagnostics()));
          }
        }
        return null;
      }
      try {
        loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
        writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
      } catch (IOException ex) {
        log.error(ex, "Could not fetch counters");
      }
      final JobID jobID = job.getJobID();

      final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
      final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
      final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
      final List<Path> goodPaths = new ArrayList<>();
      while (it.hasNext()) {
        final LocatedFileStatus locatedFileStatus = it.next();
        if (locatedFileStatus.isFile()) {
          final Path myPath = locatedFileStatus.getPath();
          if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
            goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
          }
        }
      }
      if (goodPaths.isEmpty()) {
        log.warn("No good data found at [%s]", jobDir);
        return null;
      }
      final List<DataSegment> returnList =
          ImmutableList.copyOf(
              Lists.transform(
                  goodPaths,
                  new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                      try {
                        if (!fs.exists(input)) {
                          throw new ISE(
                              "Somehow [%s] was found but [%s] is missing at [%s]",
                              ConvertingOutputFormat.DATA_SUCCESS_KEY,
                              ConvertingOutputFormat.DATA_FILE_KEY,
                              jobDir);
                        }
                      } catch (final IOException e) {
                        throw Throwables.propagate(e);
                      }
                      try (final InputStream stream = fs.open(input)) {
                        return HadoopDruidConverterConfig.jsonMapper.readValue(
                            stream, DataSegment.class);
                      } catch (final IOException e) {
                        throw Throwables.propagate(e);
                      }
                    }
                  }));
      if (returnList.size() == segments.size()) {
        return returnList;
      } else {
        throw new ISE(
            "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
            segments.size(), returnList.size(), jobDir);
      }
    } catch (InterruptedException | ClassNotFoundException e) {
      RuntimeException exception = Throwables.propagate(e);
      throwable = exception;
      throw exception;
    } catch (Throwable t) {
      throwable = t;
      throw t;
    } finally {
      try {
        cleanup(job);
      } catch (IOException e) {
        if (throwable != null) {
          throwable.addSuppressed(e);
        } else {
          log.error(e, "Could not clean up job [%s]", job.getJobID());
        }
      }
    }
  }
Example #21
    @Override
    protected void map(String key, String value, final Context context)
        throws IOException, InterruptedException {
      final InputSplit split = context.getInputSplit();
      if (!(split instanceof DatasourceInputSplit)) {
        throw new IAE(
            "Unexpected split type. Expected [%s] was [%s]",
            DatasourceInputSplit.class.getCanonicalName(), split.getClass().getCanonicalName());
      }

      final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
      final File tmpDir = Paths.get(tmpDirLoc).toFile();

      final DataSegment segment =
          Iterables.getOnlyElement(((DatasourceInputSplit) split).getSegments()).getSegment();

      final HadoopDruidConverterConfig config =
          converterConfigFromConfiguration(context.getConfiguration());

      context.setStatus("DOWNLOADING");
      context.progress();
      final Path inPath = new Path(JobHelper.getURIFromSegment(segment));
      final File inDir = new File(tmpDir, "in");

      if (inDir.exists() && !inDir.delete()) {
        log.warn("Could not delete [%s]", inDir);
      }

      if (!inDir.mkdir() && (!inDir.exists() || !inDir.isDirectory())) {
        log.warn("Unable to make directory");
      }

      final long inSize =
          JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context);
      log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
      context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);

      context.setStatus("CONVERTING");
      context.progress();
      final File outDir = new File(tmpDir, "out");
      if (!outDir.mkdir() && (!outDir.exists() || !outDir.isDirectory())) {
        throw new IOException(String.format("Could not create output directory [%s]", outDir));
      }
      HadoopDruidConverterConfig.INDEX_MERGER.convert(
          inDir, outDir, config.getIndexSpec(), JobHelper.progressIndicatorForContext(context));
      if (config.isValidate()) {
        context.setStatus("Validating");
        HadoopDruidConverterConfig.INDEX_IO.validateTwoSegments(inDir, outDir);
      }
      context.progress();
      context.setStatus("Starting PUSH");
      final Path baseOutputPath = new Path(config.getSegmentOutputPath());
      final FileSystem outputFS = baseOutputPath.getFileSystem(context.getConfiguration());
      final DataSegment finalSegmentTemplate =
          segment.withVersion(segment.getVersion() + "_converted");
      final DataSegment finalSegment =
          JobHelper.serializeOutIndex(
              finalSegmentTemplate,
              context.getConfiguration(),
              context,
              context.getTaskAttemptID(),
              outDir,
              JobHelper.makeSegmentOutputPath(baseOutputPath, outputFS, finalSegmentTemplate));
      context.progress();
      context.setStatus("Finished PUSH");
      final String finalSegmentString =
          HadoopDruidConverterConfig.jsonMapper.writeValueAsString(finalSegment);
      context
          .getConfiguration()
          .set(ConvertingOutputFormat.PUBLISHED_SEGMENT_KEY, finalSegmentString);
      context.write(new Text("dataSegment"), new Text(finalSegmentString));

      context.getCounter(COUNTER_GROUP, COUNTER_WRITTEN).increment(finalSegment.getSize());
      context.progress();
      context.setStatus("Ready To Commit");
    }
Example #22
  @Override
  public Sequence<T> run(final Query<T> queryParam) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> indexAccumulatorPair =
        GroupByQueryHelper.createIndexAccumulatorPair(query, configSupplier.get());
    final Pair<List, Accumulator<List, T>> bySegmentAccumulatorPair =
        GroupByQueryHelper.createBySegmentAccumulatorPair();
    final boolean bySegment = query.getContextBySegment(false);
    final int priority = query.getContextPriority(0);

    if (Iterables.isEmpty(queryables)) {
      log.warn("No queryables found.");
    }
    ListenableFuture<List<Void>> futures =
        Futures.allAsList(
            Lists.newArrayList(
                Iterables.transform(
                    queryables,
                    new Function<QueryRunner<T>, ListenableFuture<Void>>() {
                      @Override
                      public ListenableFuture<Void> apply(final QueryRunner<T> input) {
                        return exec.submit(
                            new AbstractPrioritizedCallable<Void>(priority) {
                              @Override
                              public Void call() throws Exception {
                                try {
                                  if (bySegment) {
                                    input
                                        .run(queryParam)
                                        .accumulate(
                                            bySegmentAccumulatorPair.lhs,
                                            bySegmentAccumulatorPair.rhs);
                                  } else {
                                    input
                                        .run(queryParam)
                                        .accumulate(
                                            indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
                                  }

                                  return null;
                                } catch (QueryInterruptedException e) {
                                  throw Throwables.propagate(e);
                                } catch (Exception e) {
                                  log.error(e, "Exception with one of the sequences!");
                                  throw Throwables.propagate(e);
                                }
                              }
                            });
                      }
                    })));

    // Let the runners complete
    try {
      queryWatcher.registerQuery(query, futures);
      final Number timeout = query.getContextValue("timeout", (Number) null);
      if (timeout == null) {
        futures.get();
      } else {
        futures.get(timeout.longValue(), TimeUnit.MILLISECONDS);
      }
    } catch (InterruptedException e) {
      log.warn(e, "Query interrupted, cancelling pending results, query id [%s]", query.getId());
      futures.cancel(true);
      throw new QueryInterruptedException("Query interrupted");
    } catch (CancellationException e) {
      throw new QueryInterruptedException("Query cancelled");
    } catch (TimeoutException e) {
      log.info("Query timeout, cancelling pending results for query id [%s]", query.getId());
      futures.cancel(true);
      throw new QueryInterruptedException("Query timeout");
    } catch (ExecutionException e) {
      throw Throwables.propagate(e.getCause());
    }

    if (bySegment) {
      return Sequences.simple(bySegmentAccumulatorPair.lhs);
    }

    return Sequences.simple(
        Iterables.transform(
            indexAccumulatorPair.lhs.iterableWithPostAggregations(null),
            new Function<Row, T>() {
              @Override
              public T apply(Row input) {
                return (T) input;
              }
            }));
  }
Example #23
  @Override
  public void getSegmentFiles(final DataSegment segment, final File outDir)
      throws SegmentLoadingException {
    final S3Coords s3Coords = new S3Coords(segment);

    log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);

    if (!isObjectInBucket(s3Coords)) {
      throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    }

    if (!outDir.exists()) {
      outDir.mkdirs();
    }

    if (!outDir.isDirectory()) {
      throw new ISE("outDir[%s] must be a directory.", outDir);
    }

    try {
      S3Utils.retryS3Operation(
          new Callable<Void>() {
            @Override
            public Void call() throws Exception {
              long startTime = System.currentTimeMillis();
              S3Object s3Obj = null;

              try {
                s3Obj = s3Client.getObject(s3Coords.bucket, s3Coords.path);

                try (InputStream in = s3Obj.getDataInputStream()) {
                  final String key = s3Obj.getKey();
                  if (key.endsWith(".zip")) {
                    CompressionUtils.unzip(in, outDir);
                  } else if (key.endsWith(".gz")) {
                    final File outFile = new File(outDir, toFilename(key, ".gz"));
                    ByteStreams.copy(
                        new GZIPInputStream(in), Files.newOutputStreamSupplier(outFile));
                  } else {
                    ByteStreams.copy(
                        in, Files.newOutputStreamSupplier(new File(outDir, toFilename(key, ""))));
                  }
                  log.info(
                      "Pull of file[%s] completed in %,d millis",
                      s3Obj, System.currentTimeMillis() - startTime);
                  return null;
                } catch (IOException e) {
                  throw new IOException(
                      String.format("Problem decompressing object[%s]", s3Obj), e);
                }
              } finally {
                S3Utils.closeStreamsQuietly(s3Obj);
              }
            }
          });
    } catch (Exception e) {
      try {
        FileUtils.deleteDirectory(outDir);
      } catch (IOException ioe) {
        log.warn(
            ioe,
            "Failed to remove output directory for segment[%s] after exception: %s",
            segment.getIdentifier(),
            outDir);
      }
      throw new SegmentLoadingException(e, e.getMessage());
    }
  }