Example #1
0
 /**
  * Builds the user-to-permitted-paths map from the HdfsProxy permissions file.
  *
  * <p>The file name is read from {@code hdfsproxy.user.permissions.file.location} (default
  * {@code user-permissions.xml}). Each property in that file maps a key to a comma-separated
  * list of paths. Whitespace around the whole value and around each comma is ignored, the same
  * way {@code getCertsMap} treats its serial-number lists.
  *
  * @param conf configuration used to look up and locate the permissions file
  * @return map from property key to its set of paths, or {@code null} if the permissions file
  *     cannot be found as a resource
  */
 private static Map<String, Set<Path>> getPermMap(Configuration conf) {
   String permLoc = conf.get("hdfsproxy.user.permissions.file.location", "user-permissions.xml");
   if (conf.getResource(permLoc) == null) {
     LOG.warn("HdfsProxy user permissions file not found");
     return null;
   }
   // Load the permissions file into its own Configuration so only its entries are iterated.
   Configuration permConf = new Configuration(false);
   permConf.addResource(permLoc);
   Map<String, Set<Path>> map = new HashMap<String, Set<Path>>();
   for (Map.Entry<String, String> e : permConf) {
     String k = e.getKey();
     String rawValue = e.getValue();
     // Trim so leading/trailing whitespace (e.g. newlines inside <value>) never ends up in a Path.
     String v = rawValue == null ? null : rawValue.trim();
     if (k != null && k.length() != 0 && v != null && v.length() != 0) {
       Set<Path> pathSet = new HashSet<Path>();
       // Swallow whitespace on both sides of the comma, not only after it.
       String[] paths = v.split("\\s*,\\s*");
       for (String p : paths) {
         if (p.length() != 0) {
           pathSet.add(new Path(p));
         }
       }
       map.put(k, pathSet);
     }
   }
   return map;
 }
Example #2
0
 /**
  * Builds the user-to-certificate-serial-numbers map from the HdfsProxy certs file.
  *
  * <p>The file name is read from {@code hdfsproxy.user.certs.file.location} (default
  * {@code user-certs.xml}). Each property in that file maps a key to a comma-separated list of
  * serial numbers, each parsed as a hexadecimal {@link BigInteger}.
  *
  * @param conf configuration used to look up and locate the certs file
  * @return map from property key to its set of serial numbers, or {@code null} if the certs file
  *     cannot be found as a resource
  * @throws NumberFormatException if a listed serial number is not valid hexadecimal
  */
 private static Map<String, Set<BigInteger>> getCertsMap(Configuration conf) {
   String certsLoc = conf.get("hdfsproxy.user.certs.file.location", "user-certs.xml");
   if (conf.getResource(certsLoc) == null) {
     LOG.warn("HdfsProxy user certs file not found");
     return null;
   }
   // Load the certs file into its own Configuration so only its entries are iterated.
   Configuration certsConf = new Configuration(false);
   certsConf.addResource(certsLoc);
   Map<String, Set<BigInteger>> map = new HashMap<String, Set<BigInteger>>();
   for (Map.Entry<String, String> e : certsConf) {
     String k = e.getKey();
     String rawValue = e.getValue();
     // Trim only after the null check; trimming first made the v != null guard unreachable.
     String v = rawValue == null ? null : rawValue.trim();
     if (k != null && k.length() != 0 && v != null && v.length() != 0) {
       Set<BigInteger> numSet = new HashSet<BigInteger>();
       String[] serialnumbers = v.split("\\s*,\\s*");
       for (String num : serialnumbers) {
         if (num.length() != 0) {
           // Serial numbers are written in base 16.
           numSet.add(new BigInteger(num, 16));
         }
       }
       map.put(k, numSet);
     }
   }
   return map;
 }
Example #3
0
  /**
   * Stores the configuration and (re)initialises the Tika configuration, the HTML parse filters,
   * the DOM utilities and the caching policy from it.
   *
   * <p>If {@code tika.config.file} is set and can be resolved as a resource, the Tika
   * configuration is loaded from it. If the property is unset, the resource is missing, or
   * loading fails, the default Tika configuration for this class's class loader is used instead,
   * so {@code tikaConfig} is only left {@code null} when the default cannot be loaded either.
   *
   * @param conf the configuration to apply
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.tikaConfig = null;

    // do we want a custom Tika configuration file
    // deprecated since Tika 0.7 which is based on
    // a service provider based configuration
    String customConfFile = conf.get("tika.config.file");
    if (customConfFile != null) {
      try {
        // see if a Tika config file can be found in the job file
        URL customTikaConfig = conf.getResource(customConfFile);
        if (customTikaConfig != null) {
          tikaConfig = new TikaConfig(customTikaConfig);
        } else {
          LOG.warn(
              "Custom Tika configuration "
                  + customConfFile
                  + " not found; falling back to default Tika configuration");
        }
      } catch (Exception e1) {
        String message = "Problem loading custom Tika configuration from " + customConfFile;
        LOG.error(message, e1);
      }
    }

    // Use the default configuration when no custom one was requested or it could not be loaded.
    if (tikaConfig == null) {
      try {
        tikaConfig = new TikaConfig(this.getClass().getClassLoader());
      } catch (Exception e2) {
        String message = "Problem loading default Tika configuration";
        LOG.error(message, e2);
      }
    }

    this.htmlParseFilters = new HtmlParseFilters(getConf());
    this.utils = new DOMContentUtils(conf);
    this.cachingPolicy =
        getConf().get("parser.caching.forbidden.policy", Nutch.CACHING_FORBIDDEN_CONTENT);
  }
Example #4
0
  /**
   * Setup ssl configuration on the job configuration to enable hsftp access from map job. Also copy
   * the ssl configuration file to Distributed cache
   *
   * @param configuration - Reference to job's configuration handle
   * @throws java.io.IOException - Exception if unable to locate ssl config file
   */
  private void setupSSLConfig(Configuration configuration) throws IOException {
    String sslConfigFile = inputOptions.getSslConfigurationFile();
    java.net.URL sslConfigUrl = configuration.getResource(sslConfigFile);
    if (sslConfigUrl == null) {
      // Report the documented IOException instead of an NPE on the missing resource.
      throw new IOException(
          "Given ssl configuration file doesn't exist in class path : " + sslConfigFile);
    }

    Path sslConfigPath = new Path(sslConfigUrl.toString());

    addSSLFilesToDistCache(configuration, sslConfigPath);
    // Both labels are set to the bare file name of the ssl config file.
    configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
  }
Example #5
0
  /**
   * Places the config file {@code f} into {@code confPath} on the local file system.
   *
   * <p>{@code llap-daemon-site.xml} is not copied verbatim: it is resolved from the current
   * configuration, overlaid with the command-line overrides from {@code options}, and written out
   * as XML. Any other file is located as a configuration resource and copied as-is.
   *
   * @param options parsed command-line options supplying property overrides
   * @param lfs local file system to write to
   * @param confPath destination directory
   * @param f name of the config file
   * @throws IOException if the file cannot be found as a resource, or on any write/copy failure
   */
  private void copyConfig(LlapOptions options, FileSystem lfs, Path confPath, String f)
      throws IOException {
    if (f.equals("llap-daemon-site.xml")) {
      FSDataOutputStream confStream = lfs.create(new Path(confPath, f));
      try {
        Configuration copy = resolve(conf, "llap-daemon-site.xml");

        for (Entry<Object, Object> props : options.getConfig().entrySet()) {
          // overrides
          copy.set((String) props.getKey(), (String) props.getValue());
        }

        copy.writeXml(confStream);
      } finally {
        // Close even when resolving or writing fails, so the stream is not leaked.
        confStream.close();
      }
    } else {
      java.net.URL resource = conf.getResource(f);
      if (resource == null) {
        throw new IOException("Unable to find config file: " + f);
      }
      // they will be file:// URLs
      lfs.copyFromLocalFile(new Path(resource.toString()), confPath);
    }
  }
Example #6
0
  /**
   * Builds the LLAP package layout under the directory given in the options.
   *
   * <p>Steps, in order: parse options; load the needed and optional config resources; validate
   * and apply sizing options onto the configuration; localize the tez tarball, Hive jars, default
   * aux jars, optional HBase jars and user aux jars into {@code lib}; write the config files and
   * the log4j2 properties into {@code conf}; and finally emit {@code config.json} with selected
   * settings.
   *
   * @param args command-line arguments
   * @throws Exception if configuration is unavailable, a required config file, HIVE_HOME or
   *     tez.lib.uris is missing, an option fails validation, or any file operation fails
   */
  private void run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    LlapOptions options = optionsProcessor.processOptions(args);

    if (options == null) {
      // help
      return;
    }

    Path tmpDir = new Path(options.getDirectory());

    if (conf == null) {
      throw new Exception("Cannot load any configuration to run command");
    }

    FileSystem fs = FileSystem.get(conf);
    FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();

    // needed so that the file is actually loaded into configuration.
    for (String f : NEEDED_CONFIGS) {
      conf.addResource(f);
      if (conf.getResource(f) == null) {
        throw new Exception("Unable to find required config file: " + f);
      }
    }
    for (String f : OPTIONAL_CONFIGS) {
      conf.addResource(f);
    }
    conf.reloadConfiguration();

    if (options.getName() != null) {
      // update service registry configs - caveat: this has nothing to do with the actual settings
      // as read by the AM
      // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
      // instances
      conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
    }

    // -1 is used throughout as the "option not provided" marker.
    if (options.getSize() != -1) {
      if (options.getCache() != -1) {
        Preconditions.checkArgument(
            options.getCache() < options.getSize(),
            "Cache has to be smaller than the container sizing");
      }
      if (options.getXmx() != -1) {
        Preconditions.checkArgument(
            options.getXmx() < options.getSize(),
            "Working memory has to be smaller than the container sizing");
      }
      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) {
        Preconditions.checkArgument(
            options.getXmx() + options.getCache() < options.getSize(),
            "Working memory + cache has to be smaller than the containing sizing ");
      }
    }

    final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
    if (options.getSize() != -1) {
      // The size option is divided by 1024 * 1024 to obtain the value stored in the *_MB setting.
      final long containerSize = options.getSize() / (1024 * 1024);
      Preconditions.checkArgument(
          containerSize >= minAlloc,
          "Container size should be greater than minimum allocation(%s)",
          minAlloc + "m");
      conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
    }

    if (options.getExecutors() != -1) {
      conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
      // TODO: vcpu settings - possibly when DRFA works right
    }

    if (options.getCache() != -1) {
      conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache());
    }

    if (options.getXmx() != -1) {
      // Needs more explanation here
      // Xmx is not the max heap value in JDK8
      // You need to subtract 50% of the survivor fraction from this, to get actual usable memory
      // before it goes into GC
      conf.setLong(
          ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
          (long) (options.getXmx()) / (1024 * 1024));
    }

    // Explicit key=value overrides from the command line are applied last.
    for (Entry<Object, Object> props : options.getConfig().entrySet()) {
      conf.set((String) props.getKey(), (String) props.getValue());
    }

    URL logger = conf.getResource("llap-daemon-log4j2.properties");

    if (null == logger) {
      throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
    }

    String hiveHome = System.getenv("HIVE_HOME");
    if (hiveHome == null) {
      // Fail with a clear message rather than passing null to the Path constructor.
      throw new Exception("HIVE_HOME environment variable is not set");
    }
    Path home = new Path(hiveHome);
    Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin");

    if (!lfs.exists(home)) {
      throw new Exception("Unable to find HIVE_HOME:" + home);
    } else if (!lfs.exists(scripts)) {
      LOG.warn("Unable to find llap scripts:" + scripts);
    }

    Path libDir = new Path(tmpDir, "lib");

    String tezLibs = conf.get("tez.lib.uris");
    if (tezLibs == null) {
      // The tarball copy below cannot proceed without a source location.
      throw new Exception("Missing tez.lib.uris in tez-site.xml");
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Copying tez libs from " + tezLibs);
    }
    lfs.mkdirs(libDir);
    // Fetch the tez tarball, unpack it into lib, then remove the tarball itself.
    fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
    CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true);
    lfs.delete(new Path(libDir, "tez.tar.gz"), false);

    lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir);
    lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), libDir);

    // copy default aux classes (json/hbase)

    for (String className : DEFAULT_AUX_CLASSES) {
      localizeJarForClass(lfs, libDir, className, false);
    }

    if (options.getIsHBase()) {
      try {
        localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
        Job fakeJob = new Job(new JobConf()); // HBase API is convoluted.
        TableMapReduceUtil.addDependencyJars(fakeJob);
        Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
        for (String jarPath : hbaseJars) {
          if (!jarPath.isEmpty()) {
            lfs.copyFromLocalFile(new Path(jarPath), libDir);
          }
        }
      } catch (Throwable t) {
        String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
        LOG.error(err);
        System.err.println(err);
        throw new RuntimeException(t);
      }
    }

    String auxJars = options.getAuxJars();
    if (auxJars != null && !auxJars.isEmpty()) {
      // TODO: transitive dependencies warning?
      String[] jarPaths = auxJars.split(",");
      for (String jarPath : jarPaths) {
        if (!jarPath.isEmpty()) {
          lfs.copyFromLocalFile(new Path(jarPath), libDir);
        }
      }
    }

    Path confPath = new Path(tmpDir, "conf");
    lfs.mkdirs(confPath);

    for (String f : NEEDED_CONFIGS) {
      copyConfig(options, lfs, confPath, f);
    }
    for (String f : OPTIONAL_CONFIGS) {
      try {
        copyConfig(options, lfs, confPath, f);
      } catch (Throwable t) {
        // Optional configs are best-effort: a failure is logged and skipped.
        LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
      }
    }

    lfs.copyFromLocalFile(new Path(logger.toString()), confPath);

    // extract configs for processing by the python fragments in Slider
    JSONObject configs = new JSONObject();

    configs.put(
        ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB));

    configs.put(
        HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
        HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE));

    configs.put(
        HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname,
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT));

    configs.put(
        ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));

    configs.put(
        ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));

    configs.put(
        ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS));

    configs.put(
        YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));

    configs.put(
        YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1));

    FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json"));
    OutputStreamWriter w = new OutputStreamWriter(os);
    try {
      configs.write(w);
    } finally {
      // Close the writer and stream even if serialising the JSON fails.
      w.close();
      os.close();
    }

    lfs.close();
    fs.close();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Exiting successfully");
    }
  }