/**
 * Loads the per-user path permissions for HdfsProxy.
 *
 * <p>The resource name is read from {@code hdfsproxy.user.permissions.file.location}
 * (default {@code user-permissions.xml}). Every property in that resource maps a key to a
 * comma-separated list of paths.
 *
 * @param conf configuration used to look up and locate the permissions resource
 * @return a map from property key to its set of paths, or {@code null} when
 *     {@code conf.getResource} cannot locate the permissions resource
 */
private static Map<String, Set<Path>> getPermMap(Configuration conf) {
  String permLoc = conf.get("hdfsproxy.user.permissions.file.location", "user-permissions.xml");
  if (conf.getResource(permLoc) == null) {
    LOG.warn("HdfsProxy user permissions file not found");
    return null;
  }

  // Load the permissions resource on its own, without the default resources.
  Configuration permConf = new Configuration(false);
  permConf.addResource(permLoc);

  Map<String, Set<Path>> map = new HashMap<String, Set<Path>>();
  for (Map.Entry<String, String> e : permConf) {
    String k = e.getKey();
    String v = e.getValue();
    if (k == null || k.length() == 0 || v == null) {
      continue;
    }
    // Trim the value and tolerate blanks on both sides of each comma so that
    // surrounding whitespace never ends up inside a Path (same tokenization as
    // getCertsMap uses for serial numbers).
    v = v.trim();
    if (v.length() == 0) {
      continue;
    }
    Set<Path> pathSet = new HashSet<Path>();
    for (String p : v.split("\\s*,\\s*")) {
      if (p.length() != 0) {
        pathSet.add(new Path(p));
      }
    }
    map.put(k, pathSet);
  }
  return map;
}
/**
 * Loads the per-user certificate serial numbers for HdfsProxy.
 *
 * <p>The resource name is read from {@code hdfsproxy.user.certs.file.location} (default
 * {@code user-certs.xml}). Every property in that resource maps a key to a comma-separated
 * list of serial numbers, each parsed as a hexadecimal {@link BigInteger}.
 *
 * @param conf configuration used to look up and locate the certs resource
 * @return a map from property key to its set of serial numbers, or {@code null} when
 *     {@code conf.getResource} cannot locate the certs resource
 * @throws NumberFormatException if a serial number is not valid hexadecimal
 */
private static Map<String, Set<BigInteger>> getCertsMap(Configuration conf) {
  String certsLoc = conf.get("hdfsproxy.user.certs.file.location", "user-certs.xml");
  if (conf.getResource(certsLoc) == null) {
    LOG.warn("HdfsProxy user certs file not found");
    return null;
  }

  // Load the certs resource on its own, without the default resources.
  Configuration certsConf = new Configuration(false);
  certsConf.addResource(certsLoc);

  Map<String, Set<BigInteger>> map = new HashMap<String, Set<BigInteger>>();
  for (Map.Entry<String, String> e : certsConf) {
    String k = e.getKey();
    String v = e.getValue();
    // Check for null BEFORE trimming; trimming first would make the null guard dead
    // and turn a null value into a NullPointerException.
    if (k == null || k.length() == 0 || v == null) {
      continue;
    }
    v = v.trim();
    if (v.length() == 0) {
      continue;
    }
    Set<BigInteger> numSet = new HashSet<BigInteger>();
    for (String num : v.split("\\s*,\\s*")) {
      if (num.length() != 0) {
        numSet.add(new BigInteger(num, 16));
      }
    }
    map.put(k, numSet);
  }
  return map;
}
public void setConf(Configuration conf) { this.conf = conf; this.tikaConfig = null; // do we want a custom Tika configuration file // deprecated since Tika 0.7 which is based on // a service provider based configuration String customConfFile = conf.get("tika.config.file"); if (customConfFile != null) { try { // see if a Tika config file can be found in the job file URL customTikaConfig = conf.getResource(customConfFile); if (customTikaConfig != null) tikaConfig = new TikaConfig(customTikaConfig); } catch (Exception e1) { String message = "Problem loading custom Tika configuration from " + customConfFile; LOG.error(message, e1); } } else { try { tikaConfig = new TikaConfig(this.getClass().getClassLoader()); } catch (Exception e2) { String message = "Problem loading default Tika configuration"; LOG.error(message, e2); } } this.htmlParseFilters = new HtmlParseFilters(getConf()); this.utils = new DOMContentUtils(conf); this.cachingPolicy = getConf().get("parser.caching.forbidden.policy", Nutch.CACHING_FORBIDDEN_CONTENT); }
/**
 * Setup ssl configuration on the job configuration to enable hsftp access from map job. Also
 * copy the ssl configuration file to Distributed cache
 *
 * @param configuration - Reference to job's configuration handle
 * @throws java.io.IOException - Exception if unable to locate ssl config file
 */
private void setupSSLConfig(Configuration configuration) throws IOException {
  String sslConfigFile = inputOptions.getSslConfigurationFile();
  // getResource returns null when the file is not on the class path; report that as the
  // documented IOException instead of failing with a NullPointerException.
  java.net.URL sslConfigUrl = configuration.getResource(sslConfigFile);
  if (sslConfigUrl == null) {
    throw new IOException(
        "Given ssl configuration file doesn't exist in class path : " + sslConfigFile);
  }
  Path sslConfigPath = new Path(sslConfigUrl.toString());

  addSSLFilesToDistCache(configuration, sslConfigPath);
  // Both labels are set to the bare file name of the ssl configuration file.
  configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
  configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
}
private void copyConfig(LlapOptions options, FileSystem lfs, Path confPath, String f) throws IOException { if (f.equals("llap-daemon-site.xml")) { FSDataOutputStream confStream = lfs.create(new Path(confPath, f)); Configuration copy = resolve(conf, "llap-daemon-site.xml"); for (Entry<Object, Object> props : options.getConfig().entrySet()) { // overrides copy.set((String) props.getKey(), (String) props.getValue()); } copy.writeXml(confStream); confStream.close(); } else { // they will be file:// URLs lfs.copyFromLocalFile(new Path(conf.getResource(f).toString()), confPath); } }
private void run(String[] args) throws Exception { LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor(); LlapOptions options = optionsProcessor.processOptions(args); if (options == null) { // help return; } Path tmpDir = new Path(options.getDirectory()); if (conf == null) { throw new Exception("Cannot load any configuration to run command"); } FileSystem fs = FileSystem.get(conf); FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem(); // needed so that the file is actually loaded into configuration. for (String f : NEEDED_CONFIGS) { conf.addResource(f); if (conf.getResource(f) == null) { throw new Exception("Unable to find required config file: " + f); } } for (String f : OPTIONAL_CONFIGS) { conf.addResource(f); } conf.reloadConfiguration(); if (options.getName() != null) { // update service registry configs - caveat: this has nothing to do with the actual settings // as read by the AM // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between // instances conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName()); } if (options.getSize() != -1) { if (options.getCache() != -1) { Preconditions.checkArgument( options.getCache() < options.getSize(), "Cache has to be smaller than the container sizing"); } if (options.getXmx() != -1) { Preconditions.checkArgument( options.getXmx() < options.getSize(), "Working memory has to be smaller than the container sizing"); } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) { Preconditions.checkArgument( options.getXmx() + options.getCache() < options.getSize(), "Working memory + cache has to be smaller than the containing sizing "); } } final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1); if (options.getSize() != -1) { final long containerSize = options.getSize() / (1024 * 1024); Preconditions.checkArgument( containerSize >= minAlloc, "Container size should be greater than minimum 
allocation(%s)", minAlloc + "m"); conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize); } if (options.getExecutors() != -1) { conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors()); // TODO: vcpu settings - possibly when DRFA works right } if (options.getCache() != -1) { conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache()); } if (options.getXmx() != -1) { // Needs more explanation here // Xmx is not the max heap value in JDK8 // You need to subtract 50% of the survivor fraction from this, to get actual usable memory // before it goes into GC conf.setLong( ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, (long) (options.getXmx()) / (1024 * 1024)); } for (Entry<Object, Object> props : options.getConfig().entrySet()) { conf.set((String) props.getKey(), (String) props.getValue()); } URL logger = conf.getResource("llap-daemon-log4j2.properties"); if (null == logger) { throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties"); } Path home = new Path(System.getenv("HIVE_HOME")); Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin"); if (!lfs.exists(home)) { throw new Exception("Unable to find HIVE_HOME:" + home); } else if (!lfs.exists(scripts)) { LOG.warn("Unable to find llap scripts:" + scripts); } Path libDir = new Path(tmpDir, "lib"); String tezLibs = conf.get("tez.lib.uris"); if (tezLibs == null) { LOG.warn("Missing tez.lib.uris in tez-site.xml"); } if (LOG.isDebugEnabled()) { LOG.debug("Copying tez libs from " + tezLibs); } lfs.mkdirs(libDir); fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz")); CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true); lfs.delete(new Path(libDir, "tez.tar.gz"), false); lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir); lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), 
libDir); // copy default aux classes (json/hbase) for (String className : DEFAULT_AUX_CLASSES) { localizeJarForClass(lfs, libDir, className, false); } if (options.getIsHBase()) { try { localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true); Job fakeJob = new Job(new JobConf()); // HBase API is convoluted. TableMapReduceUtil.addDependencyJars(fakeJob); Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars"); for (String jarPath : hbaseJars) { if (!jarPath.isEmpty()) { lfs.copyFromLocalFile(new Path(jarPath), libDir); } } } catch (Throwable t) { String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them"; LOG.error(err); System.err.println(err); throw new RuntimeException(t); } } String auxJars = options.getAuxJars(); if (auxJars != null && !auxJars.isEmpty()) { // TODO: transitive dependencies warning? String[] jarPaths = auxJars.split(","); for (String jarPath : jarPaths) { if (!jarPath.isEmpty()) { lfs.copyFromLocalFile(new Path(jarPath), libDir); } } } Path confPath = new Path(tmpDir, "conf"); lfs.mkdirs(confPath); for (String f : NEEDED_CONFIGS) { copyConfig(options, lfs, confPath, f); } for (String f : OPTIONAL_CONFIGS) { try { copyConfig(options, lfs, confPath, f); } catch (Throwable t) { LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage()); } } lfs.copyFromLocalFile(new Path(logger.toString()), confPath); // extract configs for processing by the python fragments in Slider JSONObject configs = new JSONObject(); configs.put( ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB)); configs.put( HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE)); configs.put( HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname, HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)); configs.put( ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, 
HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB)); configs.put( ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE)); configs.put( ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS)); configs.put( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1)); configs.put( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1)); FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json")); OutputStreamWriter w = new OutputStreamWriter(os); configs.write(w); w.close(); os.close(); lfs.close(); fs.close(); if (LOG.isDebugEnabled()) { LOG.debug("Exiting successfully"); } }