@VisibleForTesting
void initConfig() throws IOException {
  this.cgroupPrefix = conf.get(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY,
      "/hadoop-yarn");
  this.cgroupMount = conf.getBoolean(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT, false);
  this.cgroupMountPath = conf.get(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, null);

  this.deleteCgroupTimeout = conf.getLong(
      YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT,
      YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT);
  // remove extra /'s at end or start of cgroupPrefix
  if (cgroupPrefix.charAt(0) == '/') {
    cgroupPrefix = cgroupPrefix.substring(1);
  }

  this.strictResourceUsageMode = conf.getBoolean(
      YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE,
      YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE);

  int len = cgroupPrefix.length();
  if (cgroupPrefix.charAt(len - 1) == '/') {
    cgroupPrefix = cgroupPrefix.substring(0, len - 1);
  }
}
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
 */
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  if (conf.getBoolean("debug.on", false)) {
    LOG.setLevel(Level.DEBUG);
    System.out.println("in debug mode");
  }

  fieldDelim = conf.get("field.delim", ",");
  subFieldDelim = conf.get("sub.field.delim", ":");

  String ratingFilePrefix = conf.get("utp.rating.file.prefix", "rating");
  isRatingFileSplit = ((FileSplit) context.getInputSplit()).getPath().getName()
      .startsWith(ratingFilePrefix);
  String ratingStatFilePrefix = conf.get("utp.rating.stat.file.prefix", "stat");
  isRatingStatFileSplit = ((FileSplit) context.getInputSplit()).getPath().getName()
      .startsWith(ratingStatFilePrefix);

  linearCorrelation = conf.getBoolean("utp.correlation.linear", true);
  int ratingTimeWindow = conf.getInt("utp.rating.time.window.hour", -1);
  ratingTimeCutoff = ratingTimeWindow > 0
      ? System.currentTimeMillis() / 1000 - ratingTimeWindow * 60L * 60L
      : -1;

  minInputRating = conf.getInt("utp.min.input.rating", -1);
  minCorrelation = conf.getInt("utp.min.correlation", -1);

  userRatingWithContext = conf.getBoolean("utp.user.rating.with.context", false);
  LOG.info("isRatingFileSplit:" + isRatingFileSplit);
}
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
 */
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  fieldDelim = conf.get("field.delim", ",");
  linearCorrelation = conf.getBoolean("utp.correlation.linear", true);
  correlationScale = conf.getInt("utp.correlation.linear.scale", 1000);
  maxRating = conf.getInt("utp.max.rating", 100);
  correlationModifier = conf.getFloat("utp.correlation.modifier", (float) 1.0);
  userRatingWithContext = conf.getBoolean("utp.user.rating.with.context", false);
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  if (conf.getBoolean("table", false)) {
    splits.addAll(tableInputFormat.getSplits(context));
  }
  if (conf.getBoolean("file", false)) {
    splits.addAll(fileInputFormat.getSplits(context));
  }
  context.getConfiguration().setInt("mapred.reduce.tasks", splits.size());
  return splits;
}
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
 */
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration config = context.getConfiguration();
  fieldDelimRegex = config.get("field.delim.regex", ",");
  isValidationMode = config.getBoolean("validation.mode", true);
  classCondtionWeighted = config.getBoolean("class.condition.weighted", false);
  String predictionMode = config.get("prediction.mode", "classification");
  String regressionMethod = config.get("regression.method", "average");
  isLinearRegression = predictionMode.equals("regression")
      && regressionMethod.equals("linearRegression");
}
/** Set the {@link Configuration} object */
public void setConf(Configuration conf) {
  this.conf = conf;
  this.maxContentLength = conf.getInt("ftp.content.limit", 64 * 1024);
  this.timeout = conf.getInt("ftp.timeout", 10000);
  this.userName = conf.get("ftp.username", "anonymous");
  this.passWord = conf.get("ftp.password", "*****@*****.**");
  this.serverTimeout = conf.getInt("ftp.server.timeout", 60 * 1000);
  this.keepConnection = conf.getBoolean("ftp.keep.connection", false);
  this.followTalk = conf.getBoolean("ftp.follow.talk", false);
  this.robots.setConf(conf);
}
@Override
public void start() {
  try {
    Configuration conf = getConf();
    eagerInitListener.start();
    taskTrackerManager.addJobInProgressListener(eagerInitListener);
    taskTrackerManager.addJobInProgressListener(jobListener);
    poolMgr = new PoolManager(conf);
    loadMgr = (LoadManager) ReflectionUtils.newInstance(
        conf.getClass("mapred.fairscheduler.loadmanager",
            CapBasedLoadManager.class, LoadManager.class), conf);
    loadMgr.setTaskTrackerManager(taskTrackerManager);
    loadMgr.start();
    taskSelector = (TaskSelector) ReflectionUtils.newInstance(
        conf.getClass("mapred.fairscheduler.taskselector",
            DefaultTaskSelector.class, TaskSelector.class), conf);
    taskSelector.setTaskTrackerManager(taskTrackerManager);
    taskSelector.start();
    Class<?> weightAdjClass = conf.getClass("mapred.fairscheduler.weightadjuster", null);
    if (weightAdjClass != null) {
      weightAdjuster = (WeightAdjuster) ReflectionUtils.newInstance(weightAdjClass, conf);
    }
    assignMultiple = conf.getBoolean("mapred.fairscheduler.assignmultiple", false);
    sizeBasedWeight = conf.getBoolean("mapred.fairscheduler.sizebasedweight", false);
    initialized = true;
    running = true;
    lastUpdateTime = clock.getTime();
    // Start a thread to update deficits every UPDATE_INTERVAL
    if (runBackgroundUpdates) {
      new UpdateThread().start();
    }
    // Register servlet with JobTracker's Jetty server
    if (taskTrackerManager instanceof JobTracker) {
      JobTracker jobTracker = (JobTracker) taskTrackerManager;
      StatusHttpServer infoServer = jobTracker.infoServer;
      infoServer.setAttribute("scheduler", this);
      infoServer.addServlet("scheduler", "/scheduler", FairSchedulerServlet.class);
    }
  } catch (Exception e) {
    // Can't load one of the managers - crash the JobTracker now while it is
    // starting up so that the user notices.
    throw new RuntimeException("Failed to start FairScheduler", e);
  }
  LOG.info("Successfully configured FairScheduler");
}
@Override
public void setup(Context context) throws IOException, InterruptedException {
  conf = context.getConfiguration();
  caseSensitive = conf.getBoolean("wordcount.case.sensitive", true);
  if (conf.getBoolean("wordcount.skip.patterns", true)) {
    URI[] patternsURIs = Job.getInstance(conf).getCacheFiles();
    for (URI patternsURI : patternsURIs) {
      Path patternsPath = new Path(patternsURI.getPath());
      String patternsFileName = patternsPath.getName().toString();
      parseSkipFile(patternsFileName);
    }
  }
}
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
 */
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  fieldDelim = conf.get("field.delim", ",");
  nearestByCount = conf.getBoolean("tom.nearest.by.count", true);
  nearestByDistance = conf.getBoolean("tom.nearest.by.distance", false);
  if (nearestByCount) {
    topMatchCount = conf.getInt("tom.top.match.count", 10);
  } else {
    topMatchDistance = conf.getInt("tom.top.match.distance", 200);
  }
  recordInOutput = conf.getBoolean("tom.record.in.output", false);
  compactOutput = conf.getBoolean("tom.compact.output", false);
  outputWithNoNeighbor = conf.getBoolean("tom.output.with.no.neighbor", false);
}
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.mapSleepCount = conf.getInt(MAP_SLEEP_COUNT, mapSleepCount);
  this.mapSleepDuration =
      mapSleepCount == 0 ? 0 : conf.getLong(MAP_SLEEP_TIME, 100) / mapSleepCount;
  vertexName = conf.get(org.apache.tez.mapreduce.hadoop.MRJobConfig.VERTEX_NAME);

  TaskAttemptID taId = context.getTaskAttemptID();

  ObjectRegistry objectRegistry = ObjectRegistryFactory.getObjectRegistry();
  String fooBarVal = (String) objectRegistry.get("FooBar");
  if (null == fooBarVal) {
    LOG.info("Adding FooBar key to Object cache");
    objectRegistry.add(ObjectLifeCycle.DAG, "FooBar",
        "BarFooFromTask" + taId.getTaskID().toString());
  } else {
    LOG.info("Got FooBar val from Object cache"
        + ", currentTaskId=" + taId.getTaskID().toString()
        + ", val=" + fooBarVal);
  }

  String[] taskIds = conf.getStrings(MAP_ERROR_TASK_IDS);
  if (taId.getId() + 1 >= context.getMaxMapAttempts()) {
    finalAttempt = true;
  }
  boolean found = false;
  if (taskIds != null) {
    if (taskIds.length == 1 && taskIds[0].equals("*")) {
      found = true;
    }
    if (!found) {
      for (String taskId : taskIds) {
        if (Integer.valueOf(taskId).intValue() == taId.getTaskID().getId()) {
          found = true;
          break;
        }
      }
    }
  }
  if (found) {
    if (!finalAttempt) {
      throwError = conf.getBoolean(MAP_THROW_ERROR, false);
    }
    throwFatal = conf.getBoolean(MAP_FATAL_ERROR, false);
  }
}
static {
  try {
    // call newInstance() instead of using a shared instance from a cache
    // to avoid accidentally having it closed by someone else
    FileSystem fs = FileSystem.newInstance(FileSystem.getDefaultUri(CONF), CONF);
    if (!(fs instanceof DistributedFileSystem)) {
      String error = "Cannot connect to HDFS. "
          + CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY + "("
          + CONF.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY) + ")"
          + " might be set incorrectly";
      throw new RuntimeException(error);
    }
    DFS = (DistributedFileSystem) fs;
  } catch (IOException e) {
    throw new RuntimeException("couldn't retrieve FileSystem:\n" + e.getMessage(), e);
  }

  SUPPORTS_VOLUME_ID = CONF.getBoolean(
      DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
      DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
}
public void initialize(Configuration conf, int reducerId) {
  int numFiles = FileNames.values().length;
  writers = new HDFSCSVWriter[numFiles];
  for (int i = 0; i < numFiles; ++i) {
    writers[i] = new HDFSCSVWriter(
        conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
        FileNames.values()[i].toString() + "_" + reducerId,
        conf.getInt("ldbc.snb.datagen.numPartitions", 1),
        conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false),
        "|",
        true);
  }

  ArrayList<String> arguments = new ArrayList<String>();
  arguments.add("id");
  arguments.add("nickname");
  writers[FileNames.PERSON.ordinal()].writeEntry(arguments);

  arguments.clear();
  arguments.add("User.id");
  arguments.add("User.id");
  writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
  arguments.clear();
}
/**
 * Checks whether the user sending the HttpServletRequest has administrator ACLs. If not, the
 * response is modified to send an error to the user.
 *
 * @param servletContext servlet context holding the configuration and admin ACLs
 * @param request the request whose remote user is checked
 * @param response used to send the error response if user does not have admin access
 * @return true if admin-authorized, false otherwise
 * @throws IOException
 */
static boolean hasAdministratorAccess(
    ServletContext servletContext, HttpServletRequest request, HttpServletResponse response)
    throws IOException {
  Configuration conf = (Configuration) servletContext.getAttribute(CONF_CONTEXT_ATTRIBUTE);
  // If there is no authorization, anybody has administrator access.
  if (!conf.getBoolean(CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) {
    return true;
  }

  String remoteUser = request.getRemoteUser();
  if (remoteUser == null) {
    response.sendError(HttpServletResponse.SC_UNAUTHORIZED,
        "Unauthenticated users are not authorized to access this page.");
    return false;
  }

  if (servletContext.getAttribute(ADMINS_ACL) != null
      && !userHasAdministratorAccess(servletContext, remoteUser)) {
    response.sendError(HttpServletResponse.SC_UNAUTHORIZED,
        "User " + remoteUser + " is unauthorized to access this page.");
    return false;
  }

  return true;
}
private void addRMDelegationToken(final String renewer, final Credentials credentials)
    throws IOException, YarnException {
  // Get the ResourceManager delegation rmToken
  final org.apache.hadoop.yarn.api.records.Token rmDelegationToken =
      yarnClient.getRMDelegationToken(new Text(renewer));

  Token<RMDelegationTokenIdentifier> token;
  // TODO: Use the utility method getRMDelegationTokenService in ClientRMProxy to remove the
  // separate handling of
  // TODO: HA and non-HA cases when hadoop dependency is changed to hadoop 2.4 or above
  if (conf.getBoolean(RM_HA_ENABLED, DEFAULT_RM_HA_ENABLED)) {
    LOG.info("Yarn Resource Manager HA is enabled");
    token = getRMHAToken(rmDelegationToken);
  } else {
    LOG.info("Yarn Resource Manager HA is not enabled");
    InetSocketAddress rmAddress = conf.getSocketAddr(
        YarnConfiguration.RM_ADDRESS,
        YarnConfiguration.DEFAULT_RM_ADDRESS,
        YarnConfiguration.DEFAULT_RM_PORT);
    token = ConverterUtils.convertFromYarn(rmDelegationToken, rmAddress);
  }

  LOG.info("RM dt {}", token);
  credentials.addToken(token.getService(), token);
}
public void run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.println("MatMulMap2 <inPath> <outPath>");
    System.exit(-1);
  }
  inPath = new Path(args[0]);
  APath = new Path(inPath, "A");
  BPath = new Path(inPath, "B");
  outPath = new Path(args[1]);

  conf = new Configuration();
  conf.addResource("matmul-conf.xml");
  FileSystem fs = FileSystem.get(conf);

  long start, end;

  // prepare
  if ((conf.getBoolean("matmul.initialize", true)) || (!fs.exists(inPath))) {
    MatMulMap2Prep prep = new MatMulMap2Prep();
    prep.setPath(APath, BPath);
    prep.run();
  }

  fs.delete(outPath);

  start = System.currentTimeMillis();
  conf.setLong("matmul.versionId", start);
  waitForJobFinish(configStage());
  end = System.currentTimeMillis();

  System.out.println("===map2 experiment===<time>[MatMulMap2]: " + (end - start) + " ms");
}
@Override
public Configuration getConf() {
  if (conf.getBoolean("table", false)) {
    return tableInputFormat.getConf();
  }
  return conf;
}
public void setConfiguration(Configuration conf) {
  Lock.enableSetPartitionKey(
      conf.getBoolean(
          DFSConfigKeys.DFS_SET_PARTITION_KEY_ENABLED,
          DFSConfigKeys.DFS_SET_PARTITION_KEY_ENABLED_DEFAULT));
  BaseINodeLock.setDefaultLockType(getPrecedingPathLockType(conf));
}
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
 */
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  fieldDelim = conf.get("field.delim", ",");
  fieldDelimRegex = conf.get("field.delim.regex", ",");
  distOrdinal = conf.getInt("tom.distance.ordinal", -1);
  recordInOutput = conf.getBoolean("tom.record.in.output", false);
}
@Override
public synchronized void serviceInit(Configuration conf) {
  this.maxTaskFailuresPerNode = conf.getInt(
      TezConfiguration.TEZ_MAX_TASK_FAILURES_PER_NODE,
      TezConfiguration.TEZ_MAX_TASK_FAILURES_PER_NODE_DEFAULT);
  this.nodeBlacklistingEnabled = conf.getBoolean(
      TezConfiguration.TEZ_NODE_BLACKLISTING_ENABLED,
      TezConfiguration.TEZ_NODE_BLACKLISTING_ENABLED_DEFAULT);
  this.blacklistDisablePercent = conf.getInt(
      TezConfiguration.TEZ_NODE_BLACKLISTING_IGNORE_THRESHOLD,
      TezConfiguration.TEZ_NODE_BLACKLISTING_IGNORE_THRESHOLD_DEFAULT);

  LOG.info("blacklistDisablePercent is " + blacklistDisablePercent
      + ", blacklistingEnabled: " + nodeBlacklistingEnabled
      + ", maxTaskFailuresPerNode: " + maxTaskFailuresPerNode);

  if (blacklistDisablePercent < -1 || blacklistDisablePercent > 100) {
    throw new TezUncheckedException(
        "Invalid blacklistDisablePercent: " + blacklistDisablePercent
            + ". Should be an integer between 0 and 100 or -1 to disable");
  }
}
public void setConf(Configuration conf) {
  this.conf = conf;
  scorePower = conf.getFloat("indexer.score.power", 0.5f);
  internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f);
  externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f);
  countFiltered = conf.getBoolean("db.score.count.filtered", false);
}
@Override
public void setConf(Configuration conf) {
  if (conf.getBoolean("table", false)) {
    tableInputFormat.setConf(conf);
  }
  this.conf = conf;
}
/**
 * Performs an HDF to text operation as a MapReduce job and returns total number of points
 * generated.
 *
 * @param inPath path of the input HDF file
 * @param outPath path of the output
 * @param datasetName name of the dataset to extract from the HDF file
 * @param skipFillValue whether points with the fill value are skipped
 * @param params additional operation parameters
 * @return total number of points generated
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(
    Path inPath, Path outPath, String datasetName, boolean skipFillValue, OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "HDFToText");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(HDFToText.class);
  job.setJobName("HDFToText");

  // Set Map function details
  job.setMapperClass(HDFToTextMap.class);
  job.setNumReduceTasks(0);

  // Set input information
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  if (conf.get("shape") == null) {
    conf.setClass("shape", NASAPoint.class, Shape.class);
  }
  conf.set("dataset", datasetName);
  conf.setBoolean("skipfillvalue", skipFillValue);

  // Set output information
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outPath);

  // Run the job
  boolean verbose = conf.getBoolean("verbose", false);
  job.waitForCompletion(verbose);

  Counters counters = job.getCounters();
  Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
  final long resultCount = outputRecordCounter.getValue();
  return resultCount;
}
/**
 * Inspect the log directory to recover any log file without
 * an active region server.
 */
void splitLogAfterStartup() {
  boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors",
      HLog.SPLIT_SKIP_ERRORS_DEFAULT);
  Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
  do {
    if (master.isStopped()) {
      LOG.warn("Master stopped while splitting logs");
      break;
    }
    List<ServerName> serverNames = new ArrayList<ServerName>();
    try {
      if (!this.fs.exists(logsDirPath)) return;
      FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null);
      // Get online servers after getting log folders to avoid log folder deletion of newly
      // checked in region servers. See HBASE-5916.
      Set<ServerName> onlineServers =
          ((HMaster) master).getServerManager().getOnlineServers().keySet();

      if (logFolders == null || logFolders.length == 0) {
        LOG.debug("No log files to split, proceeding...");
        return;
      }
      for (FileStatus status : logFolders) {
        String sn = status.getPath().getName();
        // truncate splitting suffix if present (for ServerName parsing)
        if (sn.endsWith(HLog.SPLITTING_EXT)) {
          sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length());
        }
        ServerName serverName = ServerName.parseServerName(sn);
        if (!onlineServers.contains(serverName)) {
          LOG.info("Log folder " + status.getPath() + " doesn't belong "
              + "to a known region server, splitting");
          serverNames.add(serverName);
        } else {
          LOG.info("Log folder " + status.getPath() + " belongs to an existing region server");
        }
      }
      splitLog(serverNames);
      retrySplitting = false;
    } catch (IOException ioe) {
      LOG.warn("Failed splitting of " + serverNames, ioe);
      if (!checkFileSystem()) {
        LOG.warn("Bad Filesystem, exiting");
        Runtime.getRuntime().halt(1);
      }
      try {
        if (retrySplitting) {
          Thread.sleep(conf.getInt("hbase.hlog.split.failure.retry.interval", 30 * 1000));
        }
      } catch (InterruptedException e) {
        LOG.warn("Interrupted, aborting since cannot return w/o splitting");
        Thread.currentThread().interrupt();
        retrySplitting = false;
        Runtime.getRuntime().halt(1);
      }
    }
  } while (retrySplitting);
}
@Override
protected void serviceInit(Configuration conf) throws Exception {
  this.exitOnDispatchException = conf.getBoolean(
      Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY,
      Dispatcher.DEFAULT_DISPATCHER_EXIT_ON_ERROR);
  super.serviceInit(conf);
}
/**
 * Constructor that creates a connection to the local ZooKeeper ensemble.
 *
 * @param conf Configuration to use
 * @throws IOException if an internal replication error occurs
 * @throws RuntimeException if replication isn't enabled.
 */
public ReplicationAdmin(Configuration conf) throws IOException {
  if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
      HConstants.REPLICATION_ENABLE_DEFAULT)) {
    throw new RuntimeException("hbase.replication isn't true, please "
        + "enable it in order to use replication");
  }
  this.connection = ConnectionFactory.createConnection(conf);
  try {
    zkw = createZooKeeperWatcher();
    try {
      this.replicationPeers = ReplicationFactory.getReplicationPeers(zkw, conf, this.connection);
      this.replicationPeers.init();
      this.replicationQueuesClient =
          ReplicationFactory.getReplicationQueuesClient(zkw, conf, this.connection);
      this.replicationQueuesClient.init();
    } catch (Exception exception) {
      if (zkw != null) {
        zkw.close();
      }
      throw exception;
    }
  } catch (Exception exception) {
    if (connection != null) {
      connection.close();
    }
    if (exception instanceof IOException) {
      throw (IOException) exception;
    } else if (exception instanceof RuntimeException) {
      throw (RuntimeException) exception;
    } else {
      throw new IOException("Error initializing the replication admin client.", exception);
    }
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();

  // pass client configuration into driver
  Properties clientInfos = new Properties();
  for (Map.Entry<String, String> entry : conf) {
    clientInfos.setProperty(entry.getKey(), entry.getValue());
  }

  try {
    conn = (PhoenixConnection) QueryUtil.getConnectionOnServer(clientInfos, conf);
    // We are dependent on rolling back before performing commits, so we need to be sure
    // that auto-commit is not turned on
    conn.setAutoCommit(false);

    final String tableNamesConf = conf.get(TABLE_NAMES_CONFKEY);
    final String logicalNamesConf = conf.get(LOGICAL_NAMES_CONFKEY);
    tableNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(tableNamesConf);
    logicalNames = TargetTableRefFunctions.NAMES_FROM_JSON.apply(logicalNamesConf);

    initColumnIndexes();
  } catch (SQLException | ClassNotFoundException e) {
    throw new RuntimeException(e);
  }

  upsertListener = new MapperUpsertListener<RECORD>(
      context, conf.getBoolean(IGNORE_INVALID_ROW_CONFKEY, true));
  upsertExecutor = buildUpsertExecutor(conf);
  preUpdateProcessor = PhoenixConfigurationUtil.loadPreUpsertProcessor(conf);
}
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
  try {
    Configuration conf = ctx.getConfiguration();
    Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
    Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
        "missing paths from the DistributedCache");

    Path inputVectorPath = HadoopUtil.getSingleCachedFile(conf);

    SequenceFileValueIterator<VectorWritable> iterator =
        new SequenceFileValueIterator<VectorWritable>(inputVectorPath, true, conf);
    try {
      inputVector = iterator.next().get();
    } finally {
      Closeables.close(iterator, true);
    }

    int outDim = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
    outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
        ? new RandomAccessSparseVector(outDim, 10)
        : new DenseVector(outDim);
  } catch (IOException ioe) {
    throw new IllegalStateException(ioe);
  }
}
@Override
public void configure(Configuration conf, FileSystem fs) {
  // read stopwords from file (stopwords will be empty set if file does not exist or is empty)
  String stopwordsFile = conf.get(Constants.StopwordList);
  stopwords = readInput(fs, stopwordsFile);
  String stemmedStopwordsFile = conf.get(Constants.StemmedStopwordList);
  stemmedStopwords = readInput(fs, stemmedStopwordsFile);
  isStopwordRemoval = !stopwords.isEmpty();
  isStemming = conf.getBoolean(Constants.Stemming, true);

  VocabularyWritable vocab;
  try {
    vocab = (VocabularyWritable) HadoopAlign.loadVocab(
        new Path(conf.get(Constants.CollectionVocab)), fs);
    setVocab(vocab);
  } catch (Exception e) {
    LOG.warn("No vocabulary provided to tokenizer.");
    vocab = null;
  }

  LOG.warn("Stemming is " + isStemming + "; Stopword removal is " + isStopwordRemoval
      + "; number of stopwords: " + stopwords.size()
      + "; stemmed: " + stemmedStopwords.size());
}
/** Destroy all services. */
public void destroy() {
  XLog log = new XLog(LogFactory.getLog(getClass()));
  log.trace("Shutting down");
  boolean deleteRuntimeDir = false;
  if (conf != null) {
    deleteRuntimeDir = conf.getBoolean(CONF_DELETE_RUNTIME_DIR, false);
  }
  if (services != null) {
    List<Service> list = new ArrayList<Service>(services.values());
    Collections.reverse(list);
    for (Service service : list) {
      try {
        log.trace("Destroying service[{0}]", service.getInterface());
        if (service.getInterface() == XLogService.class) {
          log.info("Shutdown");
        }
        service.destroy();
      } catch (Throwable ex) {
        log.error("Error destroying service[{0}], {1}", service.getInterface(),
            ex.getMessage(), ex);
      }
    }
  }
  if (deleteRuntimeDir) {
    try {
      IOUtils.delete(new File(runtimeDir));
    } catch (IOException ex) {
      log.error("Error deleting runtime directory [{0}], {1}", runtimeDir, ex.getMessage(), ex);
    }
  }
  services = null;
  conf = null;
  SERVICES = null;
}
/**
 * Get the descriptive name as {@link RegionState} does it, but optionally with the start key
 * hidden.
 *
 * @param state region state to describe
 * @param conf configuration that controls whether keys may be displayed
 * @return descriptive string
 */
public static String getDescriptiveNameFromRegionStateForDisplay(
    RegionState state, Configuration conf) {
  if (conf.getBoolean(DISPLAY_KEYS_KEY, true)) return state.toDescriptiveString();
  String descriptiveStringFromState = state.toDescriptiveString();
  int idx = descriptiveStringFromState.lastIndexOf(" state=");
  String regionName = getRegionNameAsStringForDisplay(state.getRegion(), conf);
  return regionName + descriptiveStringFromState.substring(idx);
}