/**
 * Stores the supplied configuration and refreshes the scoring parameters from it.
 *
 * <p>Keys read (with the defaults used when a key is absent): {@code indexer.score.power} (0.5),
 * {@code db.score.link.internal} (1.0), {@code db.score.link.external} (1.0) and
 * {@code db.score.count.filtered} (false).
 *
 * @param conf configuration to keep and read from; it is dereferenced, so it must not be null
 */
public void setConf(Configuration conf) {
  this.conf = conf;

  // Exponent / link weights used by the scoring code elsewhere in this class.
  this.scorePower = conf.getFloat("indexer.score.power", 0.5f);
  this.internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f);
  this.externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f);

  this.countFiltered = conf.getBoolean("db.score.count.filtered", false);
}
public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) return; INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f); DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f); MIN_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.min_interval", 60); MAX_INTERVAL = conf.getInt( "db.fetch.schedule.adaptive.max_interval", FetchSchedule.SECONDS_PER_DAY * 365); // 1 year SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true); SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f); }
@Override public void setConf(Configuration conf) { this.conf = conf; this.maximumStepSize = conf.getFloat(MAX_STEP_KEY, DEFAULT_MAX_STEP_VALUE); this.minimumStepSize = conf.getFloat(MIN_STEP_KEY, DEFAULT_MIN_STEP_VALUE); this.step = this.maximumStepSize; this.sufficientMemoryLevel = conf.getFloat(SUFFICIENT_MEMORY_LEVEL_KEY, DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE); this.tunerLookupPeriods = conf.getInt(LOOKUP_PERIODS_KEY, DEFAULT_LOOKUP_PERIODS); this.blockCachePercentMinRange = conf.getFloat( BLOCK_CACHE_SIZE_MIN_RANGE_KEY, conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); this.blockCachePercentMaxRange = conf.getFloat( BLOCK_CACHE_SIZE_MAX_RANGE_KEY, conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); this.globalMemStorePercentMinRange = conf.getFloat( MEMSTORE_SIZE_MIN_RANGE_KEY, HeapMemorySizeUtil.getGlobalMemStorePercent(conf, false)); this.globalMemStorePercentMaxRange = conf.getFloat( MEMSTORE_SIZE_MAX_RANGE_KEY, HeapMemorySizeUtil.getGlobalMemStorePercent(conf, false)); // Default value of periods to ignore is number of lookup periods this.numPeriodsToIgnore = conf.getInt(NUM_PERIODS_TO_IGNORE, this.tunerLookupPeriods); this.rollingStatsForCacheMisses = new RollingStatCalculator(this.tunerLookupPeriods); this.rollingStatsForFlushes = new RollingStatCalculator(this.tunerLookupPeriods); this.rollingStatsForEvictions = new RollingStatCalculator(this.tunerLookupPeriods); }
/**
 * Loads the in-memory matrices and mean vectors this task needs and allocates the accumulator
 * vectors.
 *
 * <p>Paths and dimensions are read from the job configuration under the keys
 * {@code RECONSTRUCTIONMATRIX}, {@code MATRIXY2X}, {@code ZMPATH}, {@code YMPATH},
 * {@code YCOLS}, {@code XCOLS} and {@code ERRSAMPLERATE}.
 *
 * @param context task context supplying the job configuration
 * @throws IOException if a matrix or vector cannot be read. A failure to load {@code zm} or
 *     {@code ym} is propagated instead of being printed and ignored, because continuing would
 *     leave those fields unset.
 */
@Override
public void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();

  Path cMemMatrixPath = new Path(conf.get(RECONSTRUCTIONMATRIX));
  Path dMemMatrixPath = new Path(conf.get(MATRIXY2X));
  Path zmPath = new Path(conf.get(ZMPATH));
  Path meanPath = new Path(conf.get(YMPATH));
  int inMemMatrixNumRows = conf.getInt(YCOLS, 0);
  int inMemMatrixNumCols = conf.getInt(XCOLS, 0);
  ERR_SAMPLE_RATE = conf.getFloat(ERRSAMPLERATE, 1);

  // Both distributed matrices share the parent directory of the first one as their temp path.
  Path tmpPath = cMemMatrixPath.getParent();

  DistributedRowMatrix distMatrix =
      new DistributedRowMatrix(cMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
  distMatrix.setConf(conf);
  matrixC = PCACommon.toDenseMatrix(distMatrix);

  distMatrix =
      new DistributedRowMatrix(dMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
  distMatrix.setConf(conf);
  matrixY2X = PCACommon.toDenseMatrix(distMatrix);

  // Fail the task setup if the vectors cannot be loaded; the method already declares
  // IOException, so there is no reason to swallow it here.
  zm = PCACommon.toDenseVector(zmPath, conf);
  ym = PCACommon.toDenseVector(meanPath, conf);

  // Accumulators are sized by the row count of matrixC.
  xiCt = new DenseVector(matrixC.numRows());
  sumOfErr = new DenseVector(matrixC.numRows());
  sumOfyi = new DenseVector(matrixC.numRows());
  sumOfyc = new DenseVector(matrixC.numRows());
}
/** * This functions modifies the splits list in place to retain only a random fraction of the input * splits. The fraction is expected in "starfish.profiler.sampling.fraction" as a number between 0 * and 1. The default value is 0.1. * * @param job The job context * @param splits The list of input splits to modify */ public static void sampleInputSplits(JobContext job, List<InputSplit> splits) { // Get the sampling fraction Configuration conf = job.getConfiguration(); double fraction = conf.getFloat(Profiler.PROFILER_SAMPLING_FRACTION, 0.1f); if (fraction < 0 || fraction > 1) throw new RuntimeException("ERROR: Invalid sampling fraction: " + fraction); // Handle corner cases if (fraction == 0 || splits.size() == 0) { splits.clear(); return; } if (fraction == 1) return; // Calculate the number of samples int numSplits = splits.size(); int sampleSize = (int) Math.round(numSplits * fraction); if (sampleSize == 0) sampleSize = 1; // Shuffle the splits Collections.shuffle(splits); // Retain only a sampleSize number of splits for (int i = splits.size() - 1; i >= sampleSize; --i) { splits.remove(i); } nf.setMaximumFractionDigits(2); LOG.info("Executing only " + nf.format(fraction * 100) + "% of the map tasks"); }
/**
 * Creates a tracker with zeroed counters and limits taken from the configuration.
 *
 * @param conf source of {@code BAD_RECORD_THRESHOLD_CONF_KEY} (default 0.0001, i.e. 0.01%) and
 *     {@code BAD_RECORD_MIN_COUNT_CONF_KEY} (default 2)
 */
InputErrorTracker(Configuration conf) {
  // Error-rate threshold; the default of 0.0001 corresponds to 0.01%.
  this.errorThreshold = conf.getFloat(BAD_RECORD_THRESHOLD_CONF_KEY, 0.0001f);
  this.minErrors = conf.getLong(BAD_RECORD_MIN_COUNT_CONF_KEY, 2);

  // Nothing has been seen yet.
  this.numRecords = 0;
  this.numErrors = 0;
}
/**
 * Reads the per-job values {@code "MissingMass"} (default 0.0) and {@code "NodeCount"}
 * (default 0) from the job configuration.
 *
 * @param context task context supplying the job configuration
 * @throws IOException declared by the overridden method; not thrown by the visible code
 */
@Override
public void setup(Context context) throws IOException {
  final Configuration jobConf = context.getConfiguration();
  this.missingMass = jobConf.getFloat("MissingMass", 0.0f);
  this.nodeCnt = jobConf.getInt("NodeCount", 0);
}
/**
 * Start the JobTracker process, listen on the indicated port.
 *
 * <p>In order, this constructor: reads tuning constants from {@code conf}; deletes and
 * recreates the system directory on the configured file system; deletes the local files under
 * {@code SUBDIR}; starts the inter-tracker RPC server; logs every JVM system property; starts
 * the info server; and finally starts the {@code expireTrackers}, {@code retireJobs} and
 * {@code initJobs} threads.
 *
 * <p>NOTE(review): threads are started and {@code this} is handed to the RPC/info servers
 * before the constructor returns, so the statement order below matters.
 *
 * @param conf configuration supplying the tuning constants, file system and tracker address
 * @throws IOException declared for the file-system and server calls made here (which of them
 *     can actually throw is not visible in this block)
 */
JobTracker(Configuration conf) throws IOException {
  //
  // Grab some static constants
  //
  maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
  // Defaults below are presumably milliseconds (one day / one minute) -- TODO confirm.
  RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000);
  RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000);
  TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f);
  PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f);
  // Derived from the per-tracker task maximum read above.
  MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks;

  // This is a directory of temporary submission files. We delete it
  // on startup, and can delete any files that we're done with
  this.conf = conf;
  JobConf jobConf = new JobConf(conf);
  this.systemDir = jobConf.getSystemDir();
  this.fs = FileSystem.get(conf);
  FileUtil.fullyDelete(fs, systemDir);
  fs.mkdirs(systemDir);

  // Same with 'localDir' except it's always on the local disk.
  jobConf.deleteLocalFiles(SUBDIR);

  // Set ports, start RPC servers, etc.
  InetSocketAddress addr = getAddress(conf);
  this.localMachine = addr.getHostName();
  this.port = addr.getPort();
  // NOTE(review): the literals 10 and false are passed straight to RPC.getServer; their meaning
  // (presumably handler count and a verbosity flag) should be confirmed against that API.
  this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf);
  this.interTrackerServer.start();

  // Dump every JVM system property to the log.
  Properties p = System.getProperties();
  for (Iterator it = p.keySet().iterator(); it.hasNext(); ) {
    String key = (String) it.next();
    String val = (String) p.getProperty(key);
    LOG.info("Property '" + key + "' is " + val);
  }

  // Start the info server on the configured port (default 50030).
  this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030);
  this.infoServer = new JobTrackerInfoServer(this, infoPort);
  this.infoServer.start();

  this.startTime = System.currentTimeMillis();

  // Background workers; each runnable is a field defined elsewhere in this class.
  new Thread(this.expireTrackers).start();
  new Thread(this.retireJobs).start();
  new Thread(this.initJobs).start();
}
static NaiveBayesModel readModelFromTempDir(Path base, Configuration conf) { float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f); // read feature sums and label sums Vector scoresPerLabel = null; Vector scoresPerFeature = null; for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>( new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) { String key = record.getFirst().toString(); VectorWritable value = record.getSecond(); if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) { scoresPerFeature = value.get(); } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) { scoresPerLabel = value.get(); } } Preconditions.checkNotNull(scoresPerFeature); Preconditions.checkNotNull(scoresPerLabel); Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size()); for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>( new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(), conf)) { scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get()); } Vector perlabelThetaNormalizer = null; for (Pair<Text, VectorWritable> entry : new SequenceFileDirIterable<Text, VectorWritable>( new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) { if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) { perlabelThetaNormalizer = entry.getSecond().get(); } } Preconditions.checkNotNull(perlabelThetaNormalizer); return new NaiveBayesModel( scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel, perlabelThetaNormalizer, alphaI); }
/*
 * Loads this reducer's settings from the job configuration: the field delimiter plus the
 * "utp.*" correlation and rating options, each with a built-in default.
 *
 * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
 */
protected void setup(Context context) throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();

  this.fieldDelim = jobConf.get("field.delim", ",");

  // Correlation handling.
  this.linearCorrelation = jobConf.getBoolean("utp.correlation.linear", true);
  this.correlationScale = jobConf.getInt("utp.correlation.linear.scale", 1000);

  // Rating handling.
  this.maxRating = jobConf.getInt("utp.max.rating", 100);
  this.correlationModifier = jobConf.getFloat("utp.correlation.modifier", 1.0f);
  this.userRatingWithContext = jobConf.getBoolean("utp.user.rating.with.context", false);
}
/**
 * Remembers the application context and registers per-job bookkeeping for every job it
 * currently reports: empty mapper and reducer statistics, and the slow-task threshold read
 * from {@code MRJobConfig.SPECULATIVE_SLOWTASK_THRESHOLD} (default 1.0).
 *
 * @param conf configuration supplying the slow-task threshold
 * @param context application context whose jobs are registered
 */
@Override
public void contextualize(Configuration conf, AppContext context) {
  this.context = context;

  Map<JobId, Job> knownJobs = context.getAllJobs();
  // Only the Job values are needed; the ids are not used here.
  for (final Job job : knownJobs.values()) {
    mapperStatistics.put(job, new DataStatistics());
    reducerStatistics.put(job, new DataStatistics());

    float slowTaskThreshold = conf.getFloat(MRJobConfig.SPECULATIVE_SLOWTASK_THRESHOLD, 1.0f);
    slowTaskRelativeTresholds.put(job, slowTaskThreshold);
  }
}
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int) Math.ceil( (double) (-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/* (non-Javadoc) * @see org.chombo.mr.Transformer.TransformerMapper#setup(org.apache.hadoop.mapreduce.Mapper.Context) */ protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration config = context.getConfiguration(); String strategy = config.get("novelty.gen.strategy", "selfInformation"); int maxRating = config.getInt("rating.scale", 100); RedisCache cache = RedisCache.createRedisCache(config, "ch"); if (strategy.equals("selfInformation")) { // based on rating distribution String countMaxValueKeyPrefix = config.get("count.max.value.key.prefix"); int engaementDistrScale = cache.getIntMax(countMaxValueKeyPrefix); registerTransformers(1, new Transformer.NullTransformer()); registerTransformers( 2, new IndividualNovelty.SelfInformation(engaementDistrScale, maxRating)); } else if (strategy.equals("nonLinearInverse")) { // based on rating double param = config.getFloat("quadratic.param", (float) 0.8); String ratingMaxValueKeyPrefix = config.get("rating.max.value.key.prefix"); int maxRatingInData = cache.getIntMax(ratingMaxValueKeyPrefix); registerTransformers( 1, new IndividualNovelty.NonLinearInverse(maxRating, param, maxRatingInData)); registerTransformers(2, new Transformer.NullTransformer()); } }
/** * Enable profiling for only a fraction of the MR tasks. The fraction is expected in * "starfish.profiler.sampling.fraction" as a number between 0 and 1. The default value is 0.1. * * <p>This function sets the Hadoop parameters mapred.task.profile.maps and * mapred.task.profile.reducers * * @param conf The job configuration * @return False if something goes wrong */ public static boolean sampleTasksToProfile(Configuration conf) { double fraction = conf.getFloat(Profiler.PROFILER_SAMPLING_FRACTION, 0.1f); if (fraction == 0d) { // Nothing to profile return false; } List<SFInputSplit> splits = null; try { splits = SFInputSplit.getInputSplits(conf); } catch (IOException e) { LOG.error("Unable to create input splits", e); return false; } catch (InterruptedException e) { LOG.error("Unable to create input splits", e); return false; } catch (ClassNotFoundException e) { LOG.error("Unable to create input splits", e); return false; } // Specify which mappers to profile if (splits != null && splits.size() != 0) { conf.set("mapred.task.profile.maps", sampleTasksToProfile(splits.size(), fraction)); } // Specify which reducers to profile int numReducers = conf.getInt(MR_RED_TASKS, 1); if (numReducers != 0) { conf.set("mapred.task.profile.reduces", sampleTasksToProfile(numReducers, fraction)); } nf.setMaximumFractionDigits(2); LOG.info("Profiling only " + nf.format(fraction * 100) + "% of the tasks"); return true; }
/**
 * Prepares the task: loads the dataset header, populates the class index if needed,
 * optionally loads document-frequency data, and loads the training instances.
 *
 * <p>Any exception raised while doing so is logged (with its stack trace) and stored in
 * {@code setupFailedException} rather than thrown from this method.
 *
 * @param context task context supplying the job configuration
 */
@Override
public void setup(Context context) {
  this.context = context;
  classIndex = ClassIndex.getInstance();

  Configuration conf = context.getConfiguration();
  float threshold = conf.getFloat(Util.CONF_MINIMUM_DF_OF_HOSTS, 0);

  try {
    Path headerPath = new Path(conf.get(Util.CONF_HEADER_PATH));
    LOG.info("Reading dataset header..." + headerPath.toString());
    header = new DataSetHeader(conf, headerPath);

    // The class index is obtained via getInstance(), so only initialise it when it has not
    // been populated yet.
    if (!classIndex.isPopulated()) {
      classIndex.init(conf);
      classIndex.populateIndex();
    }

    // Document frequencies are only needed when a positive threshold is configured.
    if (threshold > 0.0) {
      LOG.info("loading DF values");
      hostsWithMinimumDF = Util.getHostsWithDocumentFrequencies(conf, threshold);
    }

    LOG.info("loading training data...");
    loadTrainingInstances(conf.get(Util.CONF_TRAINING_DATE));
  } catch (Exception e) {
    // Route the stack trace through the logger instead of printing it to stderr, then keep
    // the exception in the field for later inspection.
    LOG.error("setup failed with an exception!", e);
    setupFailedException = e;
  }
}
@Override public void initialize(VertexManagerPluginContext context) { Configuration conf; try { conf = TezUtils.createConfFromUserPayload(context.getUserPayload()); } catch (IOException e) { throw new TezUncheckedException(e); } this.context = context; this.slowStartMinSrcCompletionFraction = conf.getFloat( ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT); this.slowStartMaxSrcCompletionFraction = conf.getFloat( ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT); if (slowStartMinSrcCompletionFraction < 0 || slowStartMaxSrcCompletionFraction < slowStartMinSrcCompletionFraction) { throw new IllegalArgumentException( "Invalid values for slowStartMinSrcCompletionFraction" + "/slowStartMaxSrcCompletionFraction. Min cannot be < 0 and " + "max cannot be < min."); } enableAutoParallelism = conf.getBoolean( ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT); desiredTaskInputDataSize = conf.getLong( ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT); minTaskParallelism = conf.getInt( ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM, ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT); LOG.info( "Shuffle Vertex Manager: settings" + " minFrac:" + slowStartMinSrcCompletionFraction + " maxFrac:" + slowStartMaxSrcCompletionFraction + " auto:" + enableAutoParallelism + " desiredTaskIput:" + desiredTaskInputDataSize + " minTasks:" + minTaskParallelism); Map<String, EdgeProperty> inputs = context.getInputVertexEdgeProperties(); for (Map.Entry<String, EdgeProperty> entry : inputs.entrySet()) { if (entry.getValue().getDataMovementType() 
== DataMovementType.SCATTER_GATHER) { String vertex = entry.getKey(); bipartiteSources.put(vertex, new HashSet<Integer>()); } } if (bipartiteSources.isEmpty()) { throw new TezUncheckedException("Atleast 1 bipartite source should exist"); } // dont track the source tasks here since those tasks may themselves be // dynamically changed as the DAG progresses. }
/**
 * Passes the configuration to the superclass, reads the capacity pad fraction
 * ({@code mapred.jobtracker.taskalloc.capacitypad}, default 0.01) and creates a new
 * {@link EagerTaskInitializationListener} from the same configuration.
 *
 * @param conf configuration to apply; it is dereferenced, so it must not be null
 */
@Override
public synchronized void setConf(Configuration conf) {
  super.setConf(conf);

  this.padFraction = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.01f);

  // A fresh listener is built on every call.
  eagerTaskInitializationListener = new EagerTaskInitializationListener(conf);
}
/**
 * Looks up a float setting in the configuration held by the services singleton.
 *
 * @param name configuration key to look up
 * @return the configured value, or {@code ConfigUtils.FLOAT_DEFAULT} when the key is not set
 */
public static float getFloat(String name) {
  return Services.get().getConf().getFloat(name, ConfigUtils.FLOAT_DEFAULT);
}
@Override public void initialize() { Configuration conf; try { conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); } catch (IOException e) { throw new TezUncheckedException(e); } this.slowStartMinSrcCompletionFraction = conf.getFloat( ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT); float defaultSlowStartMaxSrcFraction = ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT; if (slowStartMinSrcCompletionFraction > defaultSlowStartMaxSrcFraction) { defaultSlowStartMaxSrcFraction = slowStartMinSrcCompletionFraction; } this.slowStartMaxSrcCompletionFraction = conf.getFloat( ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, defaultSlowStartMaxSrcFraction); if (slowStartMinSrcCompletionFraction < 0 || slowStartMaxSrcCompletionFraction > 1 || slowStartMaxSrcCompletionFraction < slowStartMinSrcCompletionFraction) { throw new IllegalArgumentException( "Invalid values for slowStartMinSrcCompletionFraction" + "/slowStartMaxSrcCompletionFraction. 
Min cannot be < 0, max cannot be > 1," + " and max cannot be < min."); } enableAutoParallelism = conf.getBoolean( ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT); desiredTaskInputDataSize = conf.getLong( ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT); minTaskParallelism = Math.max( 1, conf.getInt( ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM, ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT)); LOG.info( "Shuffle Vertex Manager: settings" + " minFrac:" + slowStartMinSrcCompletionFraction + " maxFrac:" + slowStartMaxSrcCompletionFraction + " auto:" + enableAutoParallelism + " desiredTaskIput:" + desiredTaskInputDataSize + " minTasks:" + minTaskParallelism); updatePendingTasks(); if (enableAutoParallelism) { getContext().vertexReconfigurationPlanned(); } // dont track the source tasks here since those tasks may themselves be // dynamically changed as the DAG progresses. }
/**
 * Creates the memory manager, sizing its pool as a fraction of the JVM's maximum heap.
 *
 * @param conf configuration supplying the pool fraction under
 *     {@code HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL}; the variable's own default is used
 *     when the key is not set
 */
MemoryManager(Configuration conf) {
  final HiveConf.ConfVars poolSetting = HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL;
  final double poolFraction = conf.getFloat(poolSetting.varname, poolSetting.defaultFloatVal);

  // Maximum heap size as reported by the memory MX bean.
  final long maxHeapBytes = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();

  totalMemoryPool = Math.round(maxHeapBytes * poolFraction);
}