@SuppressWarnings("deprecation") @Override protected void cleanup(Context context) throws IOException, InterruptedException { super.cleanup(context); List<Cluster> newKMeansClusters = new ArrayList<Cluster>(); List<Cluster> newCanopyClusters = new ArrayList<Cluster>(); for (Cluster kMeansCluster : _clusters.keySet()) { Cluster canopyCluster = _kMeansToCanopyMap.get(kMeansCluster); // Set a new Cluster center Vector center = new Vector(); center.setElements(new double[kMeansCluster.getCenterVector().getElements().length]); List<Vector> vectors = new ArrayList<Vector>(); for (Vector currentVector : _clusters.get(kMeansCluster)) { vectors.add(new Vector(currentVector)); // Sums the vectors to a new vector in order to find the one that is the closest to all // others, it will be our new cluster center. for (int i = 0; i < currentVector.getElements().length; i++) center.getElements()[i] += currentVector.getElements()[i]; } // Divides the vector's elements in order to find its real location (it will be a fictive // vector) for (int i = 0; i < center.getElements().length; i++) center.getElements()[i] = center.getElements()[i] / vectors.size(); Cluster newKMeansCluster = new Cluster(center); canopyCluster.setIsCovered(newKMeansCluster.isConvergedWithOtherCluster(kMeansCluster)); newKMeansClusters.add(newKMeansCluster); newCanopyClusters.add(canopyCluster); // Adding the vectors to the new cluster center for (Vector vector : vectors) { context.write(newKMeansCluster, vector); } } Configuration conf = context.getConfiguration(); Path outPath = new Path(conf.get("centers.path")); FileSystem fs = FileSystem.get(conf); if (fs.exists(outPath)) fs.delete(outPath, true); SequenceFile.Writer writer = SequenceFile.createWriter( fs, context.getConfiguration(), outPath, Cluster.class, Cluster.class); context.getCounter(Counter.CONVERGED).setValue(0); for (int i = 0; i < newKMeansClusters.size(); i++) { writer.append(newCanopyClusters.get(i), newKMeansClusters.get(i)); if (newCanopyClusters.get(i).getIsCovered()) context.getCounter(Counter.CONVERGED).increment(1); } writer.close(); }
@Override
protected void setup(
    final Reducer<Text, CountofDoubleWritable, GeoWaveOutputKey, DistortionEntry>.Context context)
    throws IOException, InterruptedException {
  super.setup(context);
  final ScopedJobConfiguration config =
      new ScopedJobConfiguration(
          context.getConfiguration(),
          KMeansDistortionMapReduce.class,
          KMeansDistortionMapReduce.LOGGER);

  final int k = config.getInt(JumpParameters.Jump.COUNT_OF_CENTROIDS, -1);
  if (k > 0) {
    expectedK = k;
  }

  try {
    centroidManager =
        new CentroidManagerGeoWave<Object>(
            context, KMeansDistortionMapReduce.class, KMeansDistortionMapReduce.LOGGER);
  } catch (final Exception e) {
    KMeansDistortionMapReduce.LOGGER.warn("Unable to initialize centroid manager", e);
    throw new IOException("Unable to initialize centroid manager", e);
  }

  batchId =
      config.getString(GlobalParameters.Global.PARENT_BATCH_ID, centroidManager.getBatchId());
}
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration conf = context.getConfiguration(); FileSystem fs = FileSystem.get(conf); // String path = conf.get(Constant.CONFIG_PATH); String numStr = conf.get(Constant.USER_TAG_THRESHOLD); if (numStr != null && !numStr.isEmpty()) { num = Integer.parseInt(numStr); } String hostPath = conf.get(Constant.HOST_PATH); Path host = new Path(hostPath); ArrayList<String> hosts = new ArrayList<String>(); if (fs.exists(host)) { FSDataInputStream input = fs.open(host); BufferedReader reader = new BufferedReader(new InputStreamReader(input)); String line = null; while ((line = reader.readLine()) != null) { hosts.add(line); } } // logger.info(hosts); String expiredStr = conf.get(Constant.JEDIS_USER_EXPIRED); if (expiredStr != null && !expiredStr.isEmpty()) { expiredTime = Integer.parseInt(expiredStr); } String userRedisName = conf.get("USER_REDIS_NAME"); tryTime = Integer.parseInt(conf.get(Constant.TRY_TIMES)); HashMap<String, String> map = new HashMap<String, String>(); map.put(userRegion, userRedisName); RedisConfig.getInstance().setRegionRedisMap(map); RedisConfig.getInstance().loadhdfs(hosts); jedis = JedisUtils.getInstance(); }
public void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  try {
    record = VerticaOutputFormat.getValue(context.getConfiguration());
  } catch (Exception e) {
    throw new IOException(e);
  }
}
/** Configures the Reduce plan, the POPackage operator and the reporter thread. */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  inIllustrator = inIllustrator(context);
  if (inIllustrator) {
    pack = getPack(context);
  }
  Configuration jConf = context.getConfiguration();
  SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
  context
      .getConfiguration()
      .set(
          PigConstants.TASK_INDEX,
          Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
  sJobContext = context;
  sJobConfInternal.set(context.getConfiguration());
  sJobConf = context.getConfiguration();
  try {
    PigContext.setPackageImportList(
        (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list")));
    pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

    // This attempts to fetch all of the generated code from the distributed cache and resolve it.
    SchemaTupleBackend.initialize(jConf, pigContext);

    if (rp == null) {
      rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
    }
    stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

    if (!inIllustrator) {
      pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
    }
    // To be removed
    if (rp.isEmpty()) {
      log.debug("Reduce Plan empty!");
    } else {
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      rp.explain(baos);
      log.debug(baos.toString());
    }
    pigReporter = new ProgressableReporter();
    if (!(rp.isEmpty())) {
      roots = rp.getRoots().toArray(new PhysicalOperator[1]);
      leaf = rp.getLeaves().get(0);
    }

    // Get the UDF-specific context.
    MapRedUtil.setupUDFContext(jConf);
  } catch (IOException ioe) {
    String msg = "Problem while configuring reduce plan.";
    throw new RuntimeException(msg, ioe);
  }

  log.info(
      "Aliases being processed per job phase (AliasName[line,offset]): "
          + jConf.get("pig.alias.location"));

  Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  minSupport =
      context
          .getConfiguration()
          .getInt(DictionaryVectorizer.MIN_SUPPORT, DictionaryVectorizer.DEFAULT_MIN_SUPPORT);
  parseWeibo = context.getConfiguration().getBoolean(DictionaryVectorizer.PARSE_WEIBO, false);
}
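/*
 * Added illustration (an assumption, not from the original source): how the two configuration
 * values read in the setup above might be supplied by a driver. The class name and the concrete
 * values are placeholders; only the configuration keys come from the snippet above.
 */
import org.apache.hadoop.conf.Configuration;

public class VectorizerConfSketch {
  public static Configuration configure() {
    Configuration conf = new Configuration();
    // Minimum support threshold consumed as minSupport in the setup above.
    conf.setInt(DictionaryVectorizer.MIN_SUPPORT, DictionaryVectorizer.DEFAULT_MIN_SUPPORT);
    // Flag consumed as parseWeibo in the setup above.
    conf.setBoolean(DictionaryVectorizer.PARSE_WEIBO, false);
    return conf;
  }
}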
@Override
protected void setup(Reducer.Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  iteration = conf.getInt("iteration", -1);
  System.out.println("Setting up Reducer " + iteration);
  centroids = KMeansUtil.readCentroids(conf);
  System.out.println("Reducer " + iteration + " received " + centroids.size() + " centroids:");
  KMeansUtil.showOutput(centroids);
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Map.Entry<Integer, List<VectorWritable>> entry : representativePoints.entrySet()) {
    IntWritable iw = new IntWritable(entry.getKey());
    for (VectorWritable vw : entry.getValue()) {
      context.write(iw, vw);
    }
  }
  super.cleanup(context);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  // Each reducer writes to a randomly named .dat file under the configured cache path.
  uBiasFW =
      fs.create(
          new Path(
              conf.get("hadoop.cache.path")
                  + "uBias/"
                  + ((long) (Math.random() * 1000000000))
                  + ".dat"));
}
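/*
 * Added variant sketch (an assumption, not part of the original code): the random file name
 * above can in principle collide across reducers; one common alternative is to derive the name
 * from the task attempt ID, which is unique per attempt. Field and configuration names are
 * reused from the setup above.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  // The task attempt ID (e.g. attempt_..._r_000003_0) guarantees a distinct file per attempt.
  uBiasFW =
      fs.create(
          new Path(
              conf.get("hadoop.cache.path") + "uBias/" + context.getTaskAttemptID() + ".dat"));
}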
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  if (context.getCacheFiles() != null && context.getCacheFiles().length > 0) {
    // The cached file is symlinked into the task's working directory as "mergedLineCounts2".
    InputStream in = new FileInputStream(new File("./mergedLineCounts2"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
    String line;
    while ((line = reader.readLine()) != null) {
      String[] split = line.split("\t");
      wordCounts.put(split[0], Integer.parseInt(split[1]));
    }
    reader.close();
  }
  super.setup(context);
}
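/*
 * Added illustration (an assumption, not from the original source): one way the file read in
 * the setup above could be placed on the distributed cache so that it appears as
 * ./mergedLineCounts2 in the task's working directory. The class name and the source path are
 * placeholders; only the symlink name comes from the snippet above.
 */
import java.net.URI;
import org.apache.hadoop.mapreduce.Job;

public class WordCountCacheSketch {
  public static void addWordCountsToCache(Job job) throws Exception {
    // The fragment after '#' names the symlink created in each task's working directory.
    job.addCacheFile(new URI("/placeholder/path/wordcounts.tsv#mergedLineCounts2"));
  }
}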
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Parameters params =
      new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));

  for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
    featureReverseMap.add(e.getFirst());
    freqList.add(e.getSecond());
  }

  maxHeapSize = Integer.valueOf(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
  minSupport = Integer.valueOf(params.get(PFPGrowth.MIN_SUPPORT, "3"));
  maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
  numFeatures = featureReverseMap.size();
  useFP2 = "true".equals(params.get(PFPGrowth.USE_FPG2));
}
/**
 * Called once all the intermediate keys and values have been processed, so this is the right
 * place to stop the reporter thread.
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  super.cleanup(context);

  if (errorInReduce) {
    // There was an error in reduce - just return.
    return;
  }

  if (PigMapReduce.sJobConfInternal.get().get("pig.stream.in.reduce", "false").equals("true")) {
    // If there is a stream in the pipeline we could potentially have more to process, so set
    // the flag stating that all map input has already been sent and run the pipeline one more
    // time. This results in nothing happening when there is no stream in the pipeline.
    rp.endOfAllInput = true;
    runPipeline(leaf);
  }

  if (!inIllustrator) {
    for (POStore store : stores) {
      if (!initialized) {
        MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
        store.setStoreImpl(impl);
        store.setUp();
      }
      store.tearDown();
    }
  }

  // Calling EvalFunc.finish()
  UDFFinishVisitor finisher =
      new UDFFinishVisitor(rp, new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(rp));
  try {
    finisher.visit();
  } catch (VisitorException e) {
    throw new IOException("Error trying to finish UDFs", e);
  }

  PhysicalOperator.setReporter(null);
  initialized = false;
}
@SuppressWarnings("deprecation") @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); _kMeansToCanopyMap = new HashMap<Cluster, Cluster>(); Cluster canopyCluster = new Cluster(); Cluster kMeansCluster = new Cluster(); Configuration conf = context.getConfiguration(); Path centroids = new Path(conf.get("centers.path")); FileSystem fs = FileSystem.get(conf); SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids, conf); while (reader.next(canopyCluster, kMeansCluster)) { _kMeansToCanopyMap.put(new Cluster(kMeansCluster), new Cluster(canopyCluster)); } reader.close(); }
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  representativePoints = RepresentativePointsMapper.getRepresentativePoints(conf);
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  super.cleanup(context);
  uBiasFW.close();
}
public void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  // Delegate to the parent reducer's implementation.
  super.reduce(key, values, context);
}
@Override
protected void cleanup(org.apache.hadoop.mapreduce.Reducer.Context context)
    throws IOException, InterruptedException {
  super.cleanup(context);
}