Example #1
    @SuppressWarnings("deprecation")
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      super.cleanup(context);
      List<Cluster> newKMeansClusters = new ArrayList<Cluster>();
      List<Cluster> newCanopyClusters = new ArrayList<Cluster>();

      for (Cluster kMeansCluster : _clusters.keySet()) {
        Cluster canopyCluster = _kMeansToCanopyMap.get(kMeansCluster);

        // Set a new Cluster center
        Vector center = new Vector();
        center.setElements(new double[kMeansCluster.getCenterVector().getElements().length]);
        List<Vector> vectors = new ArrayList<Vector>();

        for (Vector currentVector : _clusters.get(kMeansCluster)) {
          vectors.add(new Vector(currentVector));

          // Sum the vectors element-wise; their element-wise mean will become the new cluster
          // center.
          for (int i = 0; i < currentVector.getElements().length; i++)
            center.getElements()[i] += currentVector.getElements()[i];
        }

        // Divide each element by the number of vectors to get the mean (a synthetic point, not
        // necessarily one of the inputs)
        for (int i = 0; i < center.getElements().length; i++)
          center.getElements()[i] = center.getElements()[i] / vectors.size();

        Cluster newKMeansCluster = new Cluster(center);
        canopyCluster.setIsCovered(newKMeansCluster.isConvergedWithOtherCluster(kMeansCluster));
        newKMeansClusters.add(newKMeansCluster);
        newCanopyClusters.add(canopyCluster);

        // Emit each vector paired with its new cluster center
        for (Vector vector : vectors) {
          context.write(newKMeansCluster, vector);
        }
      }
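      // Rewrite the centers file with the updated (canopy, k-means) cluster pairs and count
      // converged clusters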

      Configuration conf = context.getConfiguration();
      Path outPath = new Path(conf.get("centers.path"));
      FileSystem fs = FileSystem.get(conf);

      if (fs.exists(outPath)) fs.delete(outPath, true);

      SequenceFile.Writer writer =
          SequenceFile.createWriter(
              fs, context.getConfiguration(), outPath, Cluster.class, Cluster.class);
      context.getCounter(Counter.CONVERGED).setValue(0);

      for (int i = 0; i < newKMeansClusters.size(); i++) {
        writer.append(newCanopyClusters.get(i), newKMeansClusters.get(i));

        if (newCanopyClusters.get(i).getIsCovered())
          context.getCounter(Counter.CONVERGED).increment(1);
      }

      writer.close();
    }
Example #2
    @Override
    protected void setup(
        final Reducer<Text, CountofDoubleWritable, GeoWaveOutputKey, DistortionEntry>.Context
            context)
        throws IOException, InterruptedException {
      super.setup(context);
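      // Read the expected number of centroids (k) and the batch id from the scoped job
      // configuration, then initialize the GeoWave centroid manager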
      final ScopedJobConfiguration config =
          new ScopedJobConfiguration(
              context.getConfiguration(),
              KMeansDistortionMapReduce.class,
              KMeansDistortionMapReduce.LOGGER);

      final int k = config.getInt(JumpParameters.Jump.COUNT_OF_CENTROIDS, -1);
      if (k > 0) {
        expectedK = k;
      }

      try {
        centroidManager =
            new CentroidManagerGeoWave<Object>(
                context, KMeansDistortionMapReduce.class, KMeansDistortionMapReduce.LOGGER);
      } catch (final Exception e) {
        KMeansDistortionMapReduce.LOGGER.warn("Unable to initialize centroid manager", e);
        throw new IOException("Unable to initialize centroid manager", e);
      }

      batchId =
          config.getString(GlobalParameters.Global.PARENT_BATCH_ID, centroidManager.getBatchId());
    }
Example #3
 @SuppressWarnings("unchecked")
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
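   // Read the user-tag threshold, the Redis host list stored on HDFS, and the expiry/retry
   // settings, then initialize the Jedis client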
   Configuration conf = context.getConfiguration();
   FileSystem fs = FileSystem.get(conf);
   // String path = conf.get(Constant.CONFIG_PATH);
   String numStr = conf.get(Constant.USER_TAG_THRESHOLD);
   if (numStr != null && !numStr.isEmpty()) {
     num = Integer.parseInt(numStr);
   }
   String hostPath = conf.get(Constant.HOST_PATH);
   Path host = new Path(hostPath);
   ArrayList<String> hosts = new ArrayList<String>();
   if (fs.exists(host)) {
     FSDataInputStream input = fs.open(host);
     BufferedReader reader = new BufferedReader(new InputStreamReader(input));
     String line = null;
     while ((line = reader.readLine()) != null) {
       hosts.add(line);
     }
     reader.close();
   }
   // logger.info(hosts);
   String expiredStr = conf.get(Constant.JEDIS_USER_EXPIRED);
   if (expiredStr != null && !expiredStr.isEmpty()) {
     expiredTime = Integer.parseInt(expiredStr);
   }
   String userRedisName = conf.get("USER_REDIS_NAME");
   tryTime = Integer.parseInt(conf.get(Constant.TRY_TIMES));
   HashMap<String, String> map = new HashMap<String, String>();
   map.put(userRegion, userRedisName);
   RedisConfig.getInstance().setRegionRedisMap(map);
   RedisConfig.getInstance().loadhdfs(hosts);
   jedis = JedisUtils.getInstance();
 }
Example #4
 public void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   try {
     record = VerticaOutputFormat.getValue(context.getConfiguration());
   } catch (Exception e) {
     throw new IOException(e);
   }
 }
Example #5
    /** Configures the Reduce plan, the POPackage operator and the reporter thread */
    @SuppressWarnings("unchecked")
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      inIllustrator = inIllustrator(context);
      if (inIllustrator) pack = getPack(context);
      Configuration jConf = context.getConfiguration();
      SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
      context
          .getConfiguration()
          .set(
              PigConstants.TASK_INDEX,
              Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
      sJobContext = context;
      sJobConfInternal.set(context.getConfiguration());
      sJobConf = context.getConfiguration();
      try {
        PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        // Attempt to fetch the generated code from the distributed cache and resolve it
        SchemaTupleBackend.initialize(jConf, pigContext);

        if (rp == null)
          rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
        stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

        if (!inIllustrator)
          pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
        // To be removed
        if (rp.isEmpty()) log.debug("Reduce Plan empty!");
        else {
          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          rp.explain(baos);
          log.debug(baos.toString());
        }
        pigReporter = new ProgressableReporter();
        if (!(rp.isEmpty())) {
          roots = rp.getRoots().toArray(new PhysicalOperator[1]);
          leaf = rp.getLeaves().get(0);
        }

        // Get the UDF specific context
        MapRedUtil.setupUDFContext(jConf);

      } catch (IOException ioe) {
        String msg = "Problem while configuring reduce plan.";
        throw new RuntimeException(msg, ioe);
      }

      log.info(
          "Aliases being processed per job phase (AliasName[line,offset]): "
              + jConf.get("pig.alias.location"));

      Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
    }
Example #6
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
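   // Read the minimum-support threshold and the weibo-parsing flag from the job configuration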
   minSupport =
       context
           .getConfiguration()
           .getInt(DictionaryVectorizer.MIN_SUPPORT, DictionaryVectorizer.DEFAULT_MIN_SUPPORT);
   parseWeibo = context.getConfiguration().getBoolean(DictionaryVectorizer.PARSE_WEIBO, false);
 }
Example #7
 @Override
 protected void setup(Reducer.Context context) throws IOException, InterruptedException {
   super.setup(context);
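   // Read the current iteration number and the centroids shared via the job configuration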
   Configuration conf = context.getConfiguration();
   iteration = conf.getInt("iteration", -1);
   System.out.println("Setting up Reducer " + iteration);
   centroids = KMeansUtil.readCentroids(conf);
   System.out.println("Reducer " + iteration + " Received " + centroids.size() + " centroids as");
   KMeansUtil.showOutput(centroids);
 }
Example #8
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
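   // Emit every retained representative point keyed by its cluster id before the reducer shuts down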
   for (Map.Entry<Integer, List<VectorWritable>> entry : representativePoints.entrySet()) {
     IntWritable iw = new IntWritable(entry.getKey());
     for (VectorWritable vw : entry.getValue()) {
       context.write(iw, vw);
     }
   }
   super.cleanup(context);
 }
Example #9
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   Configuration conf = context.getConfiguration();
   FileSystem fs = FileSystem.get(conf);
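   // Create a randomly named output file for this task's user-bias values under the configured
   // cache path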
   uBiasFW =
       fs.create(
           new Path(
               conf.get("hadoop.cache.path")
                   + "uBias/"
                   + ((long) (Math.random() * 1000000000))
                   + ".dat"));
 }
Example #10
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      if (context.getCacheFiles() != null && context.getCacheFiles().length > 0) {

        InputStream in = new FileInputStream(new File("./mergedLineCounts2"));
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        String line;

        // Load the per-word counts distributed through the cache
        while ((line = reader.readLine()) != null) {
          String[] split = line.split("\t");
          wordCounts.put(split[0], Integer.parseInt(split[1]));
        }
        reader.close();
      }

      super.setup(context);
    }
Example #11
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {

    super.setup(context);
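    // Rebuild the shared frequent-feature list (f-list) and read the FP-growth mining parameters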
    Parameters params =
        new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));

    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
      featureReverseMap.add(e.getFirst());
      freqList.add(e.getSecond());
    }

    maxHeapSize = Integer.valueOf(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
    minSupport = Integer.valueOf(params.get(PFPGrowth.MIN_SUPPORT, "3"));

    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
    useFP2 = "true".equals(params.get(PFPGrowth.USE_FPG2));
  }
Example #12
    /**
     * Called once all the intermediate keys and values have been processed, so this is the right
     * place to stop the reporter thread.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      super.cleanup(context);

      if (errorInReduce) {
        // there was an error in reduce - just return
        return;
      }

      if (PigMapReduce.sJobConfInternal.get().get("pig.stream.in.reduce", "false").equals("true")) {
        // If there is a stream in the pipeline we could
        // potentially have more to process - so lets
        // set the flag stating that all map input has been sent
        // already and then lets run the pipeline one more time
        // This will result in nothing happening in the case
        // where there is no stream in the pipeline
        rp.endOfAllInput = true;
        runPipeline(leaf);
      }

      if (!inIllustrator) {
        for (POStore store : stores) {
          if (!initialized) {
            MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
            store.setStoreImpl(impl);
            store.setUp();
          }
          store.tearDown();
        }
      }

      // Calling EvalFunc.finish()
      UDFFinishVisitor finisher =
          new UDFFinishVisitor(rp, new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(rp));
      try {
        finisher.visit();
      } catch (VisitorException e) {
        throw new IOException("Error trying to finish UDFs", e);
      }

      PhysicalOperator.setReporter(null);
      initialized = false;
    }
Example #13
    @SuppressWarnings("deprecation")
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
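      // Rebuild the k-means -> canopy cluster mapping from the SequenceFile at centers.path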
      _kMeansToCanopyMap = new HashMap<Cluster, Cluster>();

      Cluster canopyCluster = new Cluster();
      Cluster kMeansCluster = new Cluster();

      Configuration conf = context.getConfiguration();
      Path centroids = new Path(conf.get("centers.path"));
      FileSystem fs = FileSystem.get(conf);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids, conf);

      while (reader.next(canopyCluster, kMeansCluster)) {
        _kMeansToCanopyMap.put(new Cluster(kMeansCluster), new Cluster(canopyCluster));
      }

      reader.close();
    }
Example #14
 @Override
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   Configuration conf = context.getConfiguration();
   representativePoints = RepresentativePointsMapper.getRepresentativePoints(conf);
 }
Example #15
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   super.cleanup(context);
   uBiasFW.close();
 }
Example #16
 public void reduce(LongWritable key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   super.reduce(key, values, context);
 }
Example #17
 @Override
 protected void cleanup(org.apache.hadoop.mapreduce.Reducer.Context context)
     throws IOException, InterruptedException {
   super.cleanup(context);
 }