Ejemplo n.º 1
0
 /**
  * Counterpart of getJobFeatureVector that computes the feature vector for all hashtags rather
  * than for a single one.
  *
  * @param input input location passed to the job
  * @param output output location passed to the job
  * @throws Exception if creating, configuring, or running the job fails
  */
 private static void getHashtagFeatureVector(String input, String output) throws Exception {
   Configuration conf = new Configuration();
   String jobName = "Get feature vector for all hashtags";
   Optimizedjob hashtagJob = new Optimizedjob(conf, input, output, jobName);

   // Mapper and reducer are registered; the third argument is deliberately left null.
   hashtagJob.setClasses(HashtagMapper.class, HashtagReducer.class, null);
   hashtagJob.setMapOutputClasses(Text.class, MapWritable.class);

   hashtagJob.run();
 }
Ejemplo n.º 2
0
 /**
  * Runs the job that computes hashtag similarities using {@code SimilarityMapper} and
  * {@code SimilarityReducer}.
  *
  * <p>Design background: once feature vectors exist for both #job and all other hashtags, they
  * can be combined into inner products. The job {@code Configuration} was chosen as the mechanism
  * for sharing data with mappers, because the configuration object is dispatched to every mapper
  * at start-up and is used to set the mappers up.
  *
  * <p>NOTE(review): this method itself only stores the child JVM heap option in the configuration;
  * it does not place any feature vector there. Presumably the mappers obtain the vectors some
  * other way -- confirm against {@code SimilarityMapper}.
  *
  * @param input input location passed to the job
  * @param output output location passed to the job
  * @throws IOException propagated from creating or running the job
  * @throws ClassNotFoundException propagated from creating or running the job
  * @throws InterruptedException propagated from creating or running the job
  */
 private static void getHashtagSimilarities(String input, String output)
     throws IOException, ClassNotFoundException, InterruptedException {
   Configuration conf = new Configuration();
   // Give each child task JVM a 1024 MB maximum heap.
   conf.set("mapred.child.java.opts", "-Xmx1024M");
   // Earlier tuning experiments (JVM reuse, DFS block size, min/max split size, number of map
   // tasks) were disabled in the original code and are intentionally not applied here.

   Optimizedjob job =
       new Optimizedjob(
           conf, input, output, "Get similarity mapper between each and all other hashtags");
   // The third argument is left null; a SimilarityCombiner variant was tried previously and
   // disabled.
   job.setClasses(SimilarityMapper.class, SimilarityReducer.class, null);
   job.setMapOutputClasses(Text.class, IntWritable.class);
   job.run();
 }