/**
 * Configures and launches the different-type entity similarity MapReduce job,
 * blocking until it finishes.
 *
 * @param args command line arguments; {@code args[0]} is the input path and
 *             {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    // NOTE(review): the job name is misspelled ("Dirfferent"); kept verbatim
    // because it is a runtime string that may be matched on elsewhere.
    job.setJobName("Dirfferent type entity similarity MR");
    job.setJarByClass(DiffTypeSimilarity.class);

    // Input and output locations come straight from the command line
    Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Map and reduce stages
    job.setMapperClass(DiffTypeSimilarity.SimilarityMapper.class);
    job.setReducerClass(DiffTypeSimilarity.SimilarityReducer.class);

    // Intermediate (map output) types
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Custom partitioning and reduce-side grouping of keys
    job.setPartitionerClass(IdPairPartitioner.class);
    job.setGroupingComparatorClass(IdPairGroupComprator.class);

    // Presumably loads project-level settings into the job configuration;
    // it must run before the reducer count is read below.
    Utility.setConfiguration(job.getConfiguration());

    // The job specific reducer count wins; otherwise fall back to the
    // generic setting, which itself defaults to a single reducer.
    int reducerCount = job.getConfiguration().getInt("dts.num.reducer", -1);
    if (reducerCount == -1) {
      reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
  }
Example #2
0
  /**
   * Configures and launches the text analyzer MapReduce job, blocking until
   * it finishes.
   *
   * @param args command line arguments; {@code args[0]} is the input path and
   *             {@code args[1]} is the output path
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws Exception if job setup or execution fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Text analyzer MR");
    job.setJarByClass(TextAnalyzer.class);

    // Input and output locations come straight from the command line
    Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Only a mapper is registered here; no reducer class is set
    job.setMapperClass(TextAnalyzer.AnalyzerMapper.class);

    // Output record types
    job.setOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);

    // Presumably loads project-level settings into the job configuration
    Utility.setConfiguration(job.getConfiguration());

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
  }
 /**
  * Computes a normalized distance between two geo location values.
  * The raw geo distance is divided by the field's maximum distance and the
  * result is capped at 1.0.
  *
  * @param trgItemTxt location value of the target item
  * @param srcItemTxt location value of the source item
  * @param field field meta data supplying the maximum distance used for scaling
  * @return the scaled distance, or 1.0 when the scaled value is not less than
  *         or equal to 1.0
  */
 private double getDistForLocation(String trgItemTxt, String srcItemTxt, Field field) {
   double rawDistance = org.sifarish.util.Utility.getGeoDistance(trgItemTxt, srcItemTxt);
   double scaled = rawDistance;
   scaled /= field.getMaxDistance();

   // Anything that is not <= 1.0 collapses to 1.0, including a NaN that
   // would come from a zero over zero division.
   if (scaled <= 1.0) {
     return scaled;
   }
   return 1.0;
 }