/**
 * Configures and launches the entity similarity map reduce job, then blocks
 * until the job finishes.
 *
 * @param args command line arguments; args[0] is added as the input path and
 *             args[1] is set as the output path
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception propagated from job set up or job execution
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Dirfferent type entity similarity MR");
    job.setJarByClass(DiffTypeSimilarity.class);

    // input and output locations come straight from the command line
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map side
    job.setMapperClass(DiffTypeSimilarity.SimilarityMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // shuffle: custom partitioning and grouping of the map output keys
    job.setPartitionerClass(IdPairPartitioner.class);
    job.setGroupingComparatorClass(IdPairGroupComprator.class);

    // reduce side
    job.setReducerClass(DiffTypeSimilarity.SimilarityReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // must run before the reducer count is read below, since the count is
    // looked up in the same configuration object
    Utility.setConfiguration(job.getConfiguration());

    // job specific reducer count wins; fall back to the generic setting,
    // and finally to a single reducer
    int reducerCount = job.getConfiguration().getInt("dts.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Configures and launches the text analyzer map reduce job, then blocks
 * until the job finishes. Only a mapper class is configured here.
 *
 * @param args command line arguments; args[0] is added as the input path and
 *             args[1] is set as the output path
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception propagated from job set up or job execution
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Text analyzer MR");
    job.setJarByClass(TextAnalyzer.class);

    // input and output locations come straight from the command line
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // mapper and the types of the records the job emits
    job.setMapperClass(TextAnalyzer.AnalyzerMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // project helper applied to the job configuration before submission
    Utility.setConfiguration(job.getConfiguration());

    return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Gets the distance between two geo location values, scaled by the field's
 * maximum distance and capped at 1.0.
 *
 * @param trgItemTxt location text of the target item, passed as is to
 *                   {@code Utility.getGeoDistance} (expected format is
 *                   defined by that helper)
 * @param srcItemTxt location text of the source item, passed as is to
 *                   {@code Utility.getGeoDistance}
 * @param field field meta data supplying the maximum distance used for scaling
 * @return the scaled distance when it is at most 1.0, otherwise 1.0
 */
private double getDistForLocation(String trgItemTxt, String srcItemTxt, Field field) {
    double geoDist = org.sifarish.util.Utility.getGeoDistance(trgItemTxt, srcItemTxt);
    double scaled = geoDist / field.getMaxDistance();

    // explicit comparison (not Math.min) so that any value failing the
    // test, including NaN, maps to 1.0
    if (scaled <= 1.0) {
        return scaled;
    }
    return 1.0;
}