@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); // assume each input page of interest has one keyword, followed by a tab, // and multiple document ids separated by comma. String[] fields = line.split("\t", -1); String docId = fields[0]; double pageRank = Double.valueOf(fields[1]); // add this record to local top N map topN.put(pageRank, new Text(docId + "\t" + pageRank)); // if we have more than N records in top N now, remove the lowest if (topN.size() > N) { topN.remove(topN.firstKey()); } }
/**
 * Emits every record currently held in {@code topN}.
 *
 * <p>Each value is written with a {@link NullWritable} key, in the order
 * returned by {@code topN.values()}.
 *
 * @param context task context the records are written to
 * @throws IOException          if writing a record fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    // NullWritable is a singleton, so the same key instance serves every record.
    final NullWritable emptyKey = NullWritable.get();
    for (Text record : topN.values()) {
        context.write(emptyKey, record);
    }
}