public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // Tokenize the input line and emit (word, 1) for every token.
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    context.write(word, one);
  }
}
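This map method refers to the instance fields word and one, which are not shown in the excerpt. In the canonical Hadoop WordCount pattern they are declared on the enclosing Mapper class; the following is a minimal sketch under that assumption (the class name TokenCounterMapper is a placeholder, not taken from the original code):

// Sketch of the enclosing Mapper class assumed by the map() method above;
// the fields `word` and `one` follow the standard WordCount pattern.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenCounterMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1); // constant count emitted per token
  private final Text word = new Text();                      // reused key object to avoid reallocating per token
  // ... map() as shown above ...
}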
public void reduce(Text key, Iterable<Text> values,
    org.apache.hadoop.mapreduce.Reducer<Text, Text, Text, InvertedListWritable>.Context context)
    throws IOException, InterruptedException {
  InvertedListWritable invertedList = new InvertedListWritable();
  for (Text k : values) {
    // Each value carries a document URL followed by that document's abstract text.
    StringTokenizer itr = new StringTokenizer(k.toString());
    url = "";
    abs = "";
    if (itr.hasMoreTokens()) {
      url = itr.nextToken();           // first token is the document URL
    }
    while (itr.hasMoreTokens()) {
      abs += itr.nextToken() + " ";    // remaining tokens form the abstract
    }
    dr = new ListURL(url, abs);
    invertedList.paddingValueKey(dr);  // append this posting to the inverted list
  }
  invertedList.quickSortNodeKey();     // sort the postings before emitting the list
  context.write(key, invertedList);
}
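InvertedListWritable and ListURL are custom types whose implementations are not shown in this excerpt. Because the reducer emits InvertedListWritable as its output value, the class has to implement Hadoop's Writable interface. The skeleton below is a hypothetical sketch of the minimum such a class would need; the field names, the layout of ListURL, the sort order in quickSortNodeKey, and the use of writeUTF are all assumptions, not the original implementation:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.Writable;

// Hypothetical posting: one URL plus its abstract.
class ListURL {
  String url;
  String abs;
  ListURL(String url, String abs) { this.url = url; this.abs = abs; }
}

public class InvertedListWritable implements Writable {
  private final List<ListURL> postings = new ArrayList<>();

  public void paddingValueKey(ListURL dr) {      // append a posting to the list
    postings.add(dr);
  }

  public void quickSortNodeKey() {               // sort postings; ordering by URL is an assumption
    postings.sort((a, b) -> a.url.compareTo(b.url));
  }

  @Override
  public void write(DataOutput out) throws IOException {   // serialize: count, then each posting
    out.writeInt(postings.size());
    for (ListURL p : postings) {
      out.writeUTF(p.url);
      out.writeUTF(p.abs);
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException { // deserialize in the same order
    postings.clear();
    int n = in.readInt();
    for (int i = 0; i < n; i++) {
      postings.add(new ListURL(in.readUTF(), in.readUTF()));
    }
  }

  @Override
  public String toString() {                     // used by TextOutputFormat when the result is written as text
    StringBuilder sb = new StringBuilder();
    for (ListURL p : postings) sb.append(p.url).append(' ');
    return sb.toString().trim();
  }
}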
public void map(Text key, Text val,
    org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context)
    throws IOException, InterruptedException {
  int i = 0, n = 0, j = 0, lj = 0, hj = 0;
  String tem = "";
  initStopWordsMap();                             // initialize the stop-word list
  String line = val.toString();
  StringTokenizer itr = new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set the delimiter
  n = itr.countTokens();
  cache = new String[n];
  for (i = 0; i < n; i++) {
    cache[i] = "";                                // initialize the cache
  }
  i = 0;
  while (itr.hasMoreTokens()) {
    cache[i] = itr.nextToken();                   // fill the cache with the words of the content
    i++;
  }
  for (i = 0; i < n; i++) {
    keyWord = cache[i];
    keyWord = keyWord.trim();
    if (!hmStopWord.containsKey(keyWord)) {
      // Take a window of up to ten words on each side of the keyword as its context.
      lj = i - 10;
      hj = i + 10;
      if (lj < 0) lj = 0;
      if (hj > n) hj = n;
      tem = " ";
      for (j = lj; j < hj; j++) {
        tem += cache[j] + " ";
      }
      location = new Text();
      location.set(key.toString() + tem);         // emit (keyword, document key + context window)
      context.write(new Text(keyWord), location);
    }
  }
}
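For completeness, a job driver along the following lines would wire this mapper to the reducer shown earlier. The class names InvertedIndexMapper and InvertedIndexReducer and the input/output paths are placeholders, not taken from the original code; the input format is chosen to match the map(Text, Text, ...) signature above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class InvertedIndexDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "inverted index");
    job.setJarByClass(InvertedIndexDriver.class);

    // KeyValueTextInputFormat delivers (Text key, Text value) pairs,
    // matching the map(Text, Text, ...) signature above.
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapperClass(InvertedIndexMapper.class);    // the map() shown above (placeholder class name)
    job.setReducerClass(InvertedIndexReducer.class);  // the reduce() shown earlier (placeholder class name)

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InvertedListWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}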