public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String file = value.toString(); String[] lines = file.split("\n"); for (String line : lines) { if (line.contains("<author>") && line.contains("</author>")) { String author = line.substring(8, line.indexOf("</a")); word.set(author); context.write(word, one); } else if (line.contains("<author>")) { String author = line.substring(8); word.set(author); context.write(word, one); } } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line); while (tokenizer.hasMoreTokens()) { word.set(tokenizer.nextToken()); context.write(word, one); } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); splitposition = line.indexOf("\t"); labels = line.substring(0, splitposition); content = line.substring(splitposition + 1, line.length()); if (content.length() <= 5) { word.set(labels); int counter = Integer.parseInt(content); context.write(word, new IntWritable(counter)); return; } labeltokens = labels.split(","); contenttokens = tokenizeDoc(content); for (String label : labelspace) { for (String token : contenttokens) { // filter token, denotes the needed events; word.set(label + " " + token); context.write(word, new IntWritable(-1)); } } }
public void map( Text key, Text val, org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { int i = 0, n = 0, j = 0, lj = 0, hj = 0; String tem = ""; initStopWordsMap(); // initialize the stop list String line = val.toString(); StringTokenizer itr = new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter n = itr.countTokens(); cache = new String[n]; for (i = 0; i < n; i++) { cache[i] = new String(""); // initialize the cache } i = 0; while (itr.hasMoreTokens()) { cache[i] = itr.nextToken(); // padding the cache with the words of the content i++; } for (i = 0; i < n; i++) { keyWord = cache[i]; keyWord = keyWord.trim(); if (!hmStopWord.containsKey(keyWord)) { lj = i - 10; hj = i + 10; if (lj < 0) lj = 0; if (hj > n) hj = n; tem = " "; for (j = lj; j < hj; j++) tem += cache[j] + " "; location = new Text(); location.set(key.toString() + tem); context.write(new Text(keyWord), location); } } }