/**
 * Folds all pairs received for one key into a single pair and writes it out.
 *
 * <p>The first component of every incoming pair is added into one running
 * total and the second component into another; the totals are then stored in
 * the shared {@code pair} instance and emitted under the unchanged key.
 *
 * <p>NOTE(review): both running totals are {@code int}, so they wrap silently
 * on overflow. Confirm the expected magnitudes against the input data.
 *
 * @param key     the key whose values are being combined
 * @param values  the pairs grouped under {@code key}
 * @param context the sink the combined pair is written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(Text key, Iterable<Pair> values, Context context)
        throws IOException, InterruptedException {
    int totalOfFirst = 0;
    int totalOfSecond = 0;

    for (Pair partial : values) {
        totalOfFirst += partial.value1;
        totalOfSecond += partial.value2;
    }

    // Reuse the shared output object instead of allocating one per key.
    pair.setValue1(totalOfFirst);
    pair.setValue2(totalOfSecond);
    context.write(key, pair);
}
// map function to process 2gram public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); // break different fields in the line by tab // or one or more white space "\\s+" since 2gram the two words are not separated by tab String[] columns = line.split("\\s+"); // System.out.println(Arrays.toString(columns)); // convert to integer // http://stackoverflow.com/questions/8336607/how-to-check-if-the-value-is-integer-in-java try { Integer.parseInt(columns[2]); // convert lower case String word1 = columns[0].toLowerCase(); String word2 = columns[1].toLowerCase(); String[] listSubstrings = {"nu", "die", "kla"}; for (String sub : listSubstrings) { if (word1.contains(sub) || word2.contains(sub)) { String index = columns[2] + " " + sub; // System.out.println(index + " " + columns[3]); int volume = Integer.parseInt(columns[4]); yearSubstring.set(index); pair.setValue1(1); pair.setValue2(volume); context.write(yearSubstring, pair); } } } // end try catch (NumberFormatException e) { } // do nothing } // end map function