@Override public void reduce(Text key, Iterable<DoublePair> values, Context context) throws IOException, InterruptedException { // YOUR CODE HERE // Add DoublePair values(distance, ocurrences) for the document, before return map to the // master Double total_distance = new Double(0.0); Double total_ocu = new Double(0.0); for (DoublePair value : values) { total_distance += new Double(value.getDouble1()); total_ocu += new Double(value.getDouble2()); } context.write(key, new DoublePair(total_distance, total_ocu)); } // end combine1
@Override public void reduce(Text key, Iterable<DoublePair> values, Context context) throws IOException, InterruptedException { // YOUR CODE HERE // Add DoublePair values(distance, ocurrences) for the whole corpus Double total_distance = new Double(0.0); Double total_ocu = new Double(0.0); for (DoublePair value : values) { total_distance += value.getDouble1(); total_ocu += value.getDouble2(); } // Calculate occurrence rate Double result = new Double(0.0); if (total_distance != 0) result = ((total_distance * Math.pow(Math.log(total_distance), 3)) / total_ocu) * -1; context.write(new DoubleWritable(result), key); }
@Override public void map(WritableComparable docID, Text docContents, Context context) throws IOException, InterruptedException { Matcher matcher = WORD_PATTERN.matcher(docContents.toString()); Func func = funcFromNum(funcNum); // YOUR CODE HERE ArrayList<String> doc_words = new ArrayList<String>(); // Store all words within the document. ArrayList<Double> targetGram_pos = new ArrayList< Double>(); // Store the index of each occurrence of target word in the document DoublePair values = new DoublePair(); // DoublePair that store distance, ocurrences values.setDouble2(new Double(1.0)); // ocurrences = 0 Text output = new Text(); // Store each word within the document in doc_words while (matcher.find()) { doc_words.add(new String(matcher.group().toLowerCase())); } // Traverse the document and store each word within it in ArrayList doc_words, and at the same // time store the index of each occurence of target word within the document in targetGram_pos for (int i = 0; i < doc_words.size(); i++) { String word = doc_words.get(i); if (word.equals(targetGram)) targetGram_pos.add(new Double(i)); } // Traverse the doc_words ArrayList and find the distance between each word within the // document and the target word // If there were not any ocurrence of target word distance is 0 to all words int index_tw = 0; // index target word Double distance = new Double(0); // store the distance between current word and target word for (int i = 0; i < doc_words.size(); i++) { if (targetGram_pos.size() == 0) { // If target word is not within the document, distance for all words is // Double.POSITIVE_INFINITY distance = Double.POSITIVE_INFINITY; } else { if (doc_words .get(i) .equals( targetGram)) { // If word within the document is the same target word skip it and // go to the next word continue; } if (targetGram_pos.size() == 1) { // If there were just one entre of the target word distance = Math.abs(i - targetGram_pos.get(index_tw)); } else { if (index_tw < targetGram_pos.size() - 1) { // If this is not the LAST position of the ArrayList of indexes of the // target word if (Math.abs(i - targetGram_pos.get(index_tw)) > Math.abs( i - targetGram_pos.get( index_tw + 1))) { // Compare the lowest distance between the nearest two // indexes index_tw++; } } distance = Math.abs(i - targetGram_pos.get(index_tw)); } } values.setDouble1( new Double(func.f(distance))); // Evaluate dist on f(d) and store it on distance.d1 output.set(doc_words.get(i)); // Output key is each word context.write( output, values); // key, value: key: each word, value:Pair of Double(distance, num of // co-currences) } // end for } // end map1