/**
 * Returns the middle element of the given values in sorted order.
 *
 * <p>The input list is not modified: sorting is done on a private copy. For an even number of
 * elements this returns the upper of the two middle values (the element at index
 * {@code size / 2} of the sorted order), not their average.
 *
 * @param l the values to examine; left in its original order
 * @return the element at index {@code size / 2} of the sorted values
 * @throws IndexOutOfBoundsException if {@code l} is empty
 */
private int median(ArrayList<Integer> l) {
  // Sort a copy so the caller's list keeps its original ordering.
  ArrayList<Integer> sorted = new ArrayList<Integer>(l);
  Collections.sort(sorted);
  return sorted.get(sorted.size() / 2);
}
@Override public void map(WritableComparable docID, Text docContents, Context context) throws IOException, InterruptedException { Matcher matcher = WORD_PATTERN.matcher(docContents.toString()); Func func = funcFromNum(funcNum); // YOUR CODE HERE ArrayList<String> doc_words = new ArrayList<String>(); // Store all words within the document. ArrayList<Double> targetGram_pos = new ArrayList< Double>(); // Store the index of each occurrence of target word in the document DoublePair values = new DoublePair(); // DoublePair that store distance, ocurrences values.setDouble2(new Double(1.0)); // ocurrences = 0 Text output = new Text(); // Store each word within the document in doc_words while (matcher.find()) { doc_words.add(new String(matcher.group().toLowerCase())); } // Traverse the document and store each word within it in ArrayList doc_words, and at the same // time store the index of each occurence of target word within the document in targetGram_pos for (int i = 0; i < doc_words.size(); i++) { String word = doc_words.get(i); if (word.equals(targetGram)) targetGram_pos.add(new Double(i)); } // Traverse the doc_words ArrayList and find the distance between each word within the // document and the target word // If there were not any ocurrence of target word distance is 0 to all words int index_tw = 0; // index target word Double distance = new Double(0); // store the distance between current word and target word for (int i = 0; i < doc_words.size(); i++) { if (targetGram_pos.size() == 0) { // If target word is not within the document, distance for all words is // Double.POSITIVE_INFINITY distance = Double.POSITIVE_INFINITY; } else { if (doc_words .get(i) .equals( targetGram)) { // If word within the document is the same target word skip it and // go to the next word continue; } if (targetGram_pos.size() == 1) { // If there were just one entre of the target word distance = Math.abs(i - targetGram_pos.get(index_tw)); } else { if (index_tw < targetGram_pos.size() - 1) { 
// If this is not the LAST position of the ArrayList of indexes of the // target word if (Math.abs(i - targetGram_pos.get(index_tw)) > Math.abs( i - targetGram_pos.get( index_tw + 1))) { // Compare the lowest distance between the nearest two // indexes index_tw++; } } distance = Math.abs(i - targetGram_pos.get(index_tw)); } } values.setDouble1( new Double(func.f(distance))); // Evaluate dist on f(d) and store it on distance.d1 output.set(doc_words.get(i)); // Output key is each word context.write( output, values); // key, value: key: each word, value:Pair of Double(distance, num of // co-currences) } // end for } // end map1