/**
 * Emits one record per n-gram window of the document that is not the target gram.
 *
 * <p>Every match of {@code WORD_PATTERN} is stored in {@code docC} under its word position.
 * For each start position a window of up to {@code n} consecutive words is joined with single
 * spaces and lower-cased; windows that run past the end of the document are shorter than
 * {@code n} words. Start positions whose window equals {@code targetGram} are collected, and
 * every other window is written as key with the value {@code "<f(distance)> 1"}, where the
 * distance comes from {@code distance(targetIndices, startPosition)} (presumably the distance
 * to the nearest target occurrence; see that helper for the exact definition).
 *
 * @param docID key of the input record; not used here
 * @param docContents text of the document to scan
 * @param context Hadoop context that receives the emitted pairs
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
public void map(WritableComparable docID, Text docContents, Context context)
    throws IOException, InterruptedException {
  Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
  Func func = this.funcFromNum(funcNum);

  // Index every matched word by its position. Entries left over from a longer, earlier
  // document are never read because all loops below are bounded by wordCount.
  int wordCount = 0;
  while (matcher.find()) {
    docC.put(wordCount, matcher.group());
    wordCount++;
  }

  // Build each window exactly once, remembering where the target gram starts.
  ArrayList<String> grams = new ArrayList<String>(wordCount);
  ArrayList<Integer> tarIndice = new ArrayList<Integer>();
  for (int start = 0; start < wordCount; start++) {
    StringBuilder window = new StringBuilder();
    for (int j = 0; j < n && (start + j) < wordCount; j++) {
      window.append(' ').append(docC.get(start + j));
    }
    // Drop the leading separator. For n <= 0 the window is empty and this throws,
    // which surfaces the misconfiguration instead of emitting empty keys.
    String gram = window.toString().toLowerCase().substring(1);
    grams.add(gram);
    if (gram.equals(targetGram)) {
      tarIndice.add(start);
    }
  }

  // Emission needs the complete list of target positions, hence the second pass.
  for (int start = 0; start < wordCount; start++) {
    String gram = grams.get(start);
    if (gram.equals(targetGram)) {
      continue;
    }
    word.set(gram);
    double distance = this.distance(tarIndice, start);
    double fScore = func.f(distance);
    fSText.set(Double.toString(fScore) + " 1");
    context.write(word, fSText);
  }
}
@Override public void map(WritableComparable docID, Text docContents, Context context) throws IOException, InterruptedException { Matcher matcher = WORD_PATTERN.matcher(docContents.toString()); Func func = funcFromNum(funcNum); // YOUR CODE HERE ArrayList<String> doc_words = new ArrayList<String>(); // Store all words within the document. ArrayList<Double> targetGram_pos = new ArrayList< Double>(); // Store the index of each occurrence of target word in the document DoublePair values = new DoublePair(); // DoublePair that store distance, ocurrences values.setDouble2(new Double(1.0)); // ocurrences = 0 Text output = new Text(); // Store each word within the document in doc_words while (matcher.find()) { doc_words.add(new String(matcher.group().toLowerCase())); } // Traverse the document and store each word within it in ArrayList doc_words, and at the same // time store the index of each occurence of target word within the document in targetGram_pos for (int i = 0; i < doc_words.size(); i++) { String word = doc_words.get(i); if (word.equals(targetGram)) targetGram_pos.add(new Double(i)); } // Traverse the doc_words ArrayList and find the distance between each word within the // document and the target word // If there were not any ocurrence of target word distance is 0 to all words int index_tw = 0; // index target word Double distance = new Double(0); // store the distance between current word and target word for (int i = 0; i < doc_words.size(); i++) { if (targetGram_pos.size() == 0) { // If target word is not within the document, distance for all words is // Double.POSITIVE_INFINITY distance = Double.POSITIVE_INFINITY; } else { if (doc_words .get(i) .equals( targetGram)) { // If word within the document is the same target word skip it and // go to the next word continue; } if (targetGram_pos.size() == 1) { // If there were just one entre of the target word distance = Math.abs(i - targetGram_pos.get(index_tw)); } else { if (index_tw < targetGram_pos.size() - 1) { 
// If this is not the LAST position of the ArrayList of indexes of the // target word if (Math.abs(i - targetGram_pos.get(index_tw)) > Math.abs( i - targetGram_pos.get( index_tw + 1))) { // Compare the lowest distance between the nearest two // indexes index_tw++; } } distance = Math.abs(i - targetGram_pos.get(index_tw)); } } values.setDouble1( new Double(func.f(distance))); // Evaluate dist on f(d) and store it on distance.d1 output.set(doc_words.get(i)); // Output key is each word context.write( output, values); // key, value: key: each word, value:Pair of Double(distance, num of // co-currences) } // end for } // end map1