Exemplo n.º 1
0
 /**
  * Combines the partial results for one key by summing both components of every incoming
  * {@code DoublePair} and writing a single pair holding the two totals under the same key.
  *
  * <p>Per the original author's notes, component 1 carries the distance value and component 2
  * the number of occurrences.
  *
  * @param key key shared by all {@code values}; written back unchanged
  * @param values pairs to be summed component-wise
  * @param context receives the single summed pair
  * @throws IOException propagated from {@code context.write}
  * @throws InterruptedException propagated from {@code context.write}
  */
 @Override
 public void reduce(Text key, Iterable<DoublePair> values, Context context)
     throws IOException, InterruptedException {
   // Primitive accumulators: avoids the deprecated Double constructor and per-iteration boxing.
   double distanceSum = 0.0;
   double occurrenceSum = 0.0;
   for (DoublePair value : values) {
     distanceSum += value.getDouble1();
     occurrenceSum += value.getDouble2();
   }
   context.write(key, new DoublePair(distanceSum, occurrenceSum));
 } // end combine1
Exemplo n.º 2
0
    /**
     * Sums both components of every {@code DoublePair} received for {@code key}, derives a rate
     * from the two totals, and writes it with the rate as the output key and the original key as
     * the output value.
     *
     * <p>With S the sum of the first components and A the sum of the second components, the
     * written value is {@code -(S * ln(S)^3 / A)} when S is non-zero and {@code 0.0} otherwise.
     * The sign is flipped before writing (presumably so that an ascending sort lists the largest
     * rates first; confirm against the job configuration).
     *
     * @param key key shared by all {@code values}; emitted as the output value
     * @param values pairs to be summed component-wise
     * @param context receives the single (rate, key) record
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    public void reduce(Text key, Iterable<DoublePair> values, Context context)
        throws IOException, InterruptedException {
      // Primitive accumulators: avoids the deprecated Double constructor and per-iteration boxing.
      double distanceSum = 0.0;
      double occurrenceSum = 0.0;
      for (DoublePair value : values) {
        distanceSum += value.getDouble1();
        occurrenceSum += value.getDouble2();
      }

      // NOTE(review): a negative distanceSum makes Math.log return NaN, and a zero occurrenceSum
      // divides by zero; confirm upstream stages never produce either.
      double result = 0.0;
      if (distanceSum != 0) {
        result = ((distanceSum * Math.pow(Math.log(distanceSum), 3)) / occurrenceSum) * -1;
      }

      context.write(new DoubleWritable(result), key);
    }
Exemplo n.º 3
0
    /**
     * Tokenizes the document with {@code WORD_PATTERN} and, for each word, emits a
     * {@code DoublePair} of ({@code func.f(distance)}, {@code 1.0}) keyed by the lower-cased word,
     * where {@code distance} is the gap in word positions to the closest occurrence of
     * {@code targetGram}.
     *
     * <p>If the document contains no occurrence of {@code targetGram}, every word is emitted with
     * a distance of {@code Double.POSITIVE_INFINITY}. If it contains at least one, the occurrences
     * of {@code targetGram} themselves are skipped and not emitted.
     *
     * @param docID key of the input record; not read by this method
     * @param docContents document text to tokenize
     * @param context receives one (word, pair) record per emitted word
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    public void map(WritableComparable docID, Text docContents, Context context)
        throws IOException, InterruptedException {

      Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
      Func func = funcFromNum(funcNum);

      // Lower-cased words of the document, in order of appearance.
      ArrayList<String> words = new ArrayList<String>();
      while (matcher.find()) {
        words.add(matcher.group().toLowerCase());
      }

      // Word positions of every occurrence of the target word, in ascending order.
      ArrayList<Integer> targetPositions = new ArrayList<Integer>();
      for (int i = 0; i < words.size(); i++) {
        if (words.get(i).equals(targetGram)) {
          targetPositions.add(i);
        }
      }

      // The same pair and key objects are reused for every record written below.
      DoublePair values = new DoublePair();
      values.setDouble2(1.0); // every emitted record counts as one occurrence
      Text output = new Text();

      boolean hasTarget = !targetPositions.isEmpty();
      int lastTarget = targetPositions.size() - 1;
      int nearest = 0; // index into targetPositions of the closest occurrence found so far
      for (int i = 0; i < words.size(); i++) {
        String word = words.get(i);
        double distance;
        if (!hasTarget) {
          // No target word in this document: distance is infinite for every word.
          distance = Double.POSITIVE_INFINITY;
        } else {
          if (word.equals(targetGram)) {
            // The target word itself is not emitted.
            continue;
          }
          // Positions are ascending and i only grows, so the closest occurrence never moves
          // backwards. Advance in a loop rather than a single step: target words skipped above
          // do not move the pointer, so after a cluster of occurrences it may lag by several.
          while (nearest < lastTarget
              && Math.abs(i - targetPositions.get(nearest))
                  > Math.abs(i - targetPositions.get(nearest + 1))) {
            nearest++;
          }
          distance = Math.abs(i - targetPositions.get(nearest));
        }
        values.setDouble1(func.f(distance)); // first component holds f(distance)
        output.set(word); // output key is the word itself
        context.write(output, values);
      } // end for
    } // end map1