Пример #1
0
 /**
  * Returns the median of the given values without modifying the caller's list.
  *
  * <p>For an even number of elements the upper of the two middle values is returned (the element
  * at index {@code size / 2} of the ascending order); the two middle values are not averaged.
  *
  * @param l values to take the median of; its order is left untouched
  * @return the element at index {@code l.size() / 2} once the values are sorted ascending
  * @throws IndexOutOfBoundsException if {@code l} is empty
  */
 private int median(ArrayList<Integer> l) {
   // Sort a private copy: sorting the argument in place would silently reorder the caller's data.
   ArrayList<Integer> sorted = new ArrayList<Integer>(l);
   Collections.sort(sorted);
   return sorted.get(sorted.size() / 2);
 }
Пример #2
0
    /**
     * Emits one record per non-target word of the document: the word as key and a
     * {@code DoublePair} of (f(distance to the nearest occurrence of the target word), 1.0) as
     * value.
     *
     * <p>Words are extracted with {@code WORD_PATTERN} and lower-cased. Occurrences of
     * {@code targetGram} itself are never emitted. If the target word does not appear in the
     * document at all, every word is emitted with a distance of
     * {@link Double#POSITIVE_INFINITY} passed to the function selected by {@code funcNum}.
     *
     * @param docID key of the input record; not used by this method
     * @param docContents text of the document to scan
     * @param context receives one (word, pair) record per emitted word
     * @throws IOException if writing to {@code context} fails
     * @throws InterruptedException if writing to {@code context} is interrupted
     */
    @Override
    public void map(WritableComparable docID, Text docContents, Context context)
        throws IOException, InterruptedException {

      Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
      Func func = funcFromNum(funcNum);

      // Single pass over the matches: keep every lower-cased word and remember the index of each
      // occurrence of the target word. Positions end up in ascending order by construction.
      ArrayList<String> docWords = new ArrayList<String>();
      ArrayList<Integer> targetPositions = new ArrayList<Integer>();
      while (matcher.find()) {
        String word = matcher.group().toLowerCase();
        if (word.equals(targetGram)) {
          targetPositions.add(docWords.size());
        }
        docWords.add(word);
      }

      // Value and key objects are reused for every write; the second component stays at 1.0
      // (one occurrence per emitted word).
      DoublePair values = new DoublePair();
      values.setDouble2(Double.valueOf(1.0));
      Text output = new Text();

      boolean targetAbsent = targetPositions.isEmpty();
      int nearest = 0; // index into targetPositions of the occurrence closest to the current word
      for (int i = 0; i < docWords.size(); i++) {
        String word = docWords.get(i);
        double distance;
        if (targetAbsent) {
          distance = Double.POSITIVE_INFINITY;
        } else {
          if (word.equals(targetGram)) {
            continue; // the target word itself is not emitted
          }
          // Advance to the closest occurrence. This must be a loop rather than a single step:
          // target positions are skipped above, so after a run of nearby occurrences the
          // pointer can be more than one entry behind. Because i only grows, the closest
          // occurrence never moves backwards. On a tie the earlier occurrence is kept (same
          // distance either way).
          while (nearest < targetPositions.size() - 1
              && Math.abs(i - targetPositions.get(nearest))
                  > Math.abs(i - targetPositions.get(nearest + 1))) {
            nearest++;
          }
          distance = Math.abs(i - targetPositions.get(nearest));
        }
        values.setDouble1(Double.valueOf(func.f(distance)));
        output.set(word);
        context.write(output, values);
      }
    }