Пример #1
0
    /**
     * Emits one record for every gram of the document that differs from {@code targetGram}.
     *
     * <p>The document text is tokenized with {@code WORD_PATTERN}. For each token position
     * {@code i} a gram is formed from up to {@code n} consecutive tokens starting at {@code i},
     * joined with single spaces and lower-cased; grams starting near the end of the document
     * contain fewer than {@code n} tokens. Positions whose gram equals {@code targetGram} are
     * collected first, then every other gram is written as key with the value
     * {@code "<f(distance)> 1"}, where {@code distance} is whatever
     * {@code this.distance(targetPositions, i)} returns (presumably the distance to the closest
     * target occurrence; confirm against that helper).
     *
     * <p>Note: as in the previous implementation, a value of {@code n} below 1 yields an empty
     * gram and {@code substring(1)} throws {@link StringIndexOutOfBoundsException}.
     *
     * @param docID key of the input record; not read by this method
     * @param docContents text of the document to scan
     * @param context sink that receives the (gram, score) records
     * @throws IOException if {@code context.write} fails
     * @throws InterruptedException if {@code context.write} is interrupted
     */
    @Override
    public void map(WritableComparable docID, Text docContents, Context context)
        throws IOException, InterruptedException {

      Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
      Func func = this.funcFromNum(funcNum);

      // Index every matched token by its position. docC is declared outside this method;
      // only the entries 0..counter-1 written here are read back below.
      int counter = 0;
      while (matcher.find()) {
        docC.put(Integer.valueOf(counter), matcher.group());
        counter++;
      }

      // First pass: build each gram exactly once, remember it, and record where the target
      // gram occurs. All target positions must be known before any distance is computed.
      String[] grams = new String[counter];
      ArrayList<Integer> tarIndice = new ArrayList<Integer>();
      for (int i = 0; i < counter; i++) {
        StringBuilder gram = new StringBuilder();
        for (int j = 0; j < n && (i + j) < counter; j++) {
          gram.append(' ').append(docC.get(Integer.valueOf(i + j)));
        }
        // Drop the leading separator after lower-casing, exactly as before.
        grams[i] = gram.toString().toLowerCase().substring(1);
        if (grams[i].equals(targetGram)) {
          tarIndice.add(Integer.valueOf(i));
        }
      }

      // Second pass: score and emit every gram that is not the target itself.
      for (int i = 0; i < counter; i++) {
        String gram = grams[i];
        if (gram.equals(targetGram)) {
          continue;
        }
        word.set(gram);
        double distance = this.distance(tarIndice, i);
        double fScore = func.f(distance);
        // The trailing " 1" is the per-record count field of the emitted value.
        fSText.set(Double.toString(fScore) + " 1");
        context.write(word, fSText);
      }
    }
Пример #2
0
    /**
     * Emits, for every word of the document other than the target word, the pair
     * {@code (f(distance), 1.0)} keyed by that word.
     *
     * <p>The document text is tokenized with {@code WORD_PATTERN} and lower-cased. The distance
     * of a word is the absolute difference between its index and the index of the closest
     * occurrence of {@code targetGram}; if the target word does not occur in the document the
     * distance is {@link Double#POSITIVE_INFINITY} for every word. Occurrences of the target
     * word itself are not emitted.
     *
     * @param docID key of the input record; not read by this method
     * @param docContents text of the document to scan
     * @param context sink that receives the (word, pair) records
     * @throws IOException if {@code context.write} fails
     * @throws InterruptedException if {@code context.write} is interrupted
     */
    @Override
    public void map(WritableComparable docID, Text docContents, Context context)
        throws IOException, InterruptedException {

      Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
      Func func = funcFromNum(funcNum);

      // Lower-cased words of the document, in order of appearance.
      ArrayList<String> docWords = new ArrayList<String>();
      while (matcher.find()) {
        docWords.add(matcher.group().toLowerCase());
      }

      // Indices at which the target word occurs, in ascending order.
      ArrayList<Integer> targetPositions = new ArrayList<Integer>();
      for (int i = 0; i < docWords.size(); i++) {
        if (docWords.get(i).equals(targetGram)) {
          targetPositions.add(Integer.valueOf(i));
        }
      }

      // Reused output value: first component is f(distance), second is fixed at 1.0.
      DoublePair values = new DoublePair();
      values.setDouble2(Double.valueOf(1.0));
      Text output = new Text();

      // Cursor into targetPositions pointing at the occurrence closest to the current word.
      // It only ever moves forward because word indices are visited in ascending order.
      int nearest = 0;
      for (int i = 0; i < docWords.size(); i++) {
        String current = docWords.get(i);
        if (current.equals(targetGram)) {
          // The target word itself is never emitted.
          continue;
        }

        double distance;
        if (targetPositions.isEmpty()) {
          distance = Double.POSITIVE_INFINITY;
        } else {
          // Advance as far as needed: target words are skipped above without moving the
          // cursor, so after a run of consecutive occurrences a single step is not enough.
          while (nearest < targetPositions.size() - 1
              && Math.abs(i - targetPositions.get(nearest))
                  > Math.abs(i - targetPositions.get(nearest + 1))) {
            nearest++;
          }
          distance = Math.abs(i - targetPositions.get(nearest));
        }

        values.setDouble1(Double.valueOf(func.f(distance)));
        output.set(current);
        context.write(output, values);
      }
    }