コード例 #1
0
 /**
  * Splits the input line into tokens and writes one {@code (word, one)} pair per token.
  *
  * <p>Tokens are produced by a {@link StringTokenizer} with its default delimiters.
  *
  * @param key the input key; not read by this method
  * @param value the line of text to tokenize
  * @param context the context each pair is written to
  * @throws IOException declared by the signature; may be propagated from {@code context.write}
  * @throws InterruptedException declared by the signature; may be propagated from
  *     {@code context.write}
  */
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   // word and one are declared outside this method and are reused for every emitted pair.
   for (StringTokenizer tokens = new StringTokenizer(value.toString());
       tokens.hasMoreTokens(); ) {
     String token = tokens.nextToken();
     word.set(token);
     context.write(word, one);
   }
 }
コード例 #2
0
 /**
  * Collects every value grouped under {@code key} into one {@code InvertedListWritable} and
  * writes it to the context.
  *
  * <p>Each value is split on the default {@link StringTokenizer} delimiters. The first token is
  * used as the URL; every remaining token is appended, followed by a single space, to form the
  * second argument of {@code ListURL}. A value with no tokens yields an empty URL and empty text.
  * After all values are processed, {@code quickSortNodeKey()} is called on the list before it is
  * written.
  *
  * @param key the key whose values are being collected
  * @param values the values grouped under {@code key}
  * @param context the context the resulting list is written to
  * @throws IOException declared by the signature; may be propagated from {@code context.write}
  * @throws InterruptedException declared by the signature; may be propagated from
  *     {@code context.write}
  */
 public void reduce(
     Text key,
     Iterable<Text> values,
     org.apache.hadoop.mapreduce.Reducer<Text, Text, Text, InvertedListWritable>.Context context)
     throws IOException, InterruptedException {
   InvertedListWritable invertedList = new InvertedListWritable();
   for (Text value : values) {
     StringTokenizer tokens = new StringTokenizer(value.toString());
     // url, abs and dr are declared outside this method; they are still assigned on every
     // iteration so any code that reads them afterwards sees the same final values as before.
     url = tokens.hasMoreTokens() ? tokens.nextToken() : "";
     // Accumulate in a StringBuilder: repeated String += inside the loop copies the whole
     // text on every token. The trailing space after the last token is kept intentionally.
     StringBuilder text = new StringBuilder();
     while (tokens.hasMoreTokens()) {
       text.append(tokens.nextToken()).append(' ');
     }
     abs = text.toString();
     dr = new ListURL(url, abs);
     invertedList.paddingValueKey(dr);
   }
   invertedList.quickSortNodeKey();
   context.write(key, invertedList);
 }
コード例 #3
0
    /**
     * For every token of {@code val} that is not a key of {@code hmStopWord}, writes the token as
     * the output key and, as the output value, {@code key} followed by a window of neighbouring
     * tokens.
     *
     * <p>The input is lower-cased and split on {@code tokenDelimiter}. For the token at index
     * {@code i} the window is the half-open index range {@code [i - 10, i + 10)} clamped to the
     * token array, so it holds up to 10 tokens before the current one, the current token itself,
     * and up to 9 after it. The value is {@code key.toString()}, a space, and then each window
     * token followed by a space.
     *
     * @param key the input key; its string form prefixes every emitted value
     * @param val the text to tokenize
     * @param context the context each pair is written to
     * @throws IOException declared by the signature; may be propagated from {@code context.write}
     * @throws InterruptedException declared by the signature; may be propagated from
     *     {@code context.write}
     */
    public void map(
        Text key,
        Text val,
        org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
      // Number of positions taken on each side when computing the window bounds.
      // NOTE(review): the upper bound is exclusive, so only 9 tokens follow the current one;
      // confirm whether a symmetric window was intended.
      final int window = 10;

      // Initialize the stop list.
      // NOTE(review): this runs for every input record; its cost is not visible from here.
      // Consider a one-time setup hook if it rebuilds the map each time.
      initStopWordsMap();

      // NOTE(review): toLowerCase() uses the JVM default locale; confirm that matches how the
      // stop-word keys were normalised.
      StringTokenizer itr = new StringTokenizer(val.toString().toLowerCase(), tokenDelimiter);
      int n = itr.countTokens();

      // cache is declared outside this method. countTokens() equals the number of tokens the
      // loop below reads, so every slot is filled and no pre-initialisation is needed.
      cache = new String[n];
      for (int i = 0; itr.hasMoreTokens(); i++) {
        cache[i] = itr.nextToken();
      }

      String prefix = key.toString();
      for (int i = 0; i < n; i++) {
        keyWord = cache[i].trim();
        if (hmStopWord.containsKey(keyWord)) {
          continue;
        }
        int from = Math.max(0, i - window);
        int to = Math.min(n, i + window);
        // Build "<key> tok tok ... tok " with a StringBuilder instead of String += in a loop.
        StringBuilder snippet = new StringBuilder(prefix).append(' ');
        for (int j = from; j < to; j++) {
          snippet.append(cache[j]).append(' ');
        }
        location = new Text();
        location.set(snippet.toString());
        context.write(new Text(keyWord), location);
      }
    }