Exemplo n.º 1
0
    /**
     * Emits at most the first 100 entries of {@code countMap} after it has been passed through
     * {@code sortByValues}.
     *
     * <p>Entries are written in the iteration order of the map returned by {@code sortByValues};
     * presumably that order is descending by count, which would make this a "top 100" output
     * (confirm against the helper's definition).
     *
     * @param context task context that receives the emitted key/value pairs
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    protected void cleanup(Context context) throws IOException, InterruptedException {
      Map<Text, IntWritable> ranked = sortByValues(countMap);

      int emitted = 0;
      for (Map.Entry<Text, IntWritable> entry : ranked.entrySet()) {
        // Stop once the cap of 100 records has been reached.
        if (emitted == 100) {
          break;
        }
        context.write(entry.getKey(), entry.getValue());
        emitted++;
      }
    }
Exemplo n.º 2
0
 /**
  * Adds up all counts received for {@code key} and records the total in {@code countMap}.
  *
  * <p>Nothing is written to {@code context} here; the total is only stored in the map.
  *
  * @param key the key whose values are being aggregated
  * @param values the partial counts for {@code key}
  * @param context task context (not used by this method)
  * @throws IOException declared by the signature; not thrown by the visible code
  * @throws InterruptedException declared by the signature; not thrown by the visible code
  */
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int total = 0;
   for (IntWritable partial : values) {
     total = total + partial.get();
   }
   // Store fresh copies rather than the passed-in objects; presumably because the framework
   // may reuse the key instance between calls (confirm against the Hadoop Reducer docs).
   Text keyCopy = new Text(key);
   IntWritable totalWritable = new IntWritable(total);
   countMap.put(keyCopy, totalWritable);
 }
Exemplo n.º 3
0
    /**
     * Returns a new map holding the entries of {@code map} ordered by value, largest first.
     *
     * <p>The result is a {@link LinkedHashMap}, so iterating it yields the entries in sorted
     * order. {@code Collections.sort} is stable, so entries with equal values keep the relative
     * order in which {@code map.entrySet()} produced them. The input map is not modified.
     *
     * @param map the map whose entries should be ordered
     * @param <K> key type
     * @param <V> value type, compared via its {@code compareTo}
     * @return a new insertion-ordered map sorted by descending value
     */
    public static <K extends Text, V extends IntWritable> Map<K, V> sortByValues(Map<K, V> map) {
      // Arguments are swapped in the comparison so that larger values come first.
      Comparator<Map.Entry<K, V>> descendingByValue =
          new Comparator<Map.Entry<K, V>>() {
            public int compare(Map.Entry<K, V> left, Map.Entry<K, V> right) {
              return right.getValue().compareTo(left.getValue());
            }
          };

      List<Map.Entry<K, V>> ordered = new LinkedList<Map.Entry<K, V>>(map.entrySet());
      Collections.sort(ordered, descendingByValue);

      Map<K, V> result = new LinkedHashMap<K, V>();
      for (Map.Entry<K, V> item : ordered) {
        result.put(item.getKey(), item.getValue());
      }
      return result;
    }
    /**
     * Emits, for every token of {@code val} that is not a stop word, the token together with the
     * record key and a window of neighbouring tokens.
     *
     * <p>The value is lower-cased and split on {@code tokenDelimiter}. For the token at index
     * {@code i}, the window covers indices {@code [i - 10, i + 10)} clamped to the token array,
     * i.e. up to 10 tokens before it, the token itself, and up to 9 tokens after it. The emitted
     * value is {@code key.toString()} followed by a space and the window tokens, each followed
     * by a single space.
     *
     * <p>Side effects: the fields {@code cache}, {@code keyWord} and {@code location} are
     * overwritten, and {@code initStopWordsMap()} is invoked on every call.
     *
     * @param key record key; its string form prefixes every emitted value
     * @param val record text to tokenize
     * @param context task context that receives the (token, key + window) pairs
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    public void map(
        Text key,
        Text val,
        org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
      // NOTE(review): this repopulates the stop-word map for every record; it looks like it
      // could be done once per task instead — confirm before moving it.
      initStopWordsMap();

      StringTokenizer itr = new StringTokenizer(val.toString().toLowerCase(), tokenDelimiter);
      final int n = itr.countTokens();

      // countTokens() matches the number of nextToken() calls available, so every slot is
      // filled here and no separate pre-initialisation of the array is needed.
      cache = new String[n];
      for (int i = 0; itr.hasMoreTokens(); i++) {
        cache[i] = itr.nextToken();
      }

      // The key does not change while the tokens are processed, so convert it only once.
      final String keyPrefix = key.toString();

      for (int i = 0; i < n; i++) {
        keyWord = cache[i].trim();
        if (hmStopWord.containsKey(keyWord)) {
          continue;
        }

        // Window bounds: lower inclusive, upper exclusive, clamped to [0, n].
        final int lo = Math.max(i - 10, 0);
        final int hi = Math.min(i + 10, n);

        // Window tokens are appended untrimmed, exactly as they came from the tokenizer.
        StringBuilder window = new StringBuilder(" ");
        for (int j = lo; j < hi; j++) {
          window.append(cache[j]).append(' ');
        }

        location = new Text();
        location.set(keyPrefix + window);
        context.write(new Text(keyWord), location);
      }
    }
  /**
   * Populates the stop-word map: every element of {@code stopWords} is inserted into
   * {@code hmStopWord} as a key with a {@code null} value, so only key membership carries
   * information.
   */
  public static void initStopWordsMap() {
    final int total = stopWords.length;
    int idx = 0;
    while (idx < total) {
      hmStopWord.put(stopWords[idx], null);
      idx++;
    }
  }