예제 #1
0
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String file = value.toString();
      String[] lines = file.split("\n");

      for (String line : lines) {
        if (line.contains("<author>") && line.contains("</author>")) {
          String author = line.substring(8, line.indexOf("</a"));
          word.set(author);
          context.write(word, one);
        } else if (line.contains("<author>")) {
          String author = line.substring(8);
          word.set(author);
          context.write(word, one);
        }
      }
    }
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     context.write(word, one);
   }
 }
예제 #3
0
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();

      splitposition = line.indexOf("\t");
      labels = line.substring(0, splitposition);
      content = line.substring(splitposition + 1, line.length());
      if (content.length() <= 5) {
        word.set(labels);
        int counter = Integer.parseInt(content);
        context.write(word, new IntWritable(counter));
        return;
      }
      labeltokens = labels.split(",");
      contenttokens = tokenizeDoc(content);

      for (String label : labelspace) {
        for (String token : contenttokens) {
          // filter token, denotes the needed events;
          word.set(label + " " + token);
          context.write(word, new IntWritable(-1));
        }
      }
    }
    public void map(
        Text key,
        Text val,
        org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
      int i = 0, n = 0, j = 0, lj = 0, hj = 0;
      String tem = "";

      initStopWordsMap(); // initialize  the stop list
      String line = val.toString();
      StringTokenizer itr =
          new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter
      n = itr.countTokens();
      cache = new String[n];
      for (i = 0; i < n; i++) {
        cache[i] = new String(""); // initialize the cache
      }
      i = 0;
      while (itr.hasMoreTokens()) {
        cache[i] = itr.nextToken(); // padding the cache with the words of the content
        i++;
      }
      for (i = 0; i < n; i++) {
        keyWord = cache[i];
        keyWord = keyWord.trim();
        if (!hmStopWord.containsKey(keyWord)) {
          lj = i - 10;
          hj = i + 10;
          if (lj < 0) lj = 0;
          if (hj > n) hj = n;
          tem = " ";
          for (j = lj; j < hj; j++) tem += cache[j] + " ";
          location = new Text();
          location.set(key.toString() + tem);
          context.write(new Text(keyWord), location);
        }
      }
    }