Пример #1
0
    /**
     * Flushes the accumulated {@code args} map: one record per entry, followed by a final
     * record keyed {@code "0"} carrying {@code usrCount}.
     *
     * <p>Each entry's value is rendered as the list's element 0, element 1 and the list size.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      for (Entry<String, ArrayList<Long>> record : args.entrySet()) {
        ArrayList<Long> numbers = record.getValue();
        String summary = numbers.get(0) + " \t " + numbers.get(1) + "\t" + numbers.size();
        context.write(new Text(record.getKey()), new Text(summary));
      }

      // Trailing record: key "0" with the user counter as text.
      Text totalKey = new Text("0");
      Text totalValue = new Text(String.valueOf(usrCount));
      context.write(totalKey, totalValue);
    }
    /**
     * Tokenizes the line on {@code ':'} and {@code ' '} and emits one pair per numeric token:
     * the first emitted token gets the value 0, every later token gets the value 1.
     */
    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer tokens = new StringTokenizer(value.toString(), ": ");
      boolean sawFirstPage = false;

      while (tokens.hasMoreTokens()) {
        String token = tokens.nextToken().trim().toLowerCase();
        if (token.isEmpty()) {
          continue;
        }

        int pageId = Integer.parseInt(token);

        // The first page that parses is emitted with 0, all following pages with 1.
        int linkCount = sawFirstPage ? 1 : 0;
        sawFirstPage = true;

        context.write(new IntWritable(pageId), new IntWritable(linkCount));
      }
    }
  /**
   * Sums the counts for {@code key} and writes the total when it reaches {@code minSupport};
   * otherwise only a counter is incremented.
   */
  @Override
  protected void reduce(Text key, Iterable<LongWritable> values, Context context)
      throws IOException, InterruptedException {

    /*
     * Added by Xudong Zhang: when Weibo parsing is enabled, keys that are not
     * entirely Chinese are dropped and counted.
     */
    if (parseWeibo && !ChineseUtils.allChinese(key.toString())) {
      context.getCounter("MyCounter", "NumWordsFilteredByWeiboParser").increment(1);
      return;
    }

    long total = 0;
    for (LongWritable partial : values) {
      total += partial.get();
    }

    if (total >= minSupport) {
      context.write(key, new LongWritable(total));
      return;
    }
    context.getCounter("MyCounter", "NumWordsLessThanMinSupport").increment(1);
  }
 /**
  * Sums all values received for {@code key} and writes the total.
  *
  * <p>The values are walked with a single for-each loop so that {@code iterator()} is obtained
  * exactly once. Calling {@code values.iterator()} on every step only works when the
  * {@code Iterable} hands back the same iterator each time; with an {@code Iterable} that
  * returns a fresh iterator per call, such a loop would never terminate.
  *
  * @param key the key being reduced; written unchanged
  * @param values the partial counts for {@code key}
  * @param output the context the total is written to
  * @throws IOException if the write fails
  * @throws InterruptedException if the write is interrupted
  */
 public void reduce(Text key, Iterable<IntWritable> values, Context output)
     throws IOException, InterruptedException {
   int sum = 0;
   for (IntWritable value : values) {
     sum += value.get();
   }
   output.write(key, new IntWritable(sum));
 }
Пример #5
0
    /** Emits the first comma-separated column of the line together with {@code one}. */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] columns = value.toString().split(",");

      // Only column 0 is used; the remaining columns are ignored.
      String firstColumn = columns[0];
      word.set(firstColumn);
      context.write(word, one);
    }
 /**
  * Writes every pair held in {@code linksMap} under a {@code NullWritable} key, packing each
  * pair as the two-element array {@code {second, first}}.
  */
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   NullWritable outKey = NullWritable.get();
   for (Pair<Integer, Integer> entry : linksMap) {
     // Note the swap: the pair's second component goes into slot 0.
     Integer[] swapped = new Integer[] {entry.second, entry.first};
     context.write(outKey, new IntArrayWritable(swapped));
   }
 }
Пример #7
0
 /** Splits the line on tabs and emits {@code (token, 1)} for every resulting token. */
 @Override
 protected void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String[] tokens = StringUtils.split(value.toString(), "\t");
   for (int i = 0; i < tokens.length; i++) {
     Text outKey = new Text(tokens[i]);
     context.write(outKey, new IntWritable(1));
   }
 }
Пример #8
0
 /**
  * Writes the number of values received for {@code key}. The values themselves are not read;
  * only their count matters.
  */
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int occurrences = 0;
   for (IntWritable ignored : values) {
     occurrences += 1;
   }
   IntWritable result = new IntWritable(occurrences);
   context.write(key, result);
 }
Пример #9
0
 /** Adds up the values for {@code key} and writes the total, rendered as text, via {@code t}. */
 @Override
 protected void reduce(Text key, Iterable<IntWritable> value, Context context)
     throws IOException, InterruptedException {
   int total = 0;
   for (IntWritable part : value) {
     total = total + part.get();
   }

   // The reusable output holder receives the decimal string form of the sum.
   String rendered = String.valueOf(total);
   t.set(rendered);
   context.write(key, t);
 }
Пример #10
0
    /**
     * Splits one input line with {@code fieldDelimRegex} and repackages its fields into the
     * reusable {@code outKey} / {@code outVal} objects, which are then written to the context.
     *
     * <p>Two input layouts are handled:
     * <ul>
     *   <li>{@code classCondtionWeighted}: fields are read from fixed positions (0 = test entity,
     *       1 = test class in validation mode, 2 = training entity, 3 = rank, 4 = training class,
     *       5 = a value parsed as a double into {@code trainingFeaturePostProb}).</li>
     *   <li>otherwise: fields are consumed left to right (training entity, test entity, rank,
     *       training class, then the test class in validation mode, then the two numeric
     *       regression fields when {@code isLinearRegression} is set).</li>
     * </ul>
     *
     * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      items = value.toString().split(fieldDelimRegex);
      // key and value holders are instance fields that are reset for each record
      outKey.initialize();
      outVal.initialize();
      if (classCondtionWeighted) {
        // fixed-position layout
        trainEntityId = items[2];
        testEntityId = items[0];
        rank = Integer.parseInt(items[3]);
        trainClassAttr = items[4];
        trainingFeaturePostProb = Double.parseDouble(items[5]);
        if (isValidationMode) {
          // validation mode
          testClassAttr = items[1];
          outKey.add(testEntityId, testClassAttr, rank);
        } else {
          // prediction mode
          outKey.add(testEntityId, rank);
        }
        outVal.add(trainEntityId, rank, trainClassAttr, trainingFeaturePostProb);
      } else {
        // sequential layout: index advances as each field is consumed, so order matters
        int index = 0;
        trainEntityId = items[index++];
        testEntityId = items[index++];
        rank = Integer.parseInt(items[index++]);
        trainClassAttr = items[index++];
        if (isValidationMode) {
          // validation mode
          testClassAttr = items[index++];
        }
        outVal.add(trainEntityId, rank, trainClassAttr);

        // for linear regression add numeric input field
        if (isLinearRegression) {
          trainRegrNumFld = items[index++];
          outVal.add(trainRegrNumFld);

          testRegrNumFld = items[index++];
          if (isValidationMode) {
            outKey.add(testEntityId, testClassAttr, testRegrNumFld, rank);
          } else {
            outKey.add(testEntityId, testRegrNumFld, rank);
          }
          // NOTE(review): testRegrNumFld was already added to the key just above, so it ends
          // up in the key twice - confirm this trailing copy is intentional.
          outKey.add(testRegrNumFld);
        } else {
          if (isValidationMode) {
            outKey.add(testEntityId, testClassAttr, rank);
          } else {
            outKey.add(testEntityId, rank);
          }
        }
      }
      context.write(outKey, outVal);
    }
    /**
     * Sums all values received for {@code key} and writes the total under the same key.
     *
     * <p>The accumulator is a primitive {@code int}; a boxed {@code Integer} accumulator would
     * be unboxed and re-boxed on every iteration for no benefit.
     *
     * @param key the key being reduced; written unchanged
     * @param values the partial counts for {@code key}
     * @param context the context the total is written to
     * @throws IOException if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;

      for (IntWritable val : values) {
        sum += val.get();
      }

      context.write(key, new IntWritable(sum));
    }
    /**
     * Parses the line and, when the parsed color equals the {@code "param.color"} job setting,
     * emits {@code (suit, one)}.
     */
    public void map(LongWritable key, Text value, Context output)
        throws IOException, InterruptedException {
      parser.parse(value.toString());

      String wantedColor = jobconf.get("param.color");
      if (!parser.getColor().equals(wantedColor)) {
        // Different color: nothing is emitted for this record.
        return;
      }

      suit = parser.getSuit();
      output.write(new Text(suit), one);
    }
Пример #13
0
  /**
   * Splits the line on commas and emits a {@code CustomWritable} built from the third column
   * (parsed as an int) and the constant 1, with the first two columns re-joined by a comma as
   * the value.
   */
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {

    String[] fields = value.toString().split(",");

    String firstTwo = fields[0] + "," + fields[1];
    int third = Integer.parseInt(fields[2]);

    context.write(new CustomWritable(third, 1), new Text(firstTwo));
  }
Пример #14
0
    /**
     * Merges the per-word postings received for {@code key} into one record.
     *
     * <p>Each incoming value is split on {@code "@"}: the part before it is a word, the part
     * after it is a list of alternating document names and counts separated by {@code ':'} or
     * {@code ','}. Counts are summed per document across all values. The output key is the
     * distinct words joined by commas (first-seen order); the output value lists every document
     * on its own line as {@code "\n<doc>: <total>"}, ordered by descending total.
     *
     * @param key the incoming key (not used in the output)
     * @param values postings of the form {@code word@doc:count,doc:count,...}
     * @param context the context the merged record is written to
     * @throws IOException if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      Hashtable<String, Integer> wordCounts = new Hashtable<String, Integer>();
      ArrayList<String> docNames = new ArrayList<String>();
      LinkedList<String> wordNames = new LinkedList<String>();

      // A single for-each pass obtains the iterator exactly once.
      for (Text value : values) {
        String[] items = value.toString().split("@");
        if (!wordNames.contains(items[0])) {
          wordNames.add(items[0]);
        }
        String[] keys = items[1].split(":|,");
        for (int i = 0; i < keys.length; i += 2) {
          String doc = keys[i];
          // docNames and wordCounts are always extended together, so a missing map entry
          // means the document has not been seen yet.
          Integer soFar = wordCounts.get(doc);
          if (soFar == null) {
            docNames.add(doc);
            soFar = 0;
          }
          wordCounts.put(doc, soFar + Integer.parseInt(keys[i + 1]));
        }
      }

      // Exchange sort by descending total; kept as-is so the relative order of documents
      // with equal totals does not change.
      for (int i = 0; i < docNames.size() - 1; ++i) {
        for (int j = i + 1; j < docNames.size(); ++j) {
          if (wordCounts.get(docNames.get(i)) < wordCounts.get(docNames.get(j))) {
            String swapped = docNames.get(i);
            docNames.set(i, docNames.get(j));
            docNames.set(j, swapped);
          }
        }
      }

      StringBuilder retKey = new StringBuilder();
      for (String word : wordNames) {
        if (retKey.length() > 0) {
          retKey.append(",");
        }
        retKey.append(word);
      }

      StringBuilder retValue = new StringBuilder();
      for (String doc : docNames) {
        retValue.append("\n").append(doc).append(": ").append(wordCounts.get(doc));
      }
      context.write(new Text(retKey.toString()), new Text(retValue.toString()));
    }
Пример #15
0
    /**
     * Splits the line on the U+0001 control character and, for rows whose first field is
     * {@code "2"} or {@code "724"} and whose sixth field matches {@code patternFilename}, emits
     * {@code (id TAB peerid, 1)}. Rows with fewer than six fields are ignored.
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {

      String[] fields = value.toString().split("\u0001");
      if (fields.length >= 6) {
        String id = fields[0];
        String peerid = fields[2];
        String filename = fields[5];

        boolean wantedId = id.equals("2") || id.equals("724");
        if (wantedId && Pattern.matches(patternFilename, filename)) {
          Text outKey = new Text(id + "\t" + peerid);
          context.write(outKey, new IntWritable(1));
        }
      }
    }
    /**
     * Parses one record and emits {@code (year, airTemperature)} when the temperature is valid.
     * Malformed and missing temperatures are only counted; a per-quality dynamic counter is
     * incremented for every record.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {

      parser.parse(value);

      boolean usable = parser.isValidTemperature();
      if (!usable) {
        if (parser.isMalformedTemperature()) {
          System.err.println("Ignoring possibly corrupt input: " + value);
          context.getCounter(Temperature.MALFORMED).increment(1);
        } else if (parser.isMissingTemperature()) {
          context.getCounter(Temperature.MISSING).increment(1);
        }
      } else {
        int reading = parser.getAirTemperature();
        context.write(new Text(parser.getYear()), new IntWritable(reading));
      }

      // dynamic counter, keyed by whatever quality code the parser reports
      context.getCounter("TemperatureQuality", parser.getQuality()).increment(1);
    }
Пример #17
0
    /**
     * Emits {@code ("Keywords", term0@term1)} when the first tab-separated column of the line
     * equals one of the space-separated words in the {@code "keyword"} configuration entry.
     */
    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String keywords = context.getConfiguration().get("keyword");
      String[] terms = value.toString().split("\t"); // terms[0] is the candidate keyword
      String[] wanted = keywords.split(" ");

      boolean matched = false;
      for (String candidate : wanted) {
        if (candidate.equals(terms[0])) {
          matched = true;
          break;
        }
      }
      if (!matched) {
        return;
      }

      context.write(new Text("Keywords"), new Text(terms[0] + "@" + terms[1]));
    }
Пример #18
0
  /**
   * Writes every entry of {@code maps} to {@code context}: the key as decimal text and the
   * {@code float[]} value as a comma-separated list.
   *
   * <p>The list is built with separators placed between elements, so an empty array yields an
   * empty string. Trimming a trailing comma with {@code substring(0, lastIndexOf(","))} would
   * throw {@code StringIndexOutOfBoundsException} for an empty array.
   *
   * <p>Write failures are reported on stderr and the remaining entries are still attempted.
   * If the failure is an {@code InterruptedException}, the thread's interrupt flag is restored
   * so the interruption is not lost.
   */
  @Override
  public void run() {
    System.out.println("write:");
    for (Entry<Integer, float[]> entry : maps.entrySet()) {
      int entryKey = entry.getKey();
      float[] value = entry.getValue();

      StringBuilder joined = new StringBuilder();
      for (int i = 0; i < value.length; i++) {
        if (i > 0) {
          joined.append(",");
        }
        joined.append(value[i]);
      }

      try {
        context.write(new Text(String.valueOf(entryKey)), new Text(joined.toString()));
      } catch (Exception e) {
        if (e instanceof InterruptedException) {
          // Preserve the interrupt for whoever owns this thread.
          Thread.currentThread().interrupt();
        }
        e.printStackTrace();
      }
    }
  }
  /**
   * Fills the reusable {@code mapKeyObj} / {@code mapValObj} holders and writes them.
   *
   * <p>Key fields: appId, accountId, platform, gameServer. Value fields: channel,
   * onlineRecords, with {@code playerType} set as the value's suffix.
   */
  private void writeMapUnionResult(
      Context context,
      String appId,
      String accountId,
      String platform,
      String channel,
      String gameServer,
      String onlineRecords,
      String playerType)
      throws IOException, InterruptedException {

    mapKeyObj.setOutFields(new String[] {appId, accountId, platform, gameServer});

    mapValObj.setOutFields(new String[] {channel, onlineRecords});
    mapValObj.setSuffix(playerType);

    context.write(mapKeyObj, mapValObj);
  }
    /**
     * Adds every incoming {@code (pageId, count)} array to {@code linksMap} as a
     * {@code (count, pageId)} pair, dropping the first element whenever the collection grows
     * beyond {@code N}, and then writes all retained pairs as {@code (pageId, count)}.
     */
    @Override
    public void reduce(NullWritable key, Iterable<IntArrayWritable> values, Context context)
        throws IOException, InterruptedException {
      for (IntArrayWritable packed : values) {
        IntWritable[] cells = (IntWritable[]) packed.toArray();
        int pageId = cells[0].get();
        int count = cells[1].get();

        // Stored with the count in the pair's first slot.
        linksMap.add(new Pair<Integer, Integer>(count, pageId));

        if (linksMap.size() > N) {
          linksMap.remove(linksMap.first());
        }
      }

      for (Pair<Integer, Integer> kept : linksMap) {
        context.write(new IntWritable(kept.second), new IntWritable(kept.first));
      }
    }
Пример #21
0
  /**
   * Assigns one input point to its closest centroid.
   *
   * <p>The line is split on tabs; the first three columns are skipped and the remaining ones
   * are parsed as the point's coordinates. The distance to every entry of
   * {@code centroidsList} is computed with {@code calcDist}, and the index of the smallest
   * distance becomes the output key ({@code -1} when no distance is below
   * {@code Double.MAX_VALUE}). The output value is {@code "1#"} followed by the tab-separated
   * coordinates.
   *
   * <p>A failure inside {@code calcDist} is rethrown as an {@code IOException} carrying the
   * original cause. Calling {@code System.exit} here would terminate the task JVM without
   * letting the framework report the failure.
   *
   * @param key unused
   * @param value one tab-separated input record
   * @param context the context the assignment is written to
   * @throws IOException if a distance cannot be computed or the write fails
   * @throws InterruptedException if the write is interrupted
   */
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] feature = value.toString().split("\t");

    // Columns 0..2 are not coordinates; everything from column 3 on is.
    double[] point = new double[feature.length - 3];
    for (int j = 3; j < feature.length; j++) {
      point[j - 3] = Double.parseDouble(feature[j].trim());
    }

    double minDiff = Double.MAX_VALUE;
    int minIndex = -1;
    for (int i = 0; i < centroidsList.length; i++) {
      double diff;
      try {
        diff = calcDist(centroidsList[i], point);
      } catch (Exception e) {
        throw new IOException("Failed to compute distance to centroid " + i, e);
      }
      if (diff < minDiff) {
        minDiff = diff;
        minIndex = i;
      }
    }
    Text keyWithCentroid = new Text("" + minIndex);

    // Tab-separated coordinates with no trailing separator.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < point.length; i++) {
      if (i > 0) {
        sb.append("\t");
      }
      sb.append(point[i]);
    }

    Text pointText = new Text(1 + "#" + sb.toString());
    context.write(keyWithCentroid, pointText);
  }
  /**
   * Treats {@code value} as a file name relative to the parent directory of the current input
   * split, reads that file completely and emits {@code (value, file bytes)}.
   *
   * <p>Only the bytes actually returned by each {@code read} call are copied. Writing the
   * whole buffer regardless of the read count would pad the result up to a multiple of the
   * buffer size with stale or zero bytes.
   *
   * @param key unused
   * @param value the name of the file to load
   * @param context supplies the configuration and input split, and receives the output
   * @throws IOException if the file cannot be opened or read, or the write fails
   * @throws InterruptedException if the write is interrupted
   */
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) context.getInputSplit();
    String rootFolder = split.getPath().getParent().toString();
    String uri = rootFolder + "/" + value.toString();
    // e.g. C:/hadoopsample/input/images + "/" + image1.jpg

    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    FSDataInputStream in = null;
    try {
      in = fs.open(new Path(uri));
      ByteArrayOutputStream bout = new ByteArrayOutputStream();
      byte[] buffer = new byte[1024 * 1024];

      // read() returns how many bytes it filled in, or a negative value at end of stream.
      int bytesRead;
      while ((bytesRead = in.read(buffer, 0, buffer.length)) >= 0) {
        bout.write(buffer, 0, bytesRead);
      }
      context.write(value, new BytesWritable(bout.toByteArray()));
    } finally {
      IOUtils.closeStream(in);
    }
  }
Пример #23
0
    /**
     * Builds one delimited output line for the test entity identified by {@code key}.
     *
     * <p>Up to {@code topMatchCount} incoming tuples are added to {@code neighborhood} as
     * neighbors. The output line starts with the test entity id, optionally followed by the
     * class distribution (one {@code class, score} pair per class), the actual class value in
     * validation mode, and finally the predicted value. Every appended field is preceded by
     * {@code fieldDelim}; this includes the entries of the unweighted class distribution, which
     * would otherwise be fused with the preceding field.
     *
     * @param key tuple whose field 0 is the test entity id; other fields are read depending on
     *     the validation and regression modes
     * @param values neighbor tuples (training entity id, distance, class value, then optional
     *     extra fields)
     * @param context the context the output line is written to, under a {@code NullWritable} key
     * @throws IOException if the write fails
     * @throws InterruptedException if the write is interrupted
     * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
     */
    protected void reduce(Tuple key, Iterable<Tuple> values, Context context)
        throws IOException, InterruptedException {
      // the string builder is reused across calls, so clear leftovers first
      if (stBld.length() > 0) {
        stBld.delete(0, stBld.length());
      }
      testEntityId = key.getString(0);
      stBld.append(testEntityId);

      // collect nearest neighbors
      count = 0;
      neighborhood.initialize();
      for (Tuple value : values) {
        int index = 0;
        trainEntityId = value.getString(index++);
        distance = value.getInt(index++);
        trainClassValue = value.getString(index++);
        if (classCondtionWeighted && neighborhood.IsInClassificationMode()) {
          trainingFeaturePostProb = value.getDouble(index++);
          if (inverseDistanceWeighted) {
            neighborhood.addNeighbor(
                trainEntityId, distance, trainClassValue, trainingFeaturePostProb, true);
          } else {
            neighborhood.addNeighbor(
                trainEntityId, distance, trainClassValue, trainingFeaturePostProb);
          }
        } else {
          Neighborhood.Neighbor neighbor =
              neighborhood.addNeighbor(trainEntityId, distance, trainClassValue);
          if (neighborhood.isInLinearRegressionMode()) {
            neighbor.setRegrInputVar(Double.parseDouble(value.getString(index++)));
          }
        }
        // stop once the configured number of neighbors has been consumed
        if (++count == topMatchCount) {
          break;
        }
      }
      if (neighborhood.isInLinearRegressionMode()) {
        // the key carries the test class at position 1 in validation mode, which shifts
        // the numeric regression field by one
        String testRegrNumFld = isValidationMode ? key.getString(2) : key.getString(1);
        neighborhood.withRegrInputVar(Double.parseDouble(testRegrNumFld));
      }

      // class distribution
      neighborhood.processClassDitribution();
      if (outputClassDistr && neighborhood.IsInClassificationMode()) {
        if (classCondtionWeighted) {
          Map<String, Double> classDistr = neighborhood.getWeightedClassDitribution();
          for (Map.Entry<String, Double> classScore : classDistr.entrySet()) {
            double thisScore = classScore.getValue();
            stBld
                .append(fieldDelim)
                .append(classScore.getKey())
                .append(fieldDelim)
                .append(thisScore);
          }
        } else {
          Map<String, Integer> classDistr = neighborhood.getClassDitribution();
          for (Map.Entry<String, Integer> classScore : classDistr.entrySet()) {
            int thisScore = classScore.getValue();
            // leading delimiter keeps each pair separate from the field before it
            stBld
                .append(fieldDelim)
                .append(classScore.getKey())
                .append(fieldDelim)
                .append(thisScore);
          }
        }
      }

      if (isValidationMode) {
        // actual class attr value
        testClassValActual = key.getString(1);
        stBld.append(fieldDelim).append(testClassValActual);
      }

      // predicted class value
      if (useCostBasedClassifier) {
        // use cost based arbitrator
        if (neighborhood.IsInClassificationMode()) {
          posClassProbab = neighborhood.getClassProb(posClassAttrValue);
          testClassValPredicted = costBasedArbitrator.classify(posClassProbab);
        }
      } else {
        // get directly
        if (neighborhood.IsInClassificationMode()) {
          testClassValPredicted = neighborhood.classify();
        } else {
          testClassValPredicted = "" + neighborhood.getPredictedValue();
        }
      }
      stBld.append(fieldDelim).append(testClassValPredicted);

      if (isValidationMode) {
        if (neighborhood.IsInClassificationMode()) {
          confMatrix.report(testClassValPredicted, testClassValActual);
        }
      }
      outVal.set(stBld.toString());
      context.write(NullWritable.get(), outVal);
    }
Пример #24
0
 // Walks usercountMap and writes each (key, count) pair to the context.
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   for (String user : usercountMap.keySet()) {
     Text outKey = new Text(user);
     IntWritable outValue = new IntWritable(usercountMap.get(user));
     context.write(outKey, outValue);
   }
 }