コード例 #1
0
    /**
     * Emits one output pair per feature of the input record: the key is the feature's zero-based
     * position and the value is the feature itself, both rendered as text.
     *
     * <p>Lines that are empty after trimming are skipped without producing output.
     *
     * @param key input key; not read by this method
     * @param value one input line, parsed into a {@code DataPoint} after trimming
     * @param context receives the (position, feature value) pairs
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     * @throws RuntimeException if the number of parsed features differs from {@code featnum}
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      final String line = value.toString().trim();
      if (line.isEmpty()) {
        return;
      }

      final double[] features = new DataPoint(line).getFeatures().getValArr();

      // featnum is defined outside this method (presumably the configured feature count).
      // The message reads: "the feature length of the current record differs from the
      // configured length".
      if (features.length != featnum) {
        throw new RuntimeException("当前record的特征长度与设置长度不同!");
      }

      int position = 0;
      for (double feature : features) {
        context.write(new Text(Integer.toString(position)), new Text(Double.toString(feature)));
        position++;
      }
    }
コード例 #2
0
    /**
     * Cuts the feature list of one input record into consecutive slices and writes every slice as
     * a separate output record.
     *
     * <p>Each output key is {@code filename + "," + key.get() + "," + sliceIndex}. Each output value
     * is the {@code toString()} form of a {@code WDataPoint} built from the record's uid, bid and
     * label, the slice, the constant {@code 0.0} and the slice index. A slice is written as soon as
     * the next feature would no longer fit into {@code slicelen} positions; whatever remains after
     * the last feature is written as a final, possibly shorter, slice.
     *
     * <p>Lines with fewer than four whitespace-separated tokens, and records whose features are
     * {@code null}, produce no output.
     *
     * @param key input key; its {@code get()} value becomes part of every output key
     * @param value one input line, parsed into a {@code DataPoint}
     * @param context receives one record per slice
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      final String keyPrefix = filename + "," + key.get();

      // Expected input layout: uid\tbid\tlabel\tweight\tweight\tweight\t...
      final String line = value.toString();
      if (line.split("\\s+").length < 4) {
        return;
      }

      final DataPoint record = new DataPoint(line);
      if (record.getFeatures() == null) {
        return;
      }

      final ArrayList<Double> allFeatures = record.getFeatures().getValue();
      final ArrayList<Double> buffer = new ArrayList<Double>(slicelen);
      int sliceIndex = 0;

      for (int pos = 0; pos < allFeatures.size(); pos++) {
        // Offset of this feature inside the current slice; reaching slicelen means the buffer
        // already holds a complete slice, so flush it before storing the current feature.
        final int offsetInSlice = pos - sliceIndex * slicelen;
        if (offsetInSlice >= slicelen) {
          emitSlice(keyPrefix, record, buffer, sliceIndex, context);
          sliceIndex++;
          // The buffer is reused for the next slice. The original author notes that WDataPoint
          // copies the list (not verifiable from this method); either way the slice has already
          // been rendered to text by the time it is cleared.
          buffer.clear();
        }
        // The current feature always lands in the (possibly freshly emptied) buffer.
        final double feature = allFeatures.get(pos);
        buffer.add(feature);
      }

      // Write whatever is left over as the last slice.
      if (!buffer.isEmpty()) {
        emitSlice(keyPrefix, record, buffer, sliceIndex, context);
        buffer.clear();
      }
    }

    /** Writes one slice of {@code record} under the key {@code keyPrefix + "," + sliceIndex}. */
    private void emitSlice(
        String keyPrefix, DataPoint record, ArrayList<Double> slice, int sliceIndex, Context context)
        throws IOException, InterruptedException {
      WDataPoint part =
          new WDataPoint(
              record.getUid(), record.getBid(), record.getLabel(), slice, 0.0, sliceIndex);
      context.write(new Text(keyPrefix + "," + sliceIndex), new Text(part.toString()));
    }