/**
 * Parses one input line into a {@code DataPoint} and emits every feature value
 * keyed by its position in the feature array.
 *
 * <p>Lines that are empty after trimming are skipped without output.
 *
 * @param key     input key supplied by the framework; not read by this method
 * @param value   the raw text of one record
 * @param context sink that receives one (index, value) pair per feature, both as {@code Text}
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 * @throws RuntimeException     if the record's feature count differs from {@code featnum}
 */
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString().trim();
    if (line.isEmpty()) {
        return;
    }

    double[] featureValues = new DataPoint(line).getFeatures().getValArr();

    // Every record must carry exactly the configured number of features.
    if (featureValues.length != featnum) {
        throw new RuntimeException("当前record的特征长度与设置长度不同!");
    }

    int position = 0;
    for (double featureValue : featureValues) {
        Text outKey = new Text(Integer.toString(position));
        Text outValue = new Text(Double.toString(featureValue));
        context.write(outKey, outValue);
        position++;
    }
}
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String rowkey_prefix = filename + "," + key.get(); // 输入数据的格式是 : uid\tbid\tlabel\tweigth\tweigth\tweigth\tweigth\tweigth\tweigth\t if (value.toString().split("\\s+").length < 4) return; DataPoint point = new DataPoint(value.toString()); if (point.getFeatures() == null) return; ArrayList<Double> features = point.getFeatures().getValue(); int slicenum = 0; ArrayList<Double> slice = new ArrayList<Double>(slicelen); for (int i = 0; i < features.size(); i++) { int index = i - slicenum * slicelen; // sub-feature index if (index < slicelen) { double cur_feat = features.get(i); slice.add(cur_feat); } else { // 注意到 这里的arraylist 用的是deepcopy , 长度满足sliceLen 就输出 WDataPoint sub_dp = new WDataPoint( point.getUid(), point.getBid(), point.getLabel(), slice, 0.0, slicenum); context.write(new Text(rowkey_prefix + "," + slicenum), new Text(sub_dp.toString())); // 降档前特征加入下一条子特征数组内 slicenum++; slice.clear(); // 可以清空, 最底层实现了安置复制 double cur_feat = features.get(i); slice.add(cur_feat); // 这时sub_index == 0 } } if (slice.size() != 0) { WDataPoint sub_dp = new WDataPoint(point.getUid(), point.getBid(), point.getLabel(), slice, 0.0, slicenum); context.write(new Text(rowkey_prefix + "," + slicenum), new Text(sub_dp.toString())); slice.clear(); // 可以清空, 最底层实现了安置复制 } }