/**
 * Flushes the per-key statistics held in {@code args}, then emits one summary record.
 *
 * <p>Each entry is written as: first list element, second list element, and the list size.
 * The last record written uses the key {@code "0"} and carries {@code usrCount}.
 *
 * @param context task context that receives the output records
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Entry<String, ArrayList<Long>> record : args.entrySet()) {
    Text outKey = new Text(record.getKey());
    ArrayList<Long> stats = record.getValue();
    // The separators are part of the output format and must stay exactly as they are.
    String payload = stats.get(0) + " \t " + stats.get(1) + "\t" + stats.size();
    context.write(outKey, new Text(payload));
  }

  Text summaryKey = new Text("0");
  Text summaryValue = new Text(String.valueOf(usrCount));
  context.write(summaryKey, summaryValue);
}
/**
 * Tokenizes the line on {@code ':'} and {@code ' '} and emits one record per numeric token.
 *
 * <p>The first non-empty token is written with the value 0, every later token with the value 1.
 *
 * @param key input key (unused)
 * @param value the input line
 * @param context task context that receives the output records
 */
@Override
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer tokens = new StringTokenizer(value.toString(), ": ");
  boolean isFirstPage = true;

  while (tokens.hasMoreTokens()) {
    String pageNum = tokens.nextToken().trim().toLowerCase();
    if (pageNum.isEmpty()) {
      // A token made only of other whitespace; it does not count as the first page.
      continue;
    }

    int pageId = Integer.parseInt(pageNum);
    int numLinks = isFirstPage ? 0 : 1;
    isFirstPage = false;

    context.write(new IntWritable(pageId), new IntWritable(numLinks));
  }
}
/**
 * Sums the counts for a word and emits it only when the total reaches {@code minSupport}.
 *
 * <p>When {@code parseWeibo} is set, keys rejected by {@code ChineseUtils.allChinese} are
 * dropped before summing (filter added by Xudong Zhang). Both kinds of dropped keys are
 * recorded in the {@code "MyCounter"} counter group.
 *
 * @param key the word
 * @param values partial counts for the word
 * @param context task context that receives output and counters
 */
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  boolean rejectedByWeiboFilter = parseWeibo && !ChineseUtils.allChinese(key.toString());
  if (rejectedByWeiboFilter) {
    context.getCounter("MyCounter", "NumWordsFilteredByWeiboParser").increment(1);
    return;
  }

  long total = 0;
  for (LongWritable partial : values) {
    total += partial.get();
  }

  if (total >= minSupport) {
    context.write(key, new LongWritable(total));
  } else {
    context.getCounter("MyCounter", "NumWordsLessThanMinSupport").increment(1);
  }
}
/**
 * Sums all values for a key and writes the total.
 *
 * <p>The values are walked with a single iterator. Asking the {@code Iterable} for a new
 * iterator on every step only works when the same iterator object is handed back each time;
 * with any ordinary {@code Iterable} it would restart from the first element and never end.
 *
 * @param key the key being reduced
 * @param values the counts to add up
 * @param output task context that receives the total
 */
public void reduce(Text key, Iterable<IntWritable> values, Context output)
    throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable value : values) {
    sum += value.get();
  }
  output.write(key, new IntWritable(sum));
}
/**
 * Emits the first comma-separated field of the line together with the shared {@code one} value.
 *
 * <p>The field is located with {@code indexOf} rather than {@code split(",")}: {@code split}
 * discards trailing empty strings, so a line consisting only of commas produces an empty array
 * and indexing element 0 would throw. Such a line now yields an empty first field, the same
 * result already produced for an empty line or a line that starts with a comma.
 *
 * @param key input key (unused)
 * @param value the input line
 * @param context task context that receives the output record
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  int comma = line.indexOf(',');
  String firstField = comma < 0 ? line : line.substring(0, comma);
  word.set(firstField);
  context.write(word, one);
}
/**
 * Writes every pair held in {@code linksMap} as an {@code IntArrayWritable} under a null key.
 *
 * <p>The pair is emitted in swapped order: {@code second} first, then {@code first}.
 *
 * @param context task context that receives the output records
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  NullWritable outKey = NullWritable.get();
  for (Pair<Integer, Integer> entry : linksMap) {
    Integer[] swapped = new Integer[] {entry.second, entry.first};
    context.write(outKey, new IntArrayWritable(swapped));
  }
}
/**
 * Splits the line on tab characters and emits each token with a count of 1.
 *
 * @param key input key (unused)
 * @param value the input line
 * @param context task context that receives the output records
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] tokens = StringUtils.split(value.toString(), "\t");
  for (int i = 0; i < tokens.length; i++) {
    Text outKey = new Text(tokens[i]);
    context.write(outKey, new IntWritable(1));
  }
}
/**
 * Writes the number of values received for the key.
 *
 * <p>The values themselves are not read; only their count is emitted.
 *
 * @param key the key being reduced
 * @param values the values to count
 * @param context task context that receives the count
 */
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int occurrences = 0;
  java.util.Iterator<IntWritable> cursor = values.iterator();
  while (cursor.hasNext()) {
    cursor.next();
    occurrences++;
  }
  context.write(key, new IntWritable(occurrences));
}
/**
 * Adds up the values for a key and writes the total as text through the reusable {@code t}.
 *
 * @param key the key being reduced
 * @param value the counts to add up
 * @param context task context that receives the total
 */
@Override
protected void reduce(Text key, Iterable<IntWritable> value, Context context)
    throws IOException, InterruptedException {
  int total = 0;
  for (IntWritable partial : value) {
    total += partial.get();
  }
  String rendered = Integer.toString(total);
  t.set(rendered);
  context.write(key, t);
}
/*
 * Splits one input record with fieldDelimRegex, fills the reusable outKey / outVal tuples
 * and writes them.
 *
 * Field layout when classCondtionWeighted is set (fixed positions):
 *   items[0] test entity id, items[1] test class attribute (read in validation mode only),
 *   items[2] training entity id, items[3] rank, items[4] training class attribute,
 *   items[5] training feature posterior probability.
 *
 * Otherwise the fields are consumed sequentially: training entity id, test entity id, rank,
 * training class attribute, then the test class attribute (validation mode only), then the
 * training and test regression input fields (linear regression only).
 *
 * (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  items = value.toString().split(fieldDelimRegex);
  // Both tuples are reused across calls, so they are reset before being filled.
  outKey.initialize();
  outVal.initialize();
  if (classCondtionWeighted) {
    trainEntityId = items[2];
    testEntityId = items[0];
    rank = Integer.parseInt(items[3]);
    trainClassAttr = items[4];
    trainingFeaturePostProb = Double.parseDouble(items[5]);
    if (isValidationMode) {
      // validation mode
      testClassAttr = items[1];
      outKey.add(testEntityId, testClassAttr, rank);
    } else {
      // prediction mode
      outKey.add(testEntityId, rank);
    }
    outVal.add(trainEntityId, rank, trainClassAttr, trainingFeaturePostProb);
  } else {
    // The order of the index++ reads below defines the expected field order.
    int index = 0;
    trainEntityId = items[index++];
    testEntityId = items[index++];
    rank = Integer.parseInt(items[index++]);
    trainClassAttr = items[index++];
    if (isValidationMode) {
      // validation mode
      testClassAttr = items[index++];
    }
    outVal.add(trainEntityId, rank, trainClassAttr);
    // for linear regression add numeric input field
    if (isLinearRegression) {
      trainRegrNumFld = items[index++];
      outVal.add(trainRegrNumFld);
      testRegrNumFld = items[index++];
      if (isValidationMode) {
        outKey.add(testEntityId, testClassAttr, testRegrNumFld, rank);
      } else {
        outKey.add(testEntityId, testRegrNumFld, rank);
      }
      // NOTE(review): testRegrNumFld is appended to the key a second time here, after rank.
      // Confirm whether this trailing copy is intentional.
      outKey.add(testRegrNumFld);
    } else {
      if (isValidationMode) {
        outKey.add(testEntityId, testClassAttr, rank);
      } else {
        outKey.add(testEntityId, rank);
      }
    }
  }
  context.write(outKey, outVal);
}
/**
 * Adds up all values for a key and writes the total.
 *
 * @param key the key being reduced
 * @param values the counts to add up
 * @param context task context that receives the total
 */
@Override
public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int total = 0;
  for (IntWritable partial : values) {
    total += partial.get();
  }
  IntWritable result = new IntWritable(total);
  context.write(key, result);
}
/**
 * Parses the record and emits its suit when its colour equals the configured
 * {@code "param.color"} value.
 *
 * @param key input key (unused)
 * @param value the record to parse
 * @param output task context that receives the output record
 */
public void map(LongWritable key, Text value, Context output)
    throws IOException, InterruptedException {
  parser.parse(value.toString());
  String wantedColor = jobconf.get("param.color");

  boolean colorMatches = parser.getColor().equals(wantedColor);
  if (!colorMatches) {
    return;
  }

  suit = parser.getSuit();
  output.write(new Text(suit), one);
}
/**
 * Re-keys a comma-separated record on its third field.
 *
 * <p>The output key is a {@code CustomWritable} built from the integer in the third field and
 * the constant 1; the output value is the first two fields joined by a comma.
 *
 * @param key input key (unused)
 * @param value the input line
 * @param context task context that receives the output record
 */
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] fields = value.toString().split(",");
  String leadingPair = fields[0] + "," + fields[1];
  CustomWritable outKey = new CustomWritable(Integer.parseInt(fields[2]), 1);
  context.write(outKey, new Text(leadingPair));
}
@Override public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { Hashtable<String, Integer> wordCounts = new Hashtable<String, Integer>(); ArrayList docName = new ArrayList<String>(); LinkedList wordName = new LinkedList<String>(); while (values.iterator().hasNext()) { String[] items = values.iterator().next().toString().split("@"); if (!wordName.contains(items[0])) { wordName.add(items[0]); } String[] keys = items[1].split(":|,"); for (int i = 0; i < keys.length; i += 2) { if (!docName.contains(keys[i])) { docName.add(keys[i]); wordCounts.put(keys[i], 0); } int tmp = wordCounts.get(keys[i]); tmp += Integer.parseInt(keys[i + 1]); wordCounts.put(keys[i], tmp); } } for (int i = 0; i < docName.size() - 1; ++i) { for (int j = i + 1; j < docName.size(); ++j) { if (wordCounts.get(docName.get(i)) < wordCounts.get(docName.get(j))) { String stmp = docName.get(i).toString(); docName.set(i, docName.get(j).toString()); docName.set(j, stmp); } } } String retKey = wordName.get(0).toString(); for (int i = 1; i < wordName.size(); ++i) { retKey += "," + wordName.get(i); } String retValue = ""; // ="\n" + docName.get(0).toString() + ":" + // wordCounts.get(docName.get(0).toString()); for (int i = 0; i < docName.size(); ++i) { retValue += "\n" + docName.get(i).toString() + ": " + wordCounts.get(docName.get(i).toString()); } context.write(new Text(retKey), new Text(retValue)); }
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub String[] line = value.toString().split("\u0001"); if (line.length < 6) { return; } String id = line[0]; String peerid = line[2]; String filename = line[5]; if ((id.equals("2") || id.equals("724")) && Pattern.matches(patternFilename, filename)) { context.write(new Text(id + "\t" + peerid), new IntWritable(1)); } }
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { parser.parse(value); if (parser.isValidTemperature()) { int airTemperature = parser.getAirTemperature(); context.write(new Text(parser.getYear()), new IntWritable(airTemperature)); } else if (parser.isMalformedTemperature()) { System.err.println("Ignoring possibly corrupt input: " + value); context.getCounter(Temperature.MALFORMED).increment(1); } else if (parser.isMissingTemperature()) { context.getCounter(Temperature.MISSING).increment(1); } // dynamic counter context.getCounter("TemperatureQuality", parser.getQuality()).increment(1); }
@Override public void map(Object key, Text value, Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); String keywords = conf.get("keyword"); // System.out.println(keywords); String line = value.toString(); String[] terms = line.split("\t"); // terms[0] is required keyword String[] multiKeywords = keywords.split(" "); boolean foundflag = false; for (int i = 0; i < multiKeywords.length; ++i) { if (multiKeywords[i].equals(terms[0])) { foundflag = true; } } if (foundflag) { context.write(new Text("Keywords"), new Text(terms[0] + "@" + terms[1].toString())); } }
@Override public void run() { // TODO Auto-generated method stub System.out.println("write:"); for (Entry<Integer, float[]> entry : maps.entrySet()) { int entryKey = entry.getKey(); float[] value = entry.getValue(); String result = ""; for (int i = 0; i < value.length; i++) { result += value[i] + ","; } result = result.substring(0, result.lastIndexOf(",")); try { context.write(new Text(String.valueOf(entryKey)), new Text(result)); } catch (Exception e) { e.printStackTrace(); } } }
/**
 * Fills the reusable key and value objects and writes them as one map output record.
 *
 * <p>Key fields: appId, accountId, platform, gameServer. Value fields: channel,
 * onlineRecords. The value object's suffix is set to {@code playerType}.
 *
 * @param context task context that receives the record
 * @param appId first key field
 * @param accountId second key field
 * @param platform third key field
 * @param channel first value field
 * @param gameServer fourth key field
 * @param onlineRecords second value field
 * @param playerType suffix set on the value object
 */
private void writeMapUnionResult(
    Context context,
    String appId,
    String accountId,
    String platform,
    String channel,
    String gameServer,
    String onlineRecords,
    String playerType)
    throws IOException, InterruptedException {
  mapKeyObj.setOutFields(new String[] {appId, accountId, platform, gameServer});

  mapValObj.setOutFields(new String[] {channel, onlineRecords});
  mapValObj.setSuffix(playerType);

  context.write(mapKeyObj, mapValObj);
}
/**
 * Keeps at most {@code N} pairs in {@code linksMap} and writes what remains.
 *
 * <p>Each value carries two integers, a page id followed by a count. They are stored as a
 * (count, pageId) pair; whenever the collection grows beyond {@code N} its first element is
 * removed. The surviving pairs are written as (pageId, count).
 *
 * @param key the null key shared by all values
 * @param values the packed (pageId, count) arrays
 * @param context task context that receives the output records
 */
@Override
public void reduce(NullWritable key, Iterable<IntArrayWritable> values, Context context)
    throws IOException, InterruptedException {
  for (IntArrayWritable packed : values) {
    IntWritable[] fields = (IntWritable[]) packed.toArray();
    int pageId = fields[0].get();
    int count = fields[1].get();

    linksMap.add(new Pair<Integer, Integer>(count, pageId));
    if (linksMap.size() > N) {
      linksMap.remove(linksMap.first());
    }
  }

  for (Pair<Integer, Integer> kept : linksMap) {
    context.write(new IntWritable(kept.second), new IntWritable(kept.first));
  }
}
public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String[] feature = value.toString().split("\t"); double dist = 0; double[] point = new double[feature.length - 3]; for (int j = 3; j < feature.length; j++) { point[j - 3] = Double.parseDouble(feature[j].trim()); } int len = centroidsList.length; double[] diffList = new double[len]; double minDiff = Double.MAX_VALUE; int minIndex = -1; for (int i = 0; i < len; i++) { try { diffList[i] = calcDist(centroidsList[i], point); } catch (Exception e) { e.printStackTrace(); System.exit(1); } if (diffList[i] < minDiff) { minDiff = diffList[i]; minIndex = i; } } // DoubleWritable minDiffDW = new DoubleWritable(minDiff); Text keyWithCentroid = new Text("" + minIndex); StringBuffer sb = new StringBuffer(); for (int i = 0; i < point.length; i++) { sb.append(point[i] + "\t"); } String pointStr = sb.toString().trim(); Text pointText = new Text(1 + "#" + pointStr); context.write(keyWithCentroid, pointText); }
public void map(Object key, Text value, Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); FileSplit split = (FileSplit) context.getInputSplit(); String rootFolder = split.getPath().getParent().toString(); String uri = rootFolder + "/" + value.toString(); // C:/hadoopsample/input/images+"/"+image1.jpg"; FileSystem fs = FileSystem.get(URI.create(uri), conf); FSDataInputStream in = null; try { in = fs.open(new Path(uri)); java.io.ByteArrayOutputStream bout = new ByteArrayOutputStream(); byte buffer[] = new byte[1024 * 1024]; while (in.read(buffer, 0, buffer.length) >= 0) { bout.write(buffer); } context.write(value, new BytesWritable(bout.toByteArray())); } finally { IOUtils.closeStream(in); } }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(Tuple key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException { if (stBld.length() > 0) { stBld.delete(0, stBld.length()); } testEntityId = key.getString(0); stBld.append(testEntityId); // collect nearest neighbors count = 0; neighborhood.initialize(); for (Tuple value : values) { int index = 0; trainEntityId = value.getString(index++); distance = value.getInt(index++); trainClassValue = value.getString(index++); if (classCondtionWeighted && neighborhood.IsInClassificationMode()) { trainingFeaturePostProb = value.getDouble(index++); if (inverseDistanceWeighted) { neighborhood.addNeighbor( trainEntityId, distance, trainClassValue, trainingFeaturePostProb, true); } else { neighborhood.addNeighbor( trainEntityId, distance, trainClassValue, trainingFeaturePostProb); } } else { Neighborhood.Neighbor neighbor = neighborhood.addNeighbor(trainEntityId, distance, trainClassValue); if (neighborhood.isInLinearRegressionMode()) { neighbor.setRegrInputVar(Double.parseDouble(value.getString(index++))); } } if (++count == topMatchCount) { break; } } if (neighborhood.isInLinearRegressionMode()) { String testRegrNumFld = isValidationMode ? 
key.getString(2) : key.getString(1); neighborhood.withRegrInputVar(Double.parseDouble(testRegrNumFld)); } // class distribution neighborhood.processClassDitribution(); if (outputClassDistr && neighborhood.IsInClassificationMode()) { if (classCondtionWeighted) { Map<String, Double> classDistr = neighborhood.getWeightedClassDitribution(); double thisScore; for (String classVal : classDistr.keySet()) { thisScore = classDistr.get(classVal); // LOG.debug("classVal:" + classVal + " thisScore:" + thisScore); stBld.append(fieldDelim).append(classVal).append(fieldDelim).append(thisScore); } } else { Map<String, Integer> classDistr = neighborhood.getClassDitribution(); int thisScore; for (String classVal : classDistr.keySet()) { thisScore = classDistr.get(classVal); stBld.append(classVal).append(fieldDelim).append(thisScore); } } } if (isValidationMode) { // actual class attr value testClassValActual = key.getString(1); stBld.append(fieldDelim).append(testClassValActual); } // predicted class value if (useCostBasedClassifier) { // use cost based arbitrator if (neighborhood.IsInClassificationMode()) { posClassProbab = neighborhood.getClassProb(posClassAttrValue); testClassValPredicted = costBasedArbitrator.classify(posClassProbab); } } else { // get directly if (neighborhood.IsInClassificationMode()) { testClassValPredicted = neighborhood.classify(); } else { testClassValPredicted = "" + neighborhood.getPredictedValue(); } } stBld.append(fieldDelim).append(testClassValPredicted); if (isValidationMode) { if (neighborhood.IsInClassificationMode()) { confMatrix.report(testClassValPredicted, testClassValActual); } } outVal.set(stBld.toString()); context.write(NullWritable.get(), outVal); }
// Iterates through hashmap and sends the key,value to reducers @Override protected void cleanup(Context context) throws IOException, InterruptedException { for (String key : usercountMap.keySet()) { context.write(new Text(key), new IntWritable(usercountMap.get(key))); } }