/**
 * Aggregates all values of one key into a single delimited output record.
 *
 * <p>Field 0 of every value is accumulated into {@code count} and field 1 into {@code sum};
 * {@code avg} is {@code sum / count}, or 0 when {@code count} is not positive. The emitted
 * record is {@code key, count, sum, avg} joined by {@code fieldDelim} and written with a
 * {@code NullWritable} key.
 *
 * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable,
 *     org.apache.hadoop.mapreduce.Reducer.Context)
 */
protected void reduce(Tuple key, Iterable<Tuple> values, Context context)
        throws IOException, InterruptedException {
    // start every key from a clean slate; the accumulators are reused across calls
    count = 0;
    sum = 0;
    for (Tuple partial : values) {
        count += partial.getInt(0);
        sum += partial.getInt(1);
    }

    // guard against division by zero when nothing was counted
    if (count > 0) {
        avg = sum / count;
    } else {
        avg = 0;
    }

    // rebuild the reusable output buffer: key, count, sum, avg
    stBld.setLength(0);
    stBld.append(key.toString())
            .append(fieldDelim)
            .append(count)
            .append(fieldDelim)
            .append(sum)
            .append(fieldDelim)
            .append(avg);

    outVal.set(stBld.toString());
    context.write(NullWritable.get(), outVal);
}
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(Tuple key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException { if (stBld.length() > 0) { stBld.delete(0, stBld.length() - 1); } boolean first = true; count = 0; latestTimeStamp = 0; for (Tuple value : values) { eventType = value.getInt(0); timeStamp = value.getLong(1); if (first) { mostEngagingEventType = eventType; ++count; first = false; } else { // all occurences of the first event type if (eventType == mostEngagingEventType) { ++count; } } // latest time stamp if (timeStamp > latestTimeStamp) { latestTimeStamp = timeStamp; } } rating = ratingMapper.scoreForEvent(mostEngagingEventType, count); stBld .append(key.getString(0)) .append(fieldDelim) .append(key.getString(1)) .append(fieldDelim) .append(rating) .append(fieldDelim) .append(latestTimeStamp); if (outputDetail) { stBld.append(fieldDelim).append(mostEngagingEventType).append(fieldDelim).append(count); } valOut.set(stBld.toString()); context.write(NullWritable.get(), valOut); }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(TextInt key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException { ratingCorrelations.clear(); ++logCounter; ratingStat = null; for (Tuple value : values) { if (((Integer) value.get(value.getSize() - 1)) == 0) { // in rating correlation ratingCorrelations.add(value.createClone()); context.getCounter("Predictor", "Rating correlation").increment(1); } else if (((Integer) value.get(value.getSize() - 1)) == 1) { // rating stat ratingStat = value.createClone(); } else { // in user rating if (!ratingCorrelations.isEmpty()) { String userID = value.getString(0); rating = value.getInt(1); if (userRatingWithContext) { ratingContext = value.getString(2); } // all rating correlations for (Tuple ratingCorrTup : ratingCorrelations) { context.getCounter("Predictor", "User rating").increment(1); itemID = ratingCorrTup.getString(0); ratingCorr = ratingCorrTup.getInt(1); weight = ratingCorrTup.getInt(2); modifyCorrelation(); int predRating = linearCorrelation ? (rating * ratingCorr) / maxRating : (rating * correlationScale + ratingCorr) / maxRating; if (predRating > 0) { // userID, itemID, predicted rating, correlation length, correlation coeff, input // rating std dev ratingStdDev = ratingStat != null ? ratingStat.getInt(0) : -1; if (userRatingWithContext) { valueOut.set( userID + fieldDelim + itemID + fieldDelim + ratingContext + fieldDelim + predRating + fieldDelim + weight + fieldDelim + ratingCorr + fieldDelim + ratingStdDev); } else { valueOut.set( userID + fieldDelim + itemID + fieldDelim + predRating + fieldDelim + weight + fieldDelim + ratingCorr + fieldDelim + ratingStdDev); } context.write(NullWritable.get(), valueOut); context.getCounter("Predictor", "Rating correlation").increment(1); } } } } } }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(Tuple key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException { if (stBld.length() > 0) { stBld.delete(0, stBld.length()); } testEntityId = key.getString(0); stBld.append(testEntityId); // collect nearest neighbors count = 0; neighborhood.initialize(); for (Tuple value : values) { int index = 0; trainEntityId = value.getString(index++); distance = value.getInt(index++); trainClassValue = value.getString(index++); if (classCondtionWeighted && neighborhood.IsInClassificationMode()) { trainingFeaturePostProb = value.getDouble(index++); if (inverseDistanceWeighted) { neighborhood.addNeighbor( trainEntityId, distance, trainClassValue, trainingFeaturePostProb, true); } else { neighborhood.addNeighbor( trainEntityId, distance, trainClassValue, trainingFeaturePostProb); } } else { Neighborhood.Neighbor neighbor = neighborhood.addNeighbor(trainEntityId, distance, trainClassValue); if (neighborhood.isInLinearRegressionMode()) { neighbor.setRegrInputVar(Double.parseDouble(value.getString(index++))); } } if (++count == topMatchCount) { break; } } if (neighborhood.isInLinearRegressionMode()) { String testRegrNumFld = isValidationMode ? 
key.getString(2) : key.getString(1); neighborhood.withRegrInputVar(Double.parseDouble(testRegrNumFld)); } // class distribution neighborhood.processClassDitribution(); if (outputClassDistr && neighborhood.IsInClassificationMode()) { if (classCondtionWeighted) { Map<String, Double> classDistr = neighborhood.getWeightedClassDitribution(); double thisScore; for (String classVal : classDistr.keySet()) { thisScore = classDistr.get(classVal); // LOG.debug("classVal:" + classVal + " thisScore:" + thisScore); stBld.append(fieldDelim).append(classVal).append(fieldDelim).append(thisScore); } } else { Map<String, Integer> classDistr = neighborhood.getClassDitribution(); int thisScore; for (String classVal : classDistr.keySet()) { thisScore = classDistr.get(classVal); stBld.append(classVal).append(fieldDelim).append(thisScore); } } } if (isValidationMode) { // actual class attr value testClassValActual = key.getString(1); stBld.append(fieldDelim).append(testClassValActual); } // predicted class value if (useCostBasedClassifier) { // use cost based arbitrator if (neighborhood.IsInClassificationMode()) { posClassProbab = neighborhood.getClassProb(posClassAttrValue); testClassValPredicted = costBasedArbitrator.classify(posClassProbab); } } else { // get directly if (neighborhood.IsInClassificationMode()) { testClassValPredicted = neighborhood.classify(); } else { testClassValPredicted = "" + neighborhood.getPredictedValue(); } } stBld.append(fieldDelim).append(testClassValPredicted); if (isValidationMode) { if (neighborhood.IsInClassificationMode()) { confMatrix.report(testClassValPredicted, testClassValActual); } } outVal.set(stBld.toString()); context.write(NullWritable.get(), outVal); }