/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(Tuple key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException { if (stBld.length() > 0) { stBld.delete(0, stBld.length() - 1); } boolean first = true; count = 0; latestTimeStamp = 0; for (Tuple value : values) { eventType = value.getInt(0); timeStamp = value.getLong(1); if (first) { mostEngagingEventType = eventType; ++count; first = false; } else { // all occurences of the first event type if (eventType == mostEngagingEventType) { ++count; } } // latest time stamp if (timeStamp > latestTimeStamp) { latestTimeStamp = timeStamp; } } rating = ratingMapper.scoreForEvent(mostEngagingEventType, count); stBld .append(key.getString(0)) .append(fieldDelim) .append(key.getString(1)) .append(fieldDelim) .append(rating) .append(fieldDelim) .append(latestTimeStamp); if (outputDetail) { stBld.append(fieldDelim).append(mostEngagingEventType).append(fieldDelim).append(count); } valOut.set(stBld.toString()); context.write(NullWritable.get(), valOut); }
/* (non-Javadoc)
 * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
 */
// Joins item rating-correlation tuples with user rating tuples sharing the
// same key and emits one predicted rating per (user, correlated item) pair.
// The last element of each incoming tuple is a type tag:
//   0 -> rating correlation tuple, 1 -> rating stat tuple, else -> user rating.
// Correlation and stat tuples must arrive before the user ratings they apply
// to — presumably guaranteed by a secondary sort / grouping comparator
// configured elsewhere; TODO confirm.
protected void reduce(TextInt key, Iterable<Tuple> values, Context context)
    throws IOException, InterruptedException {
  ratingCorrelations.clear();
  ++logCounter;
  ratingStat = null;
  for (Tuple value : values) {
    if (((Integer) value.get(value.getSize() - 1)) == 0) {
      // in rating correlation
      // clone is required: Hadoop reuses the value object across iterations
      ratingCorrelations.add(value.createClone());
      context.getCounter("Predictor", "Rating correlation").increment(1);
    } else if (((Integer) value.get(value.getSize() - 1)) == 1) {
      // rating stat
      ratingStat = value.createClone();
    } else {
      // in user rating; only useful if at least one correlation was cached
      if (!ratingCorrelations.isEmpty()) {
        String userID = value.getString(0);
        rating = value.getInt(1);
        if (userRatingWithContext) {
          ratingContext = value.getString(2);
        }
        // all rating correlations
        for (Tuple ratingCorrTup : ratingCorrelations) {
          context.getCounter("Predictor", "User rating").increment(1);
          itemID = ratingCorrTup.getString(0);
          ratingCorr = ratingCorrTup.getInt(1);
          weight = ratingCorrTup.getInt(2);
          modifyCorrelation();
          // linear mode scales the rating by the correlation; otherwise the
          // correlation is shifted in on the correlationScale fixed-point scale
          int predRating =
              linearCorrelation
                  ? (rating * ratingCorr) / maxRating
                  : (rating * correlationScale + ratingCorr) / maxRating;
          if (predRating > 0) {
            // userID, itemID, predicted rating, correlation length, correlation coeff, input
            // rating std dev
            // -1 marks "std dev unavailable" when no stat tuple was seen
            ratingStdDev = ratingStat != null ? ratingStat.getInt(0) : -1;
            if (userRatingWithContext) {
              valueOut.set(
                  userID
                      + fieldDelim
                      + itemID
                      + fieldDelim
                      + ratingContext
                      + fieldDelim
                      + predRating
                      + fieldDelim
                      + weight
                      + fieldDelim
                      + ratingCorr
                      + fieldDelim
                      + ratingStdDev);
            } else {
              valueOut.set(
                  userID
                      + fieldDelim
                      + itemID
                      + fieldDelim
                      + predRating
                      + fieldDelim
                      + weight
                      + fieldDelim
                      + ratingCorr
                      + fieldDelim
                      + ratingStdDev);
            }
            context.write(NullWritable.get(), valueOut);
            // NOTE(review): this increments the same "Rating correlation"
            // counter as the correlation-tuple branch above — it looks like a
            // distinct "Predicted rating" counter was intended; confirm before
            // relying on either counter's value.
            context.getCounter("Predictor", "Rating correlation").increment(1);
          }
        }
      }
    }
  }
}
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(Tuple key, Iterable<Text> values, Context context) throws IOException, InterruptedException { srcEntityId = key.getString(0); count = 0; boolean doEmitNeighbor = false; valueList.clear(); for (Text value : values) { doEmitNeighbor = false; // count based neighbor if (nearestByCount) { doEmitNeighbor = true; if (++count >= topMatchCount) { doEmitNeighbor = false; } } // distance based neighbors if (nearestByDistance) { // distance based neighbor String[] items = value.toString().split(fieldDelim); distance = Integer.parseInt(items[items.length - 1]); if (distance <= topMatchDistance) { if (!nearestByCount) { doEmitNeighbor = true; } } else { doEmitNeighbor = false; } } if (doEmitNeighbor) { // along with neighbors if (compactOutput) { if (recordInOutput) { // contains id,record,rank - strip out entity ID and rank String[] valueItems = value.toString().split(fieldDelim); valueList.add(org.chombo.util.Utility.join(valueItems, 1, valueItems.length - 1)); } else { // contains id, rank valueList.add(value.toString()); } } else { outVal.set(srcEntityId + fieldDelim + value.toString()); context.write(NullWritable.get(), outVal); } } else { // only source entity if neighborhood condition not met if (outputWithNoNeighbor && !compactOutput) { outVal.set(srcEntityId); context.write(NullWritable.get(), outVal); } } } // emit in compact format if (compactOutput) { boolean doEmit = true; String srcRec = recordInOutput ? key.getString(1) : ""; int numNeighbor = valueList.size(); if (0 == numNeighbor) { // only source entity if neighborhood condition not met if (outputWithNoNeighbor) { outVal.set( recordInOutput ? 
srcEntityId + fieldDelim + srcRec + fieldDelim + numNeighbor : srcEntityId); } else { doEmit = false; } } else { String targetValues = org.chombo.util.Utility.join(valueList, fieldDelim); outVal.set( recordInOutput ? srcEntityId + fieldDelim + srcRec + fieldDelim + numNeighbor + targetValues : srcEntityId + fieldDelim + targetValues); } if (doEmit) { context.write(NullWritable.get(), outVal); } } }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context) */ protected void reduce(Tuple key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException { if (stBld.length() > 0) { stBld.delete(0, stBld.length()); } testEntityId = key.getString(0); stBld.append(testEntityId); // collect nearest neighbors count = 0; neighborhood.initialize(); for (Tuple value : values) { int index = 0; trainEntityId = value.getString(index++); distance = value.getInt(index++); trainClassValue = value.getString(index++); if (classCondtionWeighted && neighborhood.IsInClassificationMode()) { trainingFeaturePostProb = value.getDouble(index++); if (inverseDistanceWeighted) { neighborhood.addNeighbor( trainEntityId, distance, trainClassValue, trainingFeaturePostProb, true); } else { neighborhood.addNeighbor( trainEntityId, distance, trainClassValue, trainingFeaturePostProb); } } else { Neighborhood.Neighbor neighbor = neighborhood.addNeighbor(trainEntityId, distance, trainClassValue); if (neighborhood.isInLinearRegressionMode()) { neighbor.setRegrInputVar(Double.parseDouble(value.getString(index++))); } } if (++count == topMatchCount) { break; } } if (neighborhood.isInLinearRegressionMode()) { String testRegrNumFld = isValidationMode ? 
key.getString(2) : key.getString(1); neighborhood.withRegrInputVar(Double.parseDouble(testRegrNumFld)); } // class distribution neighborhood.processClassDitribution(); if (outputClassDistr && neighborhood.IsInClassificationMode()) { if (classCondtionWeighted) { Map<String, Double> classDistr = neighborhood.getWeightedClassDitribution(); double thisScore; for (String classVal : classDistr.keySet()) { thisScore = classDistr.get(classVal); // LOG.debug("classVal:" + classVal + " thisScore:" + thisScore); stBld.append(fieldDelim).append(classVal).append(fieldDelim).append(thisScore); } } else { Map<String, Integer> classDistr = neighborhood.getClassDitribution(); int thisScore; for (String classVal : classDistr.keySet()) { thisScore = classDistr.get(classVal); stBld.append(classVal).append(fieldDelim).append(thisScore); } } } if (isValidationMode) { // actual class attr value testClassValActual = key.getString(1); stBld.append(fieldDelim).append(testClassValActual); } // predicted class value if (useCostBasedClassifier) { // use cost based arbitrator if (neighborhood.IsInClassificationMode()) { posClassProbab = neighborhood.getClassProb(posClassAttrValue); testClassValPredicted = costBasedArbitrator.classify(posClassProbab); } } else { // get directly if (neighborhood.IsInClassificationMode()) { testClassValPredicted = neighborhood.classify(); } else { testClassValPredicted = "" + neighborhood.getPredictedValue(); } } stBld.append(fieldDelim).append(testClassValPredicted); if (isValidationMode) { if (neighborhood.IsInClassificationMode()) { confMatrix.report(testClassValPredicted, testClassValActual); } } outVal.set(stBld.toString()); context.write(NullWritable.get(), outVal); }