@Override public int compare(WritableComparable w1, WritableComparable w2) { // consider only the base part of the key TextInt t1 = ((TextInt) w1); TextInt t2 = ((TextInt) w2); return t1.baseCompareTo(t2); }
@Override public int getPartition(TextInt key, Tuple value, int numPartitions) { // consider only base part of key return key.baseHashCode() % numPartitions; }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context) */ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] items = value.toString().split(fieldDelim); String itemID = items[0]; if (isRatingFileSplit) { // user rating context.getCounter("Record type count", "Rating").increment(1); boolean toInclude = true; for (int i = 1; i < items.length; ++i) { // all user ratings for this item ratings = items[i].split(subFieldDelim); // time sensitive recommendation toInclude = true; if (ratingTimeCutoff > 0) { timeStamp = Long.parseLong(ratings[2]); toInclude = timeStamp > ratingTimeCutoff; } // contextual recommendation if (userRatingWithContext) { ratingContext = ratings[3]; } // check for min input rating threshold inputRating = new Integer(ratings[1]); toInclude = toInclude && inputRating > minInputRating; if (toInclude) { // itemID keyOut.set(itemID, two); // userID, rating valOut.initialize(); if (userRatingWithContext) { valOut.add(ratings[0], inputRating, context, two); } else { valOut.add(ratings[0], inputRating, two); } context.write(keyOut, valOut); } } } else if (isRatingStatFileSplit) { // rating stat context.getCounter("Record type count", "Rating stat").increment(1); int ratingStdDev = Integer.parseInt(items[STD_DEV_ORD]); keyOut.set(itemID, one); valOut.initialize(); valOut.add(ratingStdDev, one); context.write(keyOut, valOut); } else { // item correlation context.getCounter("Record type count", "Correlation").increment(1); correlation = Integer.parseInt(items[2]); correlationLength = Integer.parseInt(items[3]); // if correlation is above min threshold if (correlation > minCorrelation) { // correlation of 1st item keyOut.set(items[0], zero); valOut.initialize(); if (linearCorrelation) { // other itemID, correlation, intersection length (weight) valOut.add(items[1], correlation, correlationLength, zero); } else { // other itemID, correlation, intersection length (weight) valOut.add(items[1], -correlation, correlationLength, zero); } context.write(keyOut, valOut); // correlation of second item keyOut.set(items[1], zero); valOut.initialize(); if (linearCorrelation) { // other itemID, correlation, intersection length (weight) valOut.add(items[0], correlation, correlationLength, zero); } else { // other itemID, correlation, intersection length (weight) valOut.add(items[0], -correlation, correlationLength, zero); } context.write(keyOut, valOut); } } }