Esempio n. 1
0
 /**
  * Compares two keys using only their base part.
  *
  * <p>Both arguments are cast to {@code TextInt} and the comparison is
  * delegated to {@code TextInt.baseCompareTo}; any remaining part of the
  * composite key plays no role here.
  *
  * @param w1 first key; must be a {@code TextInt}
  * @param w2 second key; must be a {@code TextInt}
  * @return whatever {@code baseCompareTo} returns for the two keys
  */
 @Override
 public int compare(WritableComparable w1, WritableComparable w2) {
   // only the base part of the key takes part in the comparison
   return ((TextInt) w1).baseCompareTo((TextInt) w2);
 }
Esempio n. 2
0
 /**
  * Chooses the partition for a record from the base part of its key only,
  * so every key that shares the same base part lands in the same partition.
  *
  * <p>The sign bit of the hash is masked off before taking the modulo.
  * Without the mask a negative {@code baseHashCode()} would yield a negative
  * partition index, which is outside the valid range
  * {@code [0, numPartitions)}.
  *
  * @param key composite key; only {@code baseHashCode()} is consulted
  * @param value record value (not used for partitioning)
  * @param numPartitions total number of partitions
  * @return partition index in {@code [0, numPartitions)}
  */
 @Override
 public int getPartition(TextInt key, Tuple value, int numPartitions) {
   // consider only base part of key; clear the sign bit so the result is never negative
   return (key.baseHashCode() & Integer.MAX_VALUE) % numPartitions;
 }
Esempio n. 3
0
    /**
     * Routes one input line to the reducer, keyed by item ID plus a
     * record-type tag. The line is split on {@code fieldDelim}; how it is
     * interpreted depends on which kind of file split this mapper is reading.
     *
     * <ul>
     *   <li><b>Rating split</b> ({@code isRatingFileSplit}): field 0 is the
     *       item ID and every following field is one user rating, itself
     *       split on {@code subFieldDelim} into user ID [0], rating [1],
     *       timestamp [2] (read only when {@code ratingTimeCutoff > 0}) and
     *       rating context [3] (read only when {@code userRatingWithContext}).
     *       A rating is emitted under key (itemID, {@code two}) when it is
     *       newer than the cutoff (if one is set) and strictly greater than
     *       {@code minInputRating}.</li>
     *   <li><b>Rating stat split</b> ({@code isRatingStatFileSplit}): emits the
     *       value at index {@code STD_DEV_ORD} under key (itemID, {@code one}).</li>
     *   <li><b>Correlation split</b> (otherwise): fields are item A, item B,
     *       correlation, correlation length. When the correlation is strictly
     *       greater than {@code minCorrelation}, one record is emitted for each
     *       of the two items under key (item, {@code zero}), carrying the other
     *       item's ID. The correlation is negated unless
     *       {@code linearCorrelation} is set.</li>
     * </ul>
     *
     * <p>A counter in the "Record type count" group is incremented once per
     * input line.
     *
     * @param key input key supplied by the framework (not used)
     * @param value one line of input text
     * @param context Hadoop task context used for counters and output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     * @throws NumberFormatException if a numeric field cannot be parsed
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] items = value.toString().split(fieldDelim);
      String itemID = items[0];
      if (isRatingFileSplit) {
        // user rating
        context.getCounter("Record type count", "Rating").increment(1);

        for (int i = 1; i < items.length; ++i) {
          // one user rating for this item
          ratings = items[i].split(subFieldDelim);

          // time sensitive recommendation: keep only ratings newer than the cutoff
          boolean toInclude = true;
          if (ratingTimeCutoff > 0) {
            timeStamp = Long.parseLong(ratings[2]);
            toInclude = timeStamp > ratingTimeCutoff;
          }

          // contextual recommendation
          if (userRatingWithContext) {
            ratingContext = ratings[3];
          }

          // check for min input rating threshold
          inputRating = Integer.parseInt(ratings[1]);
          toInclude = toInclude && inputRating > minInputRating;

          if (toInclude) {
            // itemID
            keyOut.set(itemID, two);

            // userID, rating (and the rating context when enabled)
            valOut.initialize();
            if (userRatingWithContext) {
              // emit the per-rating context string, not the Hadoop task context
              valOut.add(ratings[0], inputRating, ratingContext, two);
            } else {
              valOut.add(ratings[0], inputRating, two);
            }
            context.write(keyOut, valOut);
          }
        }
      } else if (isRatingStatFileSplit) {
        // rating stat
        context.getCounter("Record type count", "Rating stat").increment(1);
        int ratingStdDev = Integer.parseInt(items[STD_DEV_ORD]);
        keyOut.set(itemID, one);
        valOut.initialize();
        valOut.add(ratingStdDev, one);
        context.write(keyOut, valOut);
      } else {
        // item correlation
        context.getCounter("Record type count", "Correlation").increment(1);
        correlation = Integer.parseInt(items[2]);
        correlationLength = Integer.parseInt(items[3]);

        // if correlation is above min threshold
        if (correlation > minCorrelation) {
          // the value is emitted as-is for linear correlation, negated otherwise
          int signedCorrelation = linearCorrelation ? correlation : -correlation;

          // correlation of 1st item: other itemID, correlation, intersection length (weight)
          keyOut.set(items[0], zero);
          valOut.initialize();
          valOut.add(items[1], signedCorrelation, correlationLength, zero);
          context.write(keyOut, valOut);

          // correlation of second item: other itemID, correlation, intersection length (weight)
          keyOut.set(items[1], zero);
          valOut.initialize();
          valOut.add(items[0], signedCorrelation, correlationLength, zero);
          context.write(keyOut, valOut);
        }
      }
    }