@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { items = value.toString().split(fieldDelimRegex); outKey.initialize(); outVal.initialize(); int initValue = 0; for (int i = 0; i < quantityAttr; ++i) { outKey.append(items[i]); } if (isAggrFileSplit) { if (items.length >= quantityAttr) { // existing aggregation outVal.add( Integer.parseInt(items[quantityAttr]), Integer.parseInt(items[quantityAttr + 1])); } else { // first aggregation outVal.add(initValue, initValue); } } else { outVal.add((int) 1, Integer.parseInt(items[quantityAttr])); } context.write(outKey, outVal); }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context) */ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { items = value.toString().split(fieldDelimRegex); outKey.initialize(); outVal.initialize(); if (classCondtionWeighted) { trainEntityId = items[2]; testEntityId = items[0]; rank = Integer.parseInt(items[3]); trainClassAttr = items[4]; trainingFeaturePostProb = Double.parseDouble(items[5]); if (isValidationMode) { // validation mode testClassAttr = items[1]; outKey.add(testEntityId, testClassAttr, rank); } else { // prediction mode outKey.add(testEntityId, rank); } outVal.add(trainEntityId, rank, trainClassAttr, trainingFeaturePostProb); } else { int index = 0; trainEntityId = items[index++]; testEntityId = items[index++]; rank = Integer.parseInt(items[index++]); trainClassAttr = items[index++]; if (isValidationMode) { // validation mode testClassAttr = items[index++]; } outVal.add(trainEntityId, rank, trainClassAttr); // for linear regression add numeric input field if (isLinearRegression) { trainRegrNumFld = items[index++]; outVal.add(trainRegrNumFld); testRegrNumFld = items[index++]; if (isValidationMode) { outKey.add(testEntityId, testClassAttr, testRegrNumFld, rank); } else { outKey.add(testEntityId, testRegrNumFld, rank); } outKey.add(testRegrNumFld); } else { if (isValidationMode) { outKey.add(testEntityId, testClassAttr, rank); } else { outKey.add(testEntityId, rank); } } } context.write(outKey, outVal); }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context) */ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] items = value.toString().split(fieldDelim); eventType = Integer.parseInt(items[2]); timeStamp = Long.parseLong(items[3]); // user ID, item ID, event keyOut.initialize(); keyOut.add(items[0], items[1], eventType); valOut.initialize(); valOut.add(eventType, timeStamp); context.write(keyOut, valOut); }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context) */ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] items = value.toString().split(fieldDelimRegex); srcEntityId = items[0]; trgEntityId = items[1]; rank = Integer.parseInt(items[items.length - 1]); outKey.initialize(); if (recordInOutput) { // include source and taraget record if (recLength == -1) { recLength = (items.length - 3) / 2; srcRecBeg = 2; srcRecEnd = trgRecBeg = 2 + recLength; trgRecEnd = trgRecBeg + recLength; } srcRec = org.chombo.util.Utility.join(items, srcRecBeg, srcRecEnd, fieldDelim); trgRec = org.chombo.util.Utility.join(items, trgRecBeg, trgRecEnd, fieldDelim); outKey.add(srcEntityId, srcRec, rank); outVal.set(trgEntityId + fieldDelim + trgRec + fieldDelim + items[items.length - 1]); } else { // only target entity id and distance outKey.add(srcEntityId, rank); outVal.set(trgEntityId + fieldDelim + items[items.length - 1]); } context.write(outKey, outVal); }
/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context) */ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] items = value.toString().split(fieldDelim); String itemID = items[0]; if (isRatingFileSplit) { // user rating context.getCounter("Record type count", "Rating").increment(1); boolean toInclude = true; for (int i = 1; i < items.length; ++i) { // all user ratings for this item ratings = items[i].split(subFieldDelim); // time sensitive recommendation toInclude = true; if (ratingTimeCutoff > 0) { timeStamp = Long.parseLong(ratings[2]); toInclude = timeStamp > ratingTimeCutoff; } // contextual recommendation if (userRatingWithContext) { ratingContext = ratings[3]; } // check for min input rating threshold inputRating = new Integer(ratings[1]); toInclude = toInclude && inputRating > minInputRating; if (toInclude) { // itemID keyOut.set(itemID, two); // userID, rating valOut.initialize(); if (userRatingWithContext) { valOut.add(ratings[0], inputRating, context, two); } else { valOut.add(ratings[0], inputRating, two); } context.write(keyOut, valOut); } } } else if (isRatingStatFileSplit) { // rating stat context.getCounter("Record type count", "Rating stat").increment(1); int ratingStdDev = Integer.parseInt(items[STD_DEV_ORD]); keyOut.set(itemID, one); valOut.initialize(); valOut.add(ratingStdDev, one); context.write(keyOut, valOut); } else { // item correlation context.getCounter("Record type count", "Correlation").increment(1); correlation = Integer.parseInt(items[2]); correlationLength = Integer.parseInt(items[3]); // if correlation is above min threshold if (correlation > minCorrelation) { // correlation of 1st item keyOut.set(items[0], zero); valOut.initialize(); if (linearCorrelation) { // other itemID, correlation, intersection length (weight) valOut.add(items[1], correlation, correlationLength, zero); } else { // other itemID, correlation, intersection length (weight) valOut.add(items[1], -correlation, correlationLength, zero); } context.write(keyOut, valOut); // correlation of second item keyOut.set(items[1], zero); valOut.initialize(); if (linearCorrelation) { // other itemID, correlation, intersection length (weight) valOut.add(items[0], correlation, correlationLength, zero); } else { // other itemID, correlation, intersection length (weight) valOut.add(items[0], -correlation, correlationLength, zero); } context.write(keyOut, valOut); } } }