/**
 * Renders the keys of {@code map} as one delimited string.
 *
 * <p>Each key is converted to its decimal text form, in the order the map's key iterator
 * yields them, and the resulting strings are handed to
 * {@code DelimitedDataUtils.encode} with {@code ','} as the delimiter.
 *
 * @param map map whose keys are to be encoded; its values are not read
 * @return the encoded, comma-delimited key list
 */
private static String setToString(LongFloatMap map) {
  // Pre-size to the number of keys so the list never has to grow.
  Collection<String> encodedKeys = Lists.newArrayListWithCapacity(map.size());
  for (LongPrimitiveIterator keys = map.keySetIterator(); keys.hasNext(); ) {
    long nextKey = keys.nextLong();
    encodedKeys.add(Long.toString(nextKey));
  }
  return DelimitedDataUtils.encode(',', encodedKeys);
}
@Override public void process( Pair<Pair<Long, Integer>, Iterable<NumericIDValue>> input, Emitter<Pair<Long, NumericIDValue>> emitter) { Pair<Long, Integer> key = input.first(); long currentUserID = key.first(); if (key.second() == BEFORE) { // Last old data had no match, just output it if (previousUserPrefs != null) { Preconditions.checkNotNull(previousUserID); output(previousUserID, previousUserPrefs, null, null, emitter); previousUserPrefs = null; previousUserID = null; } LongFloatMap oldPrefs = new LongFloatMap(); for (NumericIDValue itemPref : input.second()) { float oldPrefValue = itemPref.getValue(); Preconditions.checkState(!Float.isNaN(oldPrefValue), "No prior pref value?"); // Apply decay factor here, if applicable: oldPrefs.increment(itemPref.getID(), doDecay ? oldPrefValue * decayFactor : oldPrefValue); } previousUserPrefs = oldPrefs; previousUserID = currentUserID; } else { // Last old data had no match, just output it if (previousUserPrefs != null && currentUserID != previousUserID) { Preconditions.checkNotNull(previousUserID); output(previousUserID, previousUserPrefs, null, null, emitter); previousUserPrefs = null; previousUserID = null; } LongFloatMap newPrefs = new LongFloatMap(); LongSet removedItemIDs = new LongSet(); for (NumericIDValue itemPref : input.second()) { long itemID = itemPref.getID(); float newPrefValue = itemPref.getValue(); if (Float.isNaN(newPrefValue)) { removedItemIDs.add(itemID); } else { newPrefs.increment(itemID, newPrefValue); } } output(currentUserID, previousUserPrefs, newPrefs, removedItemIDs, emitter); previousUserPrefs = null; previousUserID = null; } }
private void output( long userID, LongFloatMap oldPrefs, LongFloatMap newPrefs, LongSet removedItemIDs, Emitter<Pair<Long, NumericIDValue>> emitter) { // Old prefs may be null when there is no previous generation, for example, or the user is new. // First, write out existing prefs, possibly updated by new values if (oldPrefs != null && !oldPrefs.isEmpty()) { for (LongFloatMap.MapEntry entry : oldPrefs.entrySet()) { long itemID = entry.getKey(); float oldPrefValue = entry.getValue(); Preconditions.checkState(!Float.isNaN(oldPrefValue), "No prior pref value?"); // May be NaN if no new data at all, or new data has no update: float sum = oldPrefValue; if (newPrefs != null) { float newPrefValue = newPrefs.get(itemID); if (!Float.isNaN(newPrefValue)) { sum += newPrefValue; } } boolean remove = false; if (removedItemIDs != null && removedItemIDs.contains(itemID)) { remove = true; } else if (FastMath.abs(sum) <= zeroThreshold) { remove = true; } if (!remove) { emitter.emit(Pair.of(userID, new NumericIDValue(itemID, sum))); } } } // Now output new data, that didn't exist in old prefs if (newPrefs != null && !newPrefs.isEmpty()) { for (LongFloatMap.MapEntry entry : newPrefs.entrySet()) { long itemID = entry.getKey(); if (oldPrefs == null || !oldPrefs.containsKey(itemID)) { // It wasn't already written. If it exists in newPrefs, it's also not removed float newPrefValue = entry.getValue(); if (FastMath.abs(newPrefValue) > zeroThreshold) { emitter.emit(Pair.of(userID, new NumericIDValue(itemID, newPrefValue))); } } } } }