예제 #1
0
  /**
   * Builds the initial {@link KSketchIndex}, seeding every cross fold with the first non-null
   * vector found for that fold in {@code input}.
   *
   * <p>The input is scanned only until each fold has a seed vector. The dimension of the index is
   * taken from the seed vector of fold 0.
   *
   * @param settings supplies the number of cross folds, index bits and index samples
   * @param input pairs of (fold id, vector); fold ids are used directly as array indices and must
   *     therefore lie in {@code [0, settings.getCrossFolds())}
   * @return an index containing exactly one vector per fold
   * @throws IllegalArgumentException if the configured number of cross folds is not positive
   * @throws IllegalStateException if {@code input} contains no vector for at least one fold
   */
  public static KSketchIndex createInitialIndex(
      ClusterSettings settings, PCollection<Pair<Integer, RealVector>> input) {
    int numFolds = settings.getCrossFolds();
    if (numFolds <= 0) {
      throw new IllegalArgumentException(
          "Number of cross folds must be positive, but was " + numFolds);
    }

    RealVector[] init = new RealVector[numFolds];
    // Count the folds still lacking a seed instead of rescanning the array for every element.
    int unseeded = numFolds;
    for (Pair<Integer, RealVector> rv : input.materialize()) {
      int fold = rv.first();
      if (init[fold] == null) {
        init[fold] = rv.second();
        // A null vector leaves the fold unseeded so that a later element can still fill it.
        if (init[fold] != null && --unseeded == 0) {
          break;
        }
      }
    }

    // Fail with a clear message rather than a NullPointerException further down.
    if (unseeded > 0) {
      throw new IllegalStateException(
          "Input has no vector for " + unseeded + " of " + numFolds + " cross folds");
    }

    KSketchIndex index =
        new KSketchIndex(
            numFolds,
            init[0].getDimension(),
            settings.getIndexBits(),
            settings.getIndexSamples(),
            1729L); // TODO: something smarter, or figure out that I don't need this b/c I compute
                    // the projections up front
    for (int i = 0; i < init.length; i++) {
      index.add(init[i], i);
    }
    return index;
  }
예제 #2
0
  /**
   * {@inheritDoc}
   *
   * <p>Full outer join step. Calls with {@code id == 0} carry left-side values, which are buffered
   * in {@code leftValues}; calls with any other {@code id} carry right-side values, each of which
   * is emitted once per buffered left value (or once with a {@code null} left value when nothing
   * is buffered).
   *
   * <p>When the key changes and the previous call was a left-side call, the buffered left values
   * never met a right-side value, so they are emitted with a {@code null} right value before the
   * buffer is reset. This must happen regardless of the {@code id} of the incoming call; otherwise
   * a left-only key followed by a key whose first group is right-side would be dropped.
   *
   * <p>NOTE(review): left values still buffered after the final call are not emitted here;
   * presumably a cleanup hook outside this method handles them — confirm.
   *
   * @param key the join key of the current group
   * @param id 0 for a group of left-side values, anything else for right-side values
   * @param pairs the values of the group; only {@code first()} is read for left-side groups and
   *     only {@code second()} for right-side groups
   * @param emitter receives the joined (key, (left, right)) pairs
   */
  @Override
  public void join(
      K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
    if (!key.equals(lastKey)) {
      // Make sure that left side gets emitted: if the previous group was left-side, its values
      // were never matched, whatever side the new key starts with.
      if (0 == lastId) {
        for (U u : leftValues) {
          emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
        }
      }
      lastKey = key;
      leftValues.clear();
    }
    if (id == 0) {
      // Left side: buffer detached copies so they remain valid after this call returns.
      for (Pair<U, V> pair : pairs) {
        if (pair.first() != null) {
          leftValues.add(leftValueType.getDetachedValue(pair.first()));
        }
      }
    } else {
      for (Pair<U, V> pair : pairs) {
        // Make sure that right side gets emitted: with no left values, pair it with null.
        if (leftValues.isEmpty()) {
          leftValues.add(null);
        }
        for (U u : leftValues) {
          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
        }
      }
    }

    lastId = id;
  }
예제 #3
0
  /**
   * Merges a user's old preference data with that user's new data.
   *
   * <p>A group tagged {@code BEFORE} holds old preferences: they are accumulated (optionally
   * scaled by {@code decayFactor}) and held back until the next call. Any other group holds new
   * preferences, where a NaN value marks an item as removed; it is output together with the
   * held-back old preferences. Old data that is still held back when it can no longer be matched
   * is output on its own.
   *
   * @param input ((user ID, ordering tag), item preferences of that user)
   * @param emitter destination handed through to {@code output}
   */
  @Override
  public void process(
      Pair<Pair<Long, Integer>, Iterable<NumericIDValue>> input,
      Emitter<Pair<Long, NumericIDValue>> emitter) {
    Pair<Long, Integer> userAndOrder = input.first();
    long currentUserID = userAndOrder.first();

    if (userAndOrder.second() == BEFORE) {
      // Another block of old data starts, so the pending old data never found a match.
      if (previousUserPrefs != null) {
        emitUnmatchedPrevious(emitter);
      }

      LongFloatMap priorPrefs = new LongFloatMap();
      for (NumericIDValue pref : input.second()) {
        float priorValue = pref.getValue();
        Preconditions.checkState(!Float.isNaN(priorValue), "No prior pref value?");
        // Decay is applied while accumulating, when enabled.
        priorPrefs.increment(pref.getID(), doDecay ? priorValue * decayFactor : priorValue);
      }

      // Hold the old data back until the next call.
      previousUserPrefs = priorPrefs;
      previousUserID = currentUserID;
      return;
    }

    // New data: pending old data of a different user has no match and goes out by itself.
    if (previousUserPrefs != null && currentUserID != previousUserID) {
      emitUnmatchedPrevious(emitter);
    }

    LongFloatMap addedPrefs = new LongFloatMap();
    LongSet removedItemIDs = new LongSet();
    for (NumericIDValue pref : input.second()) {
      long itemID = pref.getID();
      float value = pref.getValue();
      if (!Float.isNaN(value)) {
        addedPrefs.increment(itemID, value);
      } else {
        // NaN marks the item as removed.
        removedItemIDs.add(itemID);
      }
    }

    output(currentUserID, previousUserPrefs, addedPrefs, removedItemIDs, emitter);

    previousUserPrefs = null;
    previousUserID = null;
  }

  /** Outputs the held-back old preferences on their own and clears the held-back state. */
  private void emitUnmatchedPrevious(Emitter<Pair<Long, NumericIDValue>> emitter) {
    Preconditions.checkNotNull(previousUserID);
    output(previousUserID, previousUserPrefs, null, null, emitter);
    previousUserPrefs = null;
    previousUserID = null;
  }
예제 #4
0
 /**
  * Renders one record as a text line: the key, a tab character, then the map as formatted by
  * {@code setToString}.
  *
  * @param input the (key, map) pair to render
  * @return the tab-separated text line
  */
 @Override
 public String map(Pair<Long, LongFloatMap> input) {
   String keyText = input.first().toString();
   String valuesText = setToString(input.second());
   return keyText + '\t' + valuesText;
 }
예제 #5
0
 /**
  * Produces a detached copy of a table entry by detaching its key and its value separately,
  * using the key type and the value type of the given table type.
  *
  * @param tableType The table type whose key and value types perform the detaching
  * @param value The table entry to detach
  * @return A new {@link Pair} holding the detached key and the detached value
  * @see PType#getDetachedValue(Object)
  */
 public static <K, V> Pair<K, V> getDetachedValue(PTableType<K, V> tableType, Pair<K, V> value) {
   K detachedKey = tableType.getKeyType().getDetachedValue(value.first());
   V detachedValue = tableType.getValueType().getDetachedValue(value.second());
   return Pair.of(detachedKey, detachedValue);
 }