Примеры использования MutableSparseVector.set в Java

Язык программирования: Java

Пространство имен/Пакет: org.grouplens.lenskit.vectors

Класс/Тип: MutableSparseVector

Метод/Функция: set

Примеров на hotexamples.com: 6

Java MutableSparseVector.set — найдено 6 примеров. Это лучшие примеры Java-кода для org.grouplens.lenskit.vectors.MutableSparseVector.set, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

create(7)

wrap(6)

set(6)

fast(5)

get(5)

multiply(4)

fill(3)

freeze(3)

norm(3)

shrinkDomain(2)

addChannel(1)

keySet(1)

keyDomain(1)

getOrAddChannelVector(1)

addScaled(1)

clear(1)

getOrAddChannel(1)

Пример #1

Показать файл

Файл: OrdRecRatingPredictor.java Проект: Elkfrawy/lenskit

  /**
   * Fills {@code predictions} with a quantized rating for every key in its key domain.
   *
   * <p>An {@code OrdRecModel} is trained on the user's ratings and the item scores; for each item
   * the model's probability distribution is computed from the item's score and the value of the
   * most probable level is written into {@code predictions}. When {@code reportDistribution} is
   * enabled, an immutable copy of each distribution is also stored in the
   * {@code RATING_PROBABILITY_CHANNEL} channel.
   *
   * @param uid The user ID.
   * @param predictions The vector whose key domain lists the items to predict; receives the output.
   */
  @Override
  public void predict(long uid, @Nonnull MutableSparseVector predictions) {
    logger.debug("predicting {} items for {}", predictions.keyDomain().size(), uid);

    OrdRecModel model = new OrdRecModel(quantizer);
    SparseVector userRatings = makeUserVector(uid, userEventDao);

    // Scores are needed both for the rated items (training) and the requested items (prediction).
    LongSet itemsToScore = LongUtils.setUnion(userRatings.keySet(), predictions.keyDomain());
    MutableSparseVector itemScores = MutableSparseVector.create(itemsToScore);
    itemScorer.score(uid, itemScores);

    model.train(userRatings, itemScores);
    logger.debug("trained parameters for {}: {}", uid, model);

    // Scratch buffer overwritten with the distribution of each item in turn.
    Vector distribution = Vector.createLength(model.getLevelCount());
    Long2ObjectMap<IVector> distributionChannel =
        reportDistribution ? predictions.addChannel(RATING_PROBABILITY_CHANNEL) : null;

    for (VectorEntry entry : predictions.fast(VectorEntry.State.EITHER)) {
      long itemId = entry.getKey();
      model.getProbDistribution(itemScores.get(itemId), distribution);

      int mostLikelyLevel = distribution.maxElementIndex();
      predictions.set(entry, quantizer.getIndexValue(mostLikelyLevel));

      if (distributionChannel != null) {
        distributionChannel.put(itemId, distribution.immutable());
      }
    }
  }

Пример #2

Показать файл

Файл: SVDItemScorer.java Проект: Elkfrawy/coursera-intro-recommendation-systems

  /**
   * Scores the items in a vector. Every key in the key domain of {@code scores}, whether it
   * currently holds a value or not, is assigned the result of {@code prediction(user, item)};
   * any previous value is overwritten.
   *
   * @param user The user ID.
   * @param scores The score vector; its key domain is the set of items to score.
   */
  @Override
  public void score(long user, @Nonnull MutableSparseVector scores) {
    // State.EITHER visits set and unset keys alike, so the whole key domain gets a score.
    for (VectorEntry entry : scores.fast(VectorEntry.State.EITHER)) {
      scores.set(entry, prediction(user, entry.getKey()));
    }
  }

Пример #3

Показать файл

Файл: ScoredIdListBuilder.java Проект: Elkfrawy/lenskit

  /**
   * Build a sparse vector directly from the list of IDs. This allows a scored ID list builder to be
   * used to efficiently accumulate a sparse vector. If the same ID is added multiple times, the
   * first instance is used, both for the score and for every side channel.
   *
   * @return A sparse vector containing the data accumulated.
   */
  public ImmutableSparseVector buildVector() {
    MutableSparseVector msv = MutableSparseVector.create(ids);
    final int size = size();

    // Entries are written back-to-front: a later write to the same key replaces an earlier one,
    // so the earliest occurrence of a duplicated ID is written last and is the one that survives.
    // (Writing front-to-back would keep the LAST instance, contradicting the documented contract.)
    for (int i = size - 1; i >= 0; i--) {
      msv.set(ids.get(i), scores.get(i));
    }

    // Copy each unboxed (double-valued) side channel, same duplicate policy as the scores.
    for (ChannelStorage chan : channels.values()) {
      MutableSparseVector vchan = msv.getOrAddChannelVector(chan.symbol);
      for (int i = size - 1; i >= 0; i--) {
        vchan.set(ids.get(i), chan.values.get(i));
      }
    }

    // Copy each typed side channel, same duplicate policy as the scores.
    for (TypedChannelStorage<?> chan : typedChannels.values()) {
      // Raw type kept from the original: the wildcard element type cannot be named here.
      Long2ObjectMap vchan = msv.getOrAddChannel(chan.symbol);
      for (int i = size - 1; i >= 0; i--) {
        vchan.put(ids.get(i), chan.values.get(i));
      }
    }

    return msv.freeze();
  }

Пример #4

Показать файл

Файл: DistanceVectorSimilarity.java Проект: Bitaaa/lenskit

  /**
   * Computes a distance-based similarity between two vectors: one minus the Euclidean norm of the
   * difference between the two vectors after each has been scaled by the reciprocal of its norm.
   *
   * @param vec1 The first vector.
   * @param vec2 The second vector.
   * @return {@code 1 - distance}, or {@code Double.NaN} if either vector has a zero norm.
   */
  @Override
  public double similarity(SparseVector vec1, SparseVector vec2) {
    // A zero-norm vector cannot be normalized, so the similarity is undefined.
    if (Scalars.isZero(vec1.norm())) {
      return Double.NaN;
    }
    if (Scalars.isZero(vec2.norm())) {
      return Double.NaN;
    }

    // Work over the union of keys so that keys missing from one side count as zero.
    LongSet allKeys = LongUtils.setUnion(vec1.keySet(), vec2.keySet());
    MutableSparseVector difference = MutableSparseVector.create(allKeys);
    difference.fill(0);
    difference.set(vec1);

    // difference = vec1 / |vec1| - vec2 / |vec2|
    difference.multiply(1.0 / difference.norm());
    difference.addScaled(vec2, -1.0 / vec2.norm());

    return 1 - difference.norm();
  }

Пример #5

Показать файл

Файл: TFIDFModelBuilder.java Проект: jieshan/myNewTag

  /**
   * Computes the TF-IDF model.
   *
   * <p>The computation runs in two stages. First a term-frequency (TF) vector is built for every
   * item while the per-tag document frequency (DF) is accumulated. Then the DF vector is turned
   * into a log-IDF vector, each item's TF vector is multiplied by it, and the result is scaled to
   * unit length. Vectors whose Euclidean norm is zero (for example items without tags) are stored
   * un-normalized instead of being turned into NaN values.
   *
   * @return The TF-IDF model (a model of item tag vectors).
   */
  @Override
  public TFIDFModel get() {
    // Build a map of tags to numeric IDs. This converts tags (which are strings)
    // into long IDs that can be used as keys in a tag vector.
    Map<String, Long> tagIds = buildTagIdMap();

    // Vector accumulating document frequencies for the IDF computation.
    MutableSparseVector docFreq = MutableSparseVector.create(tagIds.values());
    docFreq.fill(0);

    // Item TF vectors, keyed by item ID.
    Map<Long, MutableSparseVector> itemVectors = Maps.newHashMap();

    // Work vector accumulating one item's tag counts; re-used for each item.
    MutableSparseVector work = MutableSparseVector.create(tagIds.values());

    LongSet items = dao.getItemIds();
    for (long item : items) {
      // Reset every tag count to zero. The keys stay SET (with value 0) so that get() can be
      // called for any tag below without first checking whether the key is present.
      work.fill(0);

      for (String tag : dao.getItemTags(item)) {
        long tagId = tagIds.get(tag);
        double countSoFar = work.get(tagId);
        if (countSoFar == 0) {
          // First time this tag is seen on this item: the item counts once towards the DF.
          docFreq.set(tagId, docFreq.get(tagId) + 1);
        }
        work.set(tagId, countSoFar + 1);
      }

      // Store a separate vector for this item; IDF and normalization are applied later.
      // NOTE(review): because fill(0) sets every key, shrinkDomain() presumably keeps the full
      // tag domain here rather than only this item's tags -- confirm if sparsity matters.
      itemVectors.put(item, work.shrinkDomain());
      // work is ready to be reset and re-used for the next item
    }

    // All items have been seen: turn the document frequencies into log-IDF values in place.
    final double itemCount = items.size();
    for (VectorEntry e : docFreq.fast()) {
      docFreq.set(e, Math.log(itemCount / e.getValue()));
    }

    // Apply the log-IDF vector to each item vector and collect the final model data.
    Map<Long, SparseVector> modelData = Maps.newHashMap();
    for (Map.Entry<Long, MutableSparseVector> entry : itemVectors.entrySet()) {
      MutableSparseVector tv = entry.getValue();

      for (VectorEntry e : tv.fast()) {
        double tf = e.getValue();
        // Zero counts stay zero; skipping them also avoids 0 * Infinity = NaN for a tag whose
        // document frequency is zero (its log-IDF is infinite).
        if (tf != 0) {
          tv.set(e, tf * docFreq.get(e.getKey()));
        }
      }

      // Scale to a unit vector. A zero norm would make the factor infinite and every entry NaN,
      // so such vectors are left as they are.
      double norm = tv.norm();
      if (norm > 0) {
        tv.multiply(1.0 / norm);
      }

      // Store a frozen (immutable) version of the vector in the model data.
      modelData.put(entry.getKey(), tv.freeze());
    }

    // The IDF vector is not needed any more, as long as no new tags appear.
    return new TFIDFModel(tagIds, modelData);
  }

Пример #6

Показать файл

Файл: TFIDFModelBuilder.java Проект: paolobarbaglia/coursera_recommender_systems

  /**
   * Computes the TF-IDF model.
   *
   * <p>First a term-frequency (TF) vector is built for every item while the per-tag document
   * frequency (DF) is accumulated. The DF vector is then converted to log-IDF values, each item's
   * TF vector is multiplied by them, and the result is scaled to unit length. Vectors with a zero
   * Euclidean norm are stored un-normalized instead of being turned into NaN values.
   *
   * @return The TF-IDF model (a model of item tag vectors).
   */
  @Override
  public TFIDFModel get() {
    // Build a map of tags to numeric IDs. This converts tags (which are strings)
    // into long IDs that can be used as keys in a tag vector.
    Map<String, Long> tagIds = buildTagIdMap();

    // Vector accumulating document frequencies for the IDF computation.
    MutableSparseVector docFreq = MutableSparseVector.create(tagIds.values());
    docFreq.fill(0);

    // Item TF vectors, keyed by item ID.
    Map<Long, MutableSparseVector> itemVectors = Maps.newHashMap();

    // Work vector accumulating one item's tag counts; re-used for each item.
    MutableSparseVector work = MutableSparseVector.create(tagIds.values());

    LongSet items = dao.getItemIds();

    for (long item : items) {
      // Reset the work vector: after clear() all keys are unset.
      work.clear();

      for (String tag : dao.getItemTags(item)) {
        long tagId = tagIds.get(tag);

        // An unset key means the tag has not been seen on this item yet; read it as a count of
        // zero through the default-valued getter instead of catching the exception get() throws.
        double countSoFar = work.get(tagId, 0.0);
        if (countSoFar == 0.0) {
          // First occurrence on this item: the item counts once towards the document frequency.
          docFreq.set(tagId, docFreq.get(tagId) + 1);
        }
        work.set(tagId, countSoFar + 1);
      }

      // Save a shrunk copy of the vector (only storing tags that apply to this item);
      // IDF and normalization are applied later.
      itemVectors.put(item, work.shrinkDomain());
      // work is ready to be reset and re-used for the next item
    }

    // All items have been seen: turn the document frequencies into log-IDF values in place.
    for (VectorEntry e : docFreq.fast()) {
      docFreq.set(e.getKey(), Math.log(items.size() / e.getValue()));
    }

    // Apply the log-IDF vector to each item vector and collect the final model data.
    Map<Long, SparseVector> modelData = Maps.newHashMap();
    for (Map.Entry<Long, MutableSparseVector> entry : itemVectors.entrySet()) {
      MutableSparseVector tv = entry.getValue();

      // Convert the TF vector to a TF-IDF vector.
      for (VectorEntry e : tv.fast()) {
        tv.set(e.getKey(), e.getValue() * docFreq.get(e.getKey()));
      }

      // Scale to a unit vector. If every tag of the item has a log-IDF of zero the norm is zero;
      // dividing by it would turn the entries into NaN, so such vectors are left as they are.
      double norm = tv.norm();
      if (norm > 0) {
        tv.multiply(1 / norm);
      }

      // Store a frozen (immutable) version of the vector in the model data.
      modelData.put(entry.getKey(), tv.freeze());
    }

    // The IDF vector is not needed any more, as long as no new tags appear.
    return new TFIDFModel(tagIds, modelData);
  }