Java MutableSparseVector.get примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.grouplens.lenskit.vectors

Класс/Тип: MutableSparseVector

Метод/Функция: get

Примеров на hotexamples.com: 5

Java MutableSparseVector.get - 5 примеров найдено. Это лучшие примеры Java кода для org.grouplens.lenskit.vectors.MutableSparseVector.get, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

create(7)

wrap(6)

set(6)

fast(5)

get(5)

multiply(4)

fill(3)

freeze(3)

norm(3)

shrinkDomain(2)

addChannel(1)

keySet(1)

keyDomain(1)

getOrAddChannelVector(1)

addScaled(1)

clear(1)

getOrAddChannel(1)

Пример #1

Показать файл

Файл: SVDModelBuilder.java Проект: pippobaudos/coursera-intro-recommendation-systems

  /**
   * Builds the normalized rating matrix from the streamed user histories. Each user occupies one
   * row and each item one column; every rated cell receives the user's rating minus the baseline
   * score computed for that user and item.
   *
   * @param userMapping The index mapping used to turn a user ID into a row number.
   * @param itemMapping The index mapping used to turn an item ID into a column number.
   * @return A matrix storing the <i>normalized</i> user ratings.
   */
  private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) {
    final int userCount = userMapping.size();
    final int itemCount = itemMapping.size();

    // Rows are users, columns are items.
    logger.info("creating {} by {} rating matrix", userCount, itemCount);
    RealMatrix normalized = MatrixUtils.createRealMatrix(userCount, itemCount);

    Cursor<UserHistory<Event>> histories = userEventDAO.streamEventsByUser();
    try {
      for (UserHistory<Event> history : histories) {
        final long userId = history.getUserId();
        final int row = userMapping.getIndex(userId);

        // Score exactly the items this user rated so every rating has a baseline to subtract.
        MutableSparseVector userRatings = Ratings.userRatingVector(history.filter(Rating.class));
        MutableSparseVector baselineScores = MutableSparseVector.create(userRatings.keySet());
        baselineScorer.score(userId, baselineScores);

        for (VectorEntry rated : userRatings.fast(State.SET)) {
          final long itemId = rated.getKey();
          final int column = itemMapping.getIndex(itemId);
          final double offset = rated.getValue() - baselineScores.get(itemId);
          normalized.setEntry(row, column, offset);
        }
      }
    } finally {
      // The cursor is closed whether or not the loop finishes normally.
      histories.close();
    }

    return normalized;
  }

Пример #2

Показать файл

Файл: OrdRecRatingPredictor.java Проект: Elkfrawy/lenskit

  /**
   * Fills {@code predictions} with a quantized rating for every key in its domain.
   *
   * <p>A fresh {@link OrdRecModel} is trained for the user on the union of the user's rated items
   * and the requested items. For each requested item the most probable rating level is written
   * into {@code predictions}; when {@code reportDistribution} is enabled, a snapshot of the
   * probability vector is also stored in the {@code RATING_PROBABILITY_CHANNEL} channel.
   *
   * @param uid The user to predict for.
   * @param predictions The vector whose key domain selects the items and which receives the
   *     predicted values.
   */
  @Override
  public void predict(long uid, @Nonnull MutableSparseVector predictions) {
    logger.debug("predicting {} items for {}", predictions.keyDomain().size(), uid);

    OrdRecModel model = new OrdRecModel(quantizer);
    SparseVector userRatings = makeUserVector(uid, userEventDao);

    // Scores are needed both for the training items and for the items being predicted.
    LongSet scoredItems = LongUtils.setUnion(userRatings.keySet(), predictions.keyDomain());
    MutableSparseVector itemScores = MutableSparseVector.create(scoredItems);
    itemScorer.score(uid, itemScores);

    model.train(userRatings, itemScores);
    logger.debug("trained parameters for {}: {}", uid, model);

    // Reused for every item; a copy is taken before it is stored in the channel.
    Vector levelProbabilities = Vector.createLength(model.getLevelCount());

    Long2ObjectMap<IVector> distribution = null;
    if (reportDistribution) {
      distribution = predictions.addChannel(RATING_PROBABILITY_CHANNEL);
    }

    for (VectorEntry entry : predictions.fast(VectorEntry.State.EITHER)) {
      final long itemId = entry.getKey();
      model.getProbDistribution(itemScores.get(itemId), levelProbabilities);

      final int mostLikelyLevel = levelProbabilities.maxElementIndex();
      predictions.set(entry, quantizer.getIndexValue(mostLikelyLevel));

      if (distribution != null) {
        distribution.put(itemId, levelProbabilities.immutable());
      }
    }
  }

Пример #3

Показать файл

Файл: OrdRecRatingPredictor.java Проект: Elkfrawy/lenskit

    /**
     * Trains the OrdRec parameters {@code t1} and {@code beta} in place.
     *
     * <p>Makes {@code iterationCount} passes over {@code ratings}. For every rating it looks up
     * the item's score in {@code scores}, quantizes the rating to a level index, computes an
     * increment for {@code t1} and one for each element of {@code beta}, and applies both
     * increments before moving on to the next rating.
     *
     * @param ratings The ratings to learn from, keyed by item ID.
     * @param scores The item scores; read with {@code get} for every key of {@code ratings}.
     */
    @SuppressWarnings("ConstantConditions")
    private void train(SparseVector ratings, MutableSparseVector scores) {

      // Scratch vector holding the increments for beta; refilled for every rating.
      Vector dbeta = Vector.createLength(beta.length());
      double dt1;
      // j counts the training passes; iterationCount is the number of passes.
      for (int j = 0; j < iterationCount; j++) {
        for (VectorEntry rating : ratings.fast()) {
          long iid = rating.getKey();
          double score = scores.get(iid);
          // Level index of the observed rating.
          int r = quantizer.index(rating.getValue());

          // NOTE(review): presumably P(level == r), P(level <= r) and P(level <= r - 1) for
          // this score -- confirm against getProbEQ / getProbLE.
          double probEqualR = getProbEQ(score, r);
          double probLessR = getProbLE(score, r);
          double probLessR_1 = getProbLE(score, r - 1);

          // Increment for t1: learning rate scaled by 1 / probEqualR, minus a regTerm * t1
          // penalty. It is applied only after the beta increments below have been computed.
          dt1 =
              learningRate
                  / probEqualR
                  * (probLessR * (1 - probLessR) * derivateOfBeta(r, 0, t1)
                      - probLessR_1 * (1 - probLessR_1) * derivateOfBeta(r - 1, 0, t1)
                      - regTerm * t1);

          // Same form of increment for each beta element; derivateOfBeta is called with
          // index k + 1 because index 0 is used for t1 above.
          double dbetaK;
          for (int k = 0; k < beta.length(); k++) {
            dbetaK =
                learningRate
                    / probEqualR
                    * (probLessR * (1 - probLessR) * derivateOfBeta(r, k + 1, beta.get(k))
                        - probLessR_1
                            * (1 - probLessR_1)
                            * derivateOfBeta(r - 1, k + 1, beta.get(k))
                        - regTerm * beta.get(k));
            dbeta.set(k, dbetaK);
          }
          // Apply both updates together so every increment above used the pre-update values.
          t1 = t1 + dt1;
          beta.add(dbeta);
        }
      }
    }

Пример #4

Показать файл

Файл: TFIDFModelBuilder.java Проект: jieshan/myNewTag

  /**
   * Computes the TF-IDF model: one unit-length tag vector per item.
   *
   * <p>The computation has two stages. First, a term-frequency (TF) vector is built for every
   * item while the number of items carrying each tag (document frequency) is accumulated. Then
   * the document frequencies are turned into log-IDF weights, each TF vector is multiplied by
   * them, and the result is normalized to unit length.
   *
   * <p>Each stored vector only contains the tags applied to its item. Vectors whose Euclidean
   * norm is zero (an item without tags, or whose tags all have a zero IDF weight) are stored
   * unnormalized instead of being filled with NaN.
   *
   * @return The TF-IDF model (a model of item tag vectors).
   */
  @Override
  public TFIDFModel get() {
    // Map tags (strings) to the long IDs used as keys of the tag vectors.
    Map<String, Long> tagIds = buildTagIdMap();

    // Document frequency per tag; every key is set so it can be incremented with get/set.
    MutableSparseVector docFreq = MutableSparseVector.create(tagIds.values());
    docFreq.fill(0);

    // Item ID -> TF vector of that item.
    Map<Long, MutableSparseVector> itemVectors = Maps.newHashMap();

    // Work vector re-used for each item.
    MutableSparseVector work = MutableSparseVector.create(tagIds.values());

    LongSet items = dao.getItemIds();
    for (long item : items) {
      // Unset every key, so that only the tags seen on this item end up set.
      work.clear();

      for (String tag : dao.getItemTags(item)) {
        long tagId = tagIds.get(tag);
        if (work.containsKey(tagId)) {
          work.set(tagId, work.get(tagId) + 1);
        } else {
          // First occurrence of this tag on this item: it counts once towards the
          // document frequency, however often it is repeated afterwards.
          work.set(tagId, 1);
          docFreq.set(tagId, docFreq.get(tagId) + 1);
        }
      }

      // Keep a copy whose domain is limited to the tags set above.
      itemVectors.put(item, work.shrinkDomain());
    }

    // Turn the document frequencies into log-IDF weights in place.
    final double itemCount = items.size();
    for (VectorEntry e : docFreq.fast()) {
      docFreq.set(e, Math.log(itemCount / e.getValue()));
    }

    Map<Long, SparseVector> modelData = Maps.newHashMap();
    for (Map.Entry<Long, MutableSparseVector> entry : itemVectors.entrySet()) {
      MutableSparseVector tv = entry.getValue();

      // TF * IDF, only for the tags that are actually present on the item.
      for (VectorEntry e : tv.fast()) {
        tv.set(e, e.getValue() * docFreq.get(e.getKey()));
      }

      // Normalize to a unit vector; dividing by a zero norm would turn every entry into NaN.
      double norm = tv.norm();
      if (norm > 0) {
        tv.multiply(1.0 / norm);
      }

      // Store a frozen (immutable) version of the vector in the model data.
      modelData.put(entry.getKey(), tv.freeze());
    }

    // The IDF vector is not part of the model; only the tag IDs and item vectors are kept.
    return new TFIDFModel(tagIds, modelData);
  }

Пример #5

Показать файл

Файл: TFIDFModelBuilder.java Проект: paolobarbaglia/coursera_recommender_systems

  /**
   * Computes the TF-IDF model: one unit-length tag vector per item.
   *
   * <p>The computation has two stages. First, a term-frequency (TF) vector is built for every
   * item while the number of items carrying each tag (document frequency) is accumulated. Then
   * the document frequencies are turned into log-IDF weights, each TF vector is multiplied by
   * them, and the result is normalized to unit length.
   *
   * <p>Vectors whose Euclidean norm is zero (an item without tags, or whose tags all have a zero
   * IDF weight) are stored unnormalized instead of being filled with NaN.
   *
   * @return The TF-IDF model (a model of item tag vectors).
   */
  @Override
  public TFIDFModel get() {
    // Map tags (strings) to the long IDs used as keys of the tag vectors.
    Map<String, Long> tagIds = buildTagIdMap();

    // Document frequency per tag; every key is set so it can be incremented with get/set.
    MutableSparseVector docFreq = MutableSparseVector.create(tagIds.values());
    docFreq.fill(0);

    // Item ID -> TF vector of that item.
    Map<Long, MutableSparseVector> itemVectors = Maps.newHashMap();

    // Work vector re-used for each item.
    MutableSparseVector work = MutableSparseVector.create(tagIds.values());

    LongSet items = dao.getItemIds();
    for (long item : items) {
      // Reset the work vector: afterwards all keys are 'unset'.
      work.clear();

      for (String tag : dao.getItemTags(item)) {
        long tagId = tagIds.get(tag);
        if (work.containsKey(tagId)) {
          work.set(tagId, work.get(tagId) + 1);
        } else {
          // An unset key means this is the first occurrence of the tag on this item, so it
          // also counts (once) towards the document frequency.
          work.set(tagId, 1.0);
          docFreq.set(tagId, docFreq.get(tagId) + 1);
        }
      }

      // Keep a copy whose domain is limited to the tags set above.
      itemVectors.put(item, work.shrinkDomain());
    }

    // Turn the document frequencies into log-IDF weights in place.
    final double itemCount = items.size();
    for (VectorEntry e : docFreq.fast()) {
      docFreq.set(e.getKey(), Math.log(itemCount / e.getValue()));
    }

    Map<Long, SparseVector> modelData = Maps.newHashMap();
    for (Map.Entry<Long, MutableSparseVector> entry : itemVectors.entrySet()) {
      MutableSparseVector tv = entry.getValue();

      // Convert the TF vector into a TF-IDF vector.
      for (VectorEntry e : tv.fast()) {
        tv.set(e.getKey(), e.getValue() * docFreq.get(e.getKey()));
      }

      // Normalize to a unit vector; dividing by a zero norm would turn every entry into NaN.
      double norm = tv.norm();
      if (norm > 0) {
        tv.multiply(1 / norm);
      }

      // Store a frozen (immutable) version of the vector in the model data.
      modelData.put(entry.getKey(), tv.freeze());
    }

    // The IDF vector is not part of the model; only the tag IDs and item vectors are kept.
    return new TFIDFModel(tagIds, modelData);
  }