Ejemplo n.º 1
0
    private Set<Integer> negativeSampleContexts(int wordIndex) {
      Set<Integer> negativeContexts = new HashSet<Integer>();
      Set<Integer> positiveContexts = contextPairs.get(wordIndex);

      while (negativeContexts.size() < kSamples) {
        int contextIndex = (int) (Math.random() * V);
        if (!positiveContexts.contains(contextIndex) && negativeContexts.contains(contextIndex)) {
          negativeContexts.add(contextIndex);
        }
      }
      return negativeContexts;
    }
Ejemplo n.º 2
0
    /**
     * @author jacqueline If boolean sampleUnigram = true, we use noiseSampler from
     *     randomContextGeneration to model the unigram probability distribution raised to specfied
     *     power, default 3/4. Otherwise, use overloaded negativeSampleContexts(int wordIndex)
     *     method to draw from uniform probability distribution.
     */
    private Set<Integer> negativeSampleContexts(
        int wordIndex, EnumeratedDistribution<String> weightedRandomSample) {
      Set<Integer> negativeContexts = new HashSet<Integer>();
      Set<Integer> positiveContexts = contextPairs.get(wordIndex);

      while (negativeContexts.size() < kSamples) {
        String possibleContext = weightedRandomSample.sample();
        int contextIndex = encodedVocab.get(possibleContext);
        if (!positiveContexts.contains(contextIndex) && !negativeContexts.contains(contextIndex)) {
          negativeContexts.add(contextIndex);
        }
      }
      return negativeContexts;
    }