  /**
   * Draws kSamples negative contexts for the given word uniformly at random from the
   * vocabulary, skipping any index that is a true (positive) context of the word.
   */
  private Set<Integer> negativeSampleContexts(int wordIndex) {
    Set<Integer> negativeContexts = new HashSet<Integer>();
    Set<Integer> positiveContexts = contextPairs.get(wordIndex);
    while (negativeContexts.size() < kSamples) {
      // Draw a candidate context index uniformly from the vocabulary of size V.
      int contextIndex = (int) (Math.random() * V);
      // Keep the candidate only if it is neither a positive context nor already sampled.
      if (!positiveContexts.contains(contextIndex) && !negativeContexts.contains(contextIndex)) {
        negativeContexts.add(contextIndex);
      }
    }
    return negativeContexts;
  }
  /**
   * If boolean sampleUnigram = true, we use noiseSampler from randomContextGeneration to model
   * the unigram probability distribution raised to the specified power, default 3/4. Otherwise,
   * use the overloaded negativeSampleContexts(int wordIndex) method to draw from the uniform
   * probability distribution.
   *
   * @author jacqueline
   */
  private Set<Integer> negativeSampleContexts(
      int wordIndex, EnumeratedDistribution<String> weightedRandomSample) {
    Set<Integer> negativeContexts = new HashSet<Integer>();
    Set<Integer> positiveContexts = contextPairs.get(wordIndex);
    while (negativeContexts.size() < kSamples) {
      // Sample a candidate word from the weighted (unigram^power) noise distribution.
      String possibleContext = weightedRandomSample.sample();
      int contextIndex = encodedVocab.get(possibleContext);
      // Keep the candidate only if it is neither a positive context nor already sampled.
      if (!positiveContexts.contains(contextIndex) && !negativeContexts.contains(contextIndex)) {
        negativeContexts.add(contextIndex);
      }
    }
    return negativeContexts;
  }
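  /*
   * A minimal sketch, not the author's randomContextGeneration: one way the
   * weightedRandomSample argument above could be built with Apache Commons Math,
   * assuming a hypothetical word -> raw count map and imports for java.util.List,
   * java.util.ArrayList, java.util.Map, and org.apache.commons.math3.util.Pair.
   * Counts are raised to the power described in the Javadoc (e.g., 3/4);
   * EnumeratedDistribution normalizes the weights into probabilities itself.
   */
  private EnumeratedDistribution<String> buildNoiseSampler(
      Map<String, Integer> wordCounts, double power) {
    List<Pair<String, Double>> weights = new ArrayList<Pair<String, Double>>();
    for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
      // Weight each word by count^power; power = 0.75 flattens the unigram distribution.
      weights.add(new Pair<String, Double>(entry.getKey(), Math.pow(entry.getValue(), power)));
    }
    return new EnumeratedDistribution<String>(weights);
  }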