Java CounterMap.keySet Examples

Programming Language: Java

Namespace/Package Name: java.util

Class/Type: CounterMap

Method/Function: keySet

Examples at hotexamples.com: 3

Java CounterMap.keySet - 3 examples found. These are the top rated real world Java examples of java.util.CounterMap.keySet extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getCount(6)

getCounter(6)

incrementCount(6)

keySet(3)

setCount(3)

containsKey(1)

Example #1

Show file

File: POSTaggerTester.java Project: QimingChen/NLP

 public void train(List<LabeledLocalTrigramContext> labeledLocalTrigramContexts) {
   // collect word-tag counts
   for (LabeledLocalTrigramContext labeledLocalTrigramContext : labeledLocalTrigramContexts) {
     String word = labeledLocalTrigramContext.getCurrentWord();
     String tag = labeledLocalTrigramContext.getCurrentTag();
     if (!wordsToTags.keySet().contains(word)) {
       // word is currently unknown, so tally its tag in the unknown tag counter
       unknownWordTags.incrementCount(tag, 1.0);
     }
     wordsToTags.incrementCount(word, tag, 1.0);
     seenTagTrigrams.add(
         makeTrigramString(
             labeledLocalTrigramContext.getPreviousPreviousTag(),
             labeledLocalTrigramContext.getPreviousTag(),
             labeledLocalTrigramContext.getCurrentTag()));
   }
   wordsToTags = Counters.conditionalNormalize(wordsToTags);
   unknownWordTags = Counters.normalize(unknownWordTags);
 }

Example #2

Show file

File: POSTaggerTester.java Project: QimingChen/NLP

 public Counter<String> getLogScoreCounter(LocalTrigramContext localTrigramContext) {
   int position = localTrigramContext.getPosition();
   String word = localTrigramContext.getWords().get(position);
   Counter<String> tagCounter = unknownWordTags;
   if (wordsToTags.keySet().contains(word)) {
     tagCounter = wordsToTags.getCounter(word);
   }
   Set<String> allowedFollowingTags =
       allowedFollowingTags(
           tagCounter.keySet(),
           localTrigramContext.getPreviousPreviousTag(),
           localTrigramContext.getPreviousTag());
   Counter<String> logScoreCounter = new Counter<String>();
   for (String tag : tagCounter.keySet()) {
     double logScore = Math.log(tagCounter.getCount(tag));
     if (!restrictTrigrams
         || allowedFollowingTags.isEmpty()
         || allowedFollowingTags.contains(tag)) logScoreCounter.setCount(tag, logScore);
   }
   return logScoreCounter;
 }

Example #3

Show file

File: WordAlignmentTester copy.java Project: jgutman/NLP-data-src-code

	  private CounterMap<String,String> trainEM(int maxIterations) {
		  Set<String> englishVocab = new HashSet<String>();
		  Set<String> frenchVocab = new HashSet<String>();
		  
		  CounterMap<String,String> translations = new CounterMap<String,String>();
		  englishVocab.add(NULL);
		  int iteration = 0;
		  final double thresholdProb = 0.0001;
		  
		  for (SentencePair sentencePair : trainingSentencePairs) {
			  List<String> frenchWords = sentencePair.getFrenchWords();
			  List<String> englishWords = sentencePair.getEnglishWords();
			  // add words from list to vocabulary sets
			  englishVocab.addAll(englishWords);
			  frenchVocab.addAll(frenchWords);
		  }
		  System.out.println("Ready");
		  
		  // We need to initialize translations.getCount(f,e) uniformly
		  // t(f|e) summed over all e in {E + NULL} = 1
		  final double initialCount = 1.0 / englishVocab.size();
		  
		  while(iteration < maxIterations) {
			  CounterMap<String,String> counts = new CounterMap<String,String>(); // set count(f|e) to 0 for all e,f
			  Counter<String> totalEnglish = new Counter<String>(); // set total(e) to 0 for all e
			  
			  // E-step: loop over all sentences and update counts
			  for (SentencePair sentencePair : trainingSentencePairs) {
				  List<String> frenchWords = sentencePair.getFrenchWords();
				  List<String> englishWords = sentencePair.getEnglishWords();
				  
			      int numFrenchWords = frenchWords.size();
			      int numEnglishWords = englishWords.size();
			      Counter<String> sTotalF = new Counter<String>(); 
			      
			      // compute normalization constant sTotalF
			      for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) {
			    	  String f = frenchWords.get(frenchPosition);
			    	  // initialize and compute for English = NULL
			    	  if (!translations.containsKey(f) && initialize)
			    		  translations.setCount(f, NULL, initialCount);
			    	  else if (!translations.containsKey(f))
			    		  translations.setCount(f, NULL, thresholdProb);
			    	  sTotalF.incrementCount(f, translations.getCount(f, NULL)); 
			    	  for (int englishPosition = 0; englishPosition < numEnglishWords; englishPosition++) {
			    		  String e = englishWords.get(englishPosition);
			    		  if (!(translations.getCounter(f)).containsKey(e) && initialize)
			    			  translations.setCount(f, e, initialCount);
			    		  else if (!(translations.getCounter(f)).containsKey(e))
			    			  translations.setCount(f, e, thresholdProb);
			    		  sTotalF.incrementCount(f, translations.getCount(f, e));
			    	  }
			      }
			      
			      // collect counts in counts and totalEnglish
			      for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) {
			    	  String f = frenchWords.get(frenchPosition);
			    	  
			    	  // collect counts for English = NULL
			    	  double count = translations.getCount(f, NULL) / sTotalF.getCount(f);
			    	  counts.incrementCount(NULL, f, count);
			    	  totalEnglish.incrementCount(NULL, count);
			    	  for (int englishPosition = 0; englishPosition < numEnglishWords; englishPosition++) {
			    		  String e = englishWords.get(englishPosition);
			    		  count = translations.getCount(f, e) / sTotalF.getCount(f);
			    		  counts.incrementCount(e, f, count);
			    		  totalEnglish.incrementCount(e, count);
			    	  }
			      }
			  } // end of E-step
			  System.out.println("Completed E-step");
			  
			  // M-step: update probabilities with counts from E-step and check for convergence
			  iteration++;
			  for (String e : counts.keySet()) {//englishVocab) {
				  double normalizer = totalEnglish.getCount(e);
				  for (String f : (counts.getCounter(e)).keySet()) {//frenchVocab) {
					  
					  // To speed implementation, we want to update translations only when count / normalizer > threshold
					  double prob = counts.getCount(e, f) / normalizer;
					  if (!initialize) {					  
						  if (prob > thresholdProb)
							  translations.setCount(f, e, prob);
						  else
							  (translations.getCounter(f)).removeKey(e);
					  }
					  else {
						  translations.setCount(f, e, prob);
					  }
				  }
			  }
			  System.out.println("Completed iteration " + iteration);
		  } // end of M-step
		  
		  System.out.println("Trained!");
		  return translations;
	  }