Пример #1
0
 /**
  * Builds the {@link HierarchicalPhrases} collection for a contiguous pattern from the corpus
  * positions at which that pattern begins, and records the result in the phrase cache.
  *
  * <p>The phrases produced here are only trivially hierarchical (they are plain contiguous
  * phrases); they are wrapped this way so hierarchical-phrase operations can be applied to them.
  * Construction of more complex hierarchical phrases is handled within the prefix tree.
  *
  * <p>Resolution order, as implemented:
  *
  * <ol>
  *   <li>If {@code startPositions} is {@code null}, an empty list is returned; the cache is
  *       neither consulted nor modified.
  *   <li>Otherwise, if the cache already contains {@code pattern}, the cached value is returned;
  *       {@code startPositions} is ignored and left unsorted.
  *   <li>Otherwise {@code startPositions} is sorted in place (the caller's array is modified), a
  *       new {@code HierarchicalPhrases} is constructed from it, stored in the cache under
  *       {@code pattern}, and returned.
  * </ol>
  *
  * <p>NOTE(review): earlier documentation of this method described deterministic sampling at
  * uniform intervals over the start positions (Lopez 2008, p. 59). This method does not subsample:
  * the full sorted array is handed to the {@code HierarchicalPhrases} constructor. Whether any
  * sampling happens downstream is not visible from here; confirm before relying on it.
  *
  * @param startPositions positions in the corpus where the matched phrases begin; may be unsorted
  *     and may be {@code null}; sorted in place when a new entry is built
  * @param pattern a contiguous phrase; also used as the cache key
  * @param prefixTree prefix tree passed through to the {@code HierarchicalPhrases} factory and
  *     constructor
  * @return the trivially hierarchical phrases for {@code pattern}: an empty list, the cached
  *     value, or a newly built and cached instance
  */
 protected HierarchicalPhrases createHierarchicalPhrases(
     int[] startPositions, Pattern pattern, PrefixTree prefixTree) {
   // No matches at all: hand back an empty list without touching the cache.
   if (startPositions == null) {
     return HierarchicalPhrases.emptyList(prefixTree);
   }

   // Cache hit: reuse the stored phrases; the supplied positions are not used.
   if (hierarchicalPhraseCache.containsKey(pattern)) {
     return hierarchicalPhraseCache.get(pattern);
   }

   // Cache miss: order the positions (in place), build the phrases, and remember them.
   Arrays.sort(startPositions);
   final HierarchicalPhrases phrases =
       new HierarchicalPhrases(pattern, startPositions, prefixTree);
   hierarchicalPhraseCache.put(pattern, phrases);
   return phrases;
 }
Пример #2
0
 /**
  * Looks up the hierarchical phrases stored in the phrase cache for the given pattern.
  *
  * <p>Only a cache read is performed here; this method never constructs phrases itself.
  *
  * @param pattern the pattern whose cached phrases are requested
  * @return the hierarchical phrases cached for {@code pattern}, or {@code null} if the pattern is
  *     not in the cache
  */
 public HierarchicalPhrases getMatchingPhrases(Pattern pattern) {
   final HierarchicalPhrases cachedPhrases = hierarchicalPhraseCache.get(pattern);
   return cachedPhrases;
 }