コード例 #1
0
ファイル: CollectionUtils.java プロジェクト: Balkanlii/nlp
 /**
  * Returns the set of all modes in the Collection. (If the Collection has multiple items with the
  * highest frequency, all of them will be returned.)
  *
  * <p>An empty input yields an empty set.
  *
  * @param values the items whose most frequent members are wanted
  * @return the key set of the counter built from {@code values}, after pruning with the highest
  *     observed count as the threshold
  */
 public static <T> Set<T> modes(Collection<T> values) {
   Counter<T> counter = new ClassicCounter<T>(values);
   // A single pass finds the maximum; sorting every count just to read the last one is wasted
   // work and made the empty case fail on get(-1).
   double highestCount = Double.NEGATIVE_INFINITY;
   for (double count : counter.values()) {
     if (count > highestCount) {
       highestCount = count;
     }
   }
   // Prune using the maximum as the threshold (presumably keeps counts >= threshold; see
   // Counters.retainAbove). With no counts at all there is nothing to prune.
   Counters.retainAbove(counter, highestCount);
   return counter.keySet();
 }
コード例 #2
0
  /**
   * Builds the annotator from properties. Every key is looked up as {@code name + '.' + key}
   * when {@code name} is non-empty, otherwise as the bare key.
   *
   * <p>Keys read: {@code backgroundSymbol}, {@code mapping}, {@code validpospattern},
   * {@code posmatchtype}, {@code noDefaultOverwriteLabels}, {@code ignorecase}, {@code verbose}.
   *
   * @param name annotator name used as the property prefix; may be null or empty
   * @param properties source of the configuration values
   */
  public TokensRegexNERAnnotator(String name, Properties properties) {
    String prefix = "";
    if (name != null && !name.isEmpty()) {
      prefix = name + '.';
    }

    // Comma-separated background symbols and comma/semicolon-separated mapping files.
    String[] backgroundSymbols =
        properties
            .getProperty(prefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL)
            .split("\\s*,\\s*");
    String[] mappings =
        properties
            .getProperty(prefix + "mapping", DefaultPaths.DEFAULT_REGEXNER_RULES)
            .split("\\s*[,;]\\s*");
    String validPosRegex = properties.getProperty(prefix + "validpospattern");

    String posMatchTypeName =
        properties.getProperty(prefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name());
    this.posMatchType = PosMatchType.valueOf(posMatchTypeName);

    // Labels listed here are wrapped read-only; absent property means an empty set.
    String noDefaultOverwriteLabelsProp =
        properties.getProperty(prefix + "noDefaultOverwriteLabels");
    Set<String> protectedLabels;
    if (noDefaultOverwriteLabelsProp == null) {
      protectedLabels = new HashSet<>();
    } else {
      protectedLabels = CollectionUtils.asSet(noDefaultOverwriteLabelsProp.split("\\s*,\\s*"));
    }
    this.noDefaultOverwriteLabels = Collections.unmodifiableSet(protectedLabels);

    this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
    this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);

    // A missing or empty regex leaves the POS pattern unset.
    validPosPattern =
        (validPosRegex == null || validPosRegex.isEmpty()) ? null : Pattern.compile(validPosRegex);

    entries =
        Collections.unmodifiableList(
            readEntries(name, noDefaultOverwriteLabels, ignoreCase, verbose, mappings));

    // createPatternMatcher receives the map before it is wrapped as unmodifiable.
    IdentityHashMap<SequencePattern<CoreMap>, Entry> patternIndex = new IdentityHashMap<>();
    multiPatternMatcher = createPatternMatcher(patternIndex);
    this.patternToEntry = Collections.unmodifiableMap(patternIndex);

    // Labels this annotator may override: background symbols, null, and every entry type.
    Set<String> overridable = Generics.newHashSet();
    for (String symbol : backgroundSymbols) {
      overridable.add(symbol);
    }
    overridable.add(null);
    for (Entry entry : entries) {
      overridable.add(entry.type);
    }
    this.myLabels = Collections.unmodifiableSet(overridable);
  }
コード例 #3
0
 /**
  * Returns the elements with each of the given groups collapsed into a single merged chunk.
  *
  * <p>Groups are processed in ascending group-id order. A group whose start lies before the end
  * of the previously merged group is skipped. A zero-length group contributes no merged chunk.
  *
  * @param groups ids of the groups to merge
  * @return a new list holding unmerged elements and merged chunks in order
  */
 public List<CoreMap> getMergedList(int... groups) {
   List<CoreMap> res = new ArrayList<CoreMap>();
   // Index of the first element not yet copied into res.
   int last = 0;
   List<Integer> orderedGroups = CollectionUtils.asList(groups);
   Collections.sort(orderedGroups);
   for (int group : orderedGroups) {
     int groupStart = start(group);
     if (groupStart < last) {
       continue;
     }
     res.addAll(elements.subList(last, groupStart));
     // Advance past the copied prefix even when the group turns out to be empty; otherwise the
     // next group would copy the same elements a second time.
     last = groupStart;
     int groupEnd = end(group);
     if (groupEnd - groupStart >= 1) {
       res.add(createMergedChunk(groupStart, groupEnd));
       last = groupEnd;
     }
   }
   res.addAll(elements.subList(last, elements.size()));
   return res;
 }
コード例 #4
0
  /**
   * Checks the transition sequence created for {@code commaTreeString} and the separators
   * recorded in the initial parser state built from the same tree.
   */
  public void testSeparators() {
    Tree tree = convertTree(commaTreeString);
    List<Transition> transitions = CreateTransitionSequence.createTransitionSequence(tree, true);
    List<String> expectedTransitions =
        Arrays.asList(
            "Shift",
            "Shift",
            "Shift",
            "Shift",
            "RightBinary(@ADJP)",
            "RightBinary(ADJP)",
            "Shift",
            "RightBinary(@NP)",
            "RightBinary(NP)",
            "CompoundUnary([ROOT, FRAG])",
            "Finalize",
            "Idle");
    // Compare by string form so the expectation can be written as plain literals.
    assertEquals(
        expectedTransitions,
        CollectionUtils.transformAsList(
            transitions,
            new Function<Transition, String>() {
              public String apply(Transition t) {
                return t.toString();
              }
            }));

    // Exactly one separator is expected: a comma at index 2.
    State state = ShiftReduceParser.initialStateFromGoldTagTree(tree);
    assertEquals(1, state.separators.size());
    assertEquals(2, state.separators.firstKey().intValue());
    assertEquals(",", state.separators.get(2));
  }
コード例 #5
0
    /**
     * Produces a match result in which each of the given groups is collapsed into one element
     * built by {@code aggregator.merge}, and the begin/end indices of the matched groups are
     * rewritten to refer to the shortened element list.
     *
     * <p>Groups are processed in ascending group-id order. A group that starts before the end of
     * the previously merged group is skipped. A zero-length group produces no merged element.
     *
     * @param matchResult the match whose elements and group boundaries are read
     * @param groups ids of the groups to merge
     * @return the result of {@code matchResult.toBasicSequenceMatchResult()} with its elements
     *     replaced by the merged list and its group indices adjusted
     */
    public SequenceMatchResult<CoreMap> apply(
        SequenceMatchResult<CoreMap> matchResult, int... groups) {
      BasicSequenceMatchResult<CoreMap> res = matchResult.toBasicSequenceMatchResult();

      List<? extends CoreMap> elements = matchResult.elements();
      List<CoreMap> mergedElements = new ArrayList<CoreMap>();
      res.elements = mergedElements;

      // last: first original element not yet copied into mergedElements.
      // mergedGroup: next group id whose indices still need adjusting.
      // offset: how far indices after the most recent merge shift to the left.
      int last = 0;
      int mergedGroup = 0;
      int offset = 0;
      List<Integer> orderedGroups = CollectionUtils.asList(groups);
      Collections.sort(orderedGroups);
      for (int group : orderedGroups) {
        int groupStart = matchResult.start(group);
        if (groupStart >= last) {
          // Add elements from last to start of group to merged elements
          mergedElements.addAll(elements.subList(last, groupStart));
          // Advance past the copied prefix even when the group turns out to be empty; otherwise
          // the next group would copy the same elements a second time.
          last = groupStart;
          // Fiddle with matched group indices
          for (; mergedGroup < group; mergedGroup++) {
            if (res.matchedGroups[mergedGroup] != null) {
              res.matchedGroups[mergedGroup].matchBegin -= offset;
              res.matchedGroups[mergedGroup].matchEnd -= offset;
            }
          }
          // Get merged element
          int groupEnd = matchResult.end(group);
          if (groupEnd - groupStart >= 1) {
            CoreMap merged = aggregator.merge(elements, groupStart, groupEnd);
            mergedElements.add(merged);
            last = groupEnd;

            // Fiddle with matched group indices: the merged group now spans exactly the one
            // element just appended.
            res.matchedGroups[mergedGroup].matchBegin = mergedElements.size() - 1;
            res.matchedGroups[mergedGroup].matchEnd = mergedElements.size();
            mergedGroup++;
            while (mergedGroup < res.matchedGroups.length) {
              if (res.matchedGroups[mergedGroup] != null) {
                if (res.matchedGroups[mergedGroup].matchBegin == matchResult.start(group)
                    && res.matchedGroups[mergedGroup].matchEnd == matchResult.end(group)) {
                  // Same span as the merged group: point it at the merged element too.
                  res.matchedGroups[mergedGroup].matchBegin = res.matchedGroups[group].matchBegin;
                  res.matchedGroups[mergedGroup].matchEnd = res.matchedGroups[group].matchEnd;
                } else if (res.matchedGroups[mergedGroup].matchEnd <= matchResult.end(group)) {
                  // Ends no later than the merged group: its boundaries are gone, so drop it.
                  res.matchedGroups[mergedGroup] = null;
                } else {
                  // Extends past the merged group: leave it for the offset adjustment later.
                  break;
                }
              }
              mergedGroup++;
            }
            offset = matchResult.end(group) - res.matchedGroups[group].matchEnd;
          }
        }
      }
      // Add rest of elements
      mergedElements.addAll(elements.subList(last, elements.size()));
      // Fiddle with matched group indices
      for (; mergedGroup < res.matchedGroups.length; mergedGroup++) {
        if (res.matchedGroups[mergedGroup] != null) {
          res.matchedGroups[mergedGroup].matchBegin -= offset;
          res.matchedGroups[mergedGroup].matchEnd -= offset;
        }
      }
      return res;
    }