/**
 * Returns the set of all modes (most frequent items) in the Collection. If several items are
 * tied for the highest frequency, all of them are returned. An empty Collection has no mode,
 * so an empty set is returned for it.
 *
 * @param values the items to examine
 * @return the keys of the frequency counter built from {@code values} that remain after
 *     everything below the highest count has been filtered out
 */
public static <T> Set<T> modes(Collection<T> values) {
  Counter<T> counter = new ClassicCounter<T>(values);
  if (values.isEmpty()) {
    // Nothing was counted, so there is no highest count to look up.
    return counter.keySet();
  }
  // A single pass finds the largest count; sorting all counts only to read the last is wasteful.
  double highestCount = Double.NEGATIVE_INFINITY;
  for (double count : counter.values()) {
    if (count > highestCount) {
      highestCount = count;
    }
  }
  // NOTE(review): relies on retainAbove keeping entries whose count equals the threshold - confirm.
  Counters.retainAbove(counter, highestCount);
  return counter.keySet();
}
/**
 * Configures the annotator from {@code properties}. Every key is looked up as
 * {@code name + "." + key} when {@code name} is non-empty, and as the bare key otherwise.
 *
 * <p>Keys read: {@code backgroundSymbol} (comma-separated), {@code mapping} (comma- or
 * semicolon-separated), {@code validpospattern} (regular expression), {@code posmatchtype}
 * (name of a {@code PosMatchType} constant), {@code noDefaultOverwriteLabels}
 * (comma-separated), {@code ignorecase} and {@code verbose} (booleans, default {@code false}).
 *
 * @param name property-name prefix; may be {@code null} or empty for no prefix
 * @param properties source of the settings listed above
 */
public TokensRegexNERAnnotator(String name, Properties properties) {
  String prefix = "";
  if (name != null && !name.isEmpty()) {
    prefix = name + '.';
  }

  String[] backgroundSymbols =
      properties.getProperty(prefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL)
          .split("\\s*,\\s*");
  String[] mappings =
      properties.getProperty(prefix + "mapping", DefaultPaths.DEFAULT_REGEXNER_RULES)
          .split("\\s*[,;]\\s*");

  String posMatchTypeName =
      properties.getProperty(prefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name());
  this.posMatchType = PosMatchType.valueOf(posMatchTypeName);

  // An absent property yields an empty set; either way the stored set is read-only.
  String protectedLabelsProp = properties.getProperty(prefix + "noDefaultOverwriteLabels");
  if (protectedLabelsProp == null) {
    this.noDefaultOverwriteLabels = Collections.unmodifiableSet(new HashSet<>());
  } else {
    this.noDefaultOverwriteLabels =
        Collections.unmodifiableSet(CollectionUtils.asSet(protectedLabelsProp.split("\\s*,\\s*")));
  }

  this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
  this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);

  // A missing or empty POS regex leaves the pattern unset (null).
  String validPosRegex = properties.getProperty(prefix + "validpospattern");
  boolean hasPosRegex = validPosRegex != null && !validPosRegex.isEmpty();
  this.validPosPattern = hasPosRegex ? Pattern.compile(validPosRegex) : null;

  this.entries =
      Collections.unmodifiableList(
          readEntries(name, this.noDefaultOverwriteLabels, this.ignoreCase, this.verbose, mappings));

  // The matcher factory receives the identity map first; only afterwards is it exposed
  // through the field as a read-only view.
  IdentityHashMap<SequencePattern<CoreMap>, Entry> entryByPattern = new IdentityHashMap<>();
  this.multiPatternMatcher = createPatternMatcher(entryByPattern);
  this.patternToEntry = Collections.unmodifiableMap(entryByPattern);

  // Label set: the background symbols, null, and the type of every loaded entry.
  Set<String> labels = Generics.newHashSet();
  Collections.addAll(labels, backgroundSymbols);
  labels.add(null);
  for (Entry entry : this.entries) {
    labels.add(entry.type);
  }
  this.myLabels = Collections.unmodifiableSet(labels);
}
/**
 * Returns a new list built from {@code elements} in which the span of each requested group is
 * replaced by the single chunk produced by {@code createMergedChunk}.
 *
 * <p>Groups are visited in ascending group-number order. A group is skipped when it starts
 * before the end of the previously merged group, or when it covers no elements. Elements
 * outside every merged group are copied through unchanged.
 *
 * @param groups numbers of the groups to merge
 * @return the merged list; {@code elements} itself is not modified by this method
 */
public List<CoreMap> getMergedList(int... groups) {
  List<CoreMap> res = new ArrayList<CoreMap>();
  int last = 0; // index of the first element not yet copied into res
  List<Integer> orderedGroups = CollectionUtils.asList(groups);
  Collections.sort(orderedGroups);
  for (int group : orderedGroups) {
    int groupStart = start(group);
    if (groupStart < last) {
      // Starts inside a region that has already been emitted.
      continue;
    }
    int groupEnd = end(group);
    if (groupEnd - groupStart < 1) {
      // Empty group: nothing to merge. The elements before it must NOT be copied here,
      // because `last` does not advance and they would be copied a second time later.
      continue;
    }
    res.addAll(elements.subList(last, groupStart));
    res.add(createMergedChunk(groupStart, groupEnd));
    last = groupEnd;
  }
  res.addAll(elements.subList(last, elements.size()));
  return res;
}
/**
 * Builds the transition sequence for the tree parsed from {@code commaTreeString}, compares
 * the string form of each transition with the expected sequence, and then checks that the
 * initial state built from the same tree records exactly one separator: {@code ","} under
 * key 2.
 */
public void testSeparators() {
  Tree tree = convertTree(commaTreeString);
  List<Transition> transitions = CreateTransitionSequence.createTransitionSequence(tree, true);
  List<String> expectedTransitions =
      Arrays.asList(
          "Shift",
          "Shift",
          "Shift",
          "Shift",
          "RightBinary(@ADJP)",
          "RightBinary(ADJP)",
          "Shift",
          "RightBinary(@NP)",
          "RightBinary(NP)",
          "CompoundUnary([ROOT, FRAG])",
          "Finalize",
          "Idle");
  assertEquals(
      expectedTransitions,
      CollectionUtils.transformAsList(
          transitions,
          new Function<Transition, String>() {
            @Override
            public String apply(Transition t) {
              return t.toString();
            }
          }));

  // The separators are verified field by field rather than through a string rendering.
  State state = ShiftReduceParser.initialStateFromGoldTagTree(tree);
  assertEquals(1, state.separators.size());
  assertEquals(2, state.separators.firstKey().intValue());
  assertEquals(",", state.separators.get(2));
}
public SequenceMatchResult<CoreMap> apply( SequenceMatchResult<CoreMap> matchResult, int... groups) { BasicSequenceMatchResult<CoreMap> res = matchResult.toBasicSequenceMatchResult(); List<? extends CoreMap> elements = matchResult.elements(); List<CoreMap> mergedElements = new ArrayList<CoreMap>(); res.elements = mergedElements; int last = 0; int mergedGroup = 0; int offset = 0; List<Integer> orderedGroups = CollectionUtils.asList(groups); Collections.sort(orderedGroups); for (int group : orderedGroups) { int groupStart = matchResult.start(group); if (groupStart >= last) { // Add elements from last to start of group to merged elements mergedElements.addAll(elements.subList(last, groupStart)); // Fiddle with matched group indices for (; mergedGroup < group; mergedGroup++) { if (res.matchedGroups[mergedGroup] != null) { res.matchedGroups[mergedGroup].matchBegin -= offset; res.matchedGroups[mergedGroup].matchEnd -= offset; } } // Get merged element int groupEnd = matchResult.end(group); if (groupEnd - groupStart >= 1) { CoreMap merged = aggregator.merge(elements, groupStart, groupEnd); mergedElements.add(merged); last = groupEnd; // Fiddle with matched group indices res.matchedGroups[mergedGroup].matchBegin = mergedElements.size() - 1; res.matchedGroups[mergedGroup].matchEnd = mergedElements.size(); mergedGroup++; while (mergedGroup < res.matchedGroups.length) { if (res.matchedGroups[mergedGroup] != null) { if (res.matchedGroups[mergedGroup].matchBegin == matchResult.start(group) && res.matchedGroups[mergedGroup].matchEnd == matchResult.end(group)) { res.matchedGroups[mergedGroup].matchBegin = res.matchedGroups[group].matchBegin; res.matchedGroups[mergedGroup].matchEnd = res.matchedGroups[group].matchEnd; } else if (res.matchedGroups[mergedGroup].matchEnd <= matchResult.end(group)) { res.matchedGroups[mergedGroup] = null; } else { break; } } mergedGroup++; } offset = matchResult.end(group) - res.matchedGroups[group].matchEnd; } } } // Add rest of elements 
mergedElements.addAll(elements.subList(last, elements.size())); // Fiddle with matched group indices for (; mergedGroup < res.matchedGroups.length; mergedGroup++) { if (res.matchedGroups[mergedGroup] != null) { res.matchedGroups[mergedGroup].matchBegin -= offset; res.matchedGroups[mergedGroup].matchEnd -= offset; } } return res; }