/**
 * Creates an annotator configured from {@code properties}.
 *
 * <p>Each key below is looked up as {@code name + "." + key}; when {@code name} is
 * {@code null} or empty, the bare key is used instead.
 * <ul>
 *   <li>{@code backgroundSymbol} - comma-separated labels; defaults to
 *       {@code DEFAULT_BACKGROUND_SYMBOL}.</li>
 *   <li>{@code mapping} - mapping sources separated by commas or semicolons; defaults to
 *       {@code DefaultPaths.DEFAULT_REGEXNER_RULES}.</li>
 *   <li>{@code validpospattern} - optional regular expression; when absent or empty,
 *       {@code validPosPattern} is set to {@code null}.</li>
 *   <li>{@code posmatchtype} - name of a {@code PosMatchType} constant; defaults to
 *       {@code DEFAULT_POS_MATCH_TYPE}.</li>
 *   <li>{@code noDefaultOverwriteLabels} - comma-separated labels; when absent, an empty
 *       set is used.</li>
 *   <li>{@code ignorecase} and {@code verbose} - boolean flags, both defaulting to
 *       {@code false}.</li>
 * </ul>
 *
 * <p>The entries, the pattern-to-entry map and the label set built here are all stored
 * behind unmodifiable wrappers.
 *
 * @param name       prefix for property keys; may be {@code null} or empty
 * @param properties source of the configuration values
 * @throws IllegalArgumentException if {@code posmatchtype} is rejected by
 *         {@code PosMatchType.valueOf} (presumably an enum - confirm against its declaration)
 * @throws java.util.regex.PatternSyntaxException if {@code validpospattern} is not a valid
 *         regular expression
 */
public TokensRegexNERAnnotator(String name, Properties properties) {
  final String keyPrefix = (name == null || name.isEmpty()) ? "" : name + '.';

  // Raw list-valued settings, split on their separators (surrounding whitespace is absorbed
  // by the separator regex itself).
  final String[] backgroundLabels =
      properties.getProperty(keyPrefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL)
          .split("\\s*,\\s*");
  final String[] mappingSources =
      properties.getProperty(keyPrefix + "mapping", DefaultPaths.DEFAULT_REGEXNER_RULES)
          .split("\\s*[,;]\\s*");
  final String posRegex = properties.getProperty(keyPrefix + "validpospattern");

  final String posMatchTypeName =
      properties.getProperty(keyPrefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name());
  this.posMatchType = PosMatchType.valueOf(posMatchTypeName);

  final String protectedLabelsSpec =
      properties.getProperty(keyPrefix + "noDefaultOverwriteLabels");
  if (protectedLabelsSpec == null) {
    this.noDefaultOverwriteLabels = Collections.unmodifiableSet(new HashSet<>());
  } else {
    this.noDefaultOverwriteLabels = Collections.unmodifiableSet(
        CollectionUtils.asSet(protectedLabelsSpec.split("\\s*,\\s*")));
  }

  this.ignoreCase = PropertiesUtils.getBool(properties, keyPrefix + "ignorecase", false);
  this.verbose = PropertiesUtils.getBool(properties, keyPrefix + "verbose", false);

  // A missing or empty pattern leaves validPosPattern as null.
  this.validPosPattern =
      (posRegex == null || posRegex.isEmpty()) ? null : Pattern.compile(posRegex);

  // The entries must be assigned before the matcher is created, as in the original ordering.
  this.entries = Collections.unmodifiableList(
      readEntries(name, this.noDefaultOverwriteLabels, this.ignoreCase, this.verbose,
          mappingSources));

  // createPatternMatcher is handed the empty map (presumably filling it in - confirm against
  // its definition); only afterwards is the map published as a read-only view.
  final IdentityHashMap<SequencePattern<CoreMap>, Entry> entryByPattern =
      new IdentityHashMap<>();
  this.multiPatternMatcher = createPatternMatcher(entryByPattern);
  this.patternToEntry = Collections.unmodifiableMap(entryByPattern);

  final Set<String> overwritableLabels = Generics.newHashSet();
  // Background labels and the absent (null) label can always be overridden.
  Collections.addAll(overwritableLabels, backgroundLabels);
  overwritableLabels.add(null);
  // Labels produced by this annotator's own entries are always overwritten.
  for (Entry rule : this.entries) {
    overwritableLabels.add(rule.type);
  }
  this.myLabels = Collections.unmodifiableSet(overwritableLabels);
}