private MultiPatternMatcher<CoreMap> createPatternMatcher( Map<SequencePattern<CoreMap>, Entry> patternToEntry) { // Convert to tokensregex pattern int patternFlags = ignoreCase ? Pattern.CASE_INSENSITIVE : 0; int stringMatchFlags = ignoreCase ? NodePattern.CASE_INSENSITIVE : 0; Env env = TokenSequencePattern.getNewEnv(); env.setDefaultStringPatternFlags(patternFlags); env.setDefaultStringMatchFlags(stringMatchFlags); NodePattern<String> posTagPattern = (validPosPattern != null && PosMatchType.MATCH_ALL_TOKENS.equals(posMatchType)) ? new CoreMapNodePattern.StringAnnotationRegexPattern(validPosPattern) : null; List<TokenSequencePattern> patterns = new ArrayList<>(entries.size()); for (Entry entry : entries) { TokenSequencePattern pattern; if (entry.tokensRegex != null) { // TODO: posTagPatterns... pattern = TokenSequencePattern.compile(env, entry.tokensRegex); } else { List<SequencePattern.PatternExpr> nodePatterns = new ArrayList<>(); for (String p : entry.regex) { CoreMapNodePattern c = CoreMapNodePattern.valueOf(p, patternFlags); if (posTagPattern != null) { c.add(CoreAnnotations.PartOfSpeechAnnotation.class, posTagPattern); } nodePatterns.add(new SequencePattern.NodePatternExpr(c)); } pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(nodePatterns)); } if (entry.annotateGroup < 0 || entry.annotateGroup > pattern.getTotalGroups()) { throw new RuntimeException("Invalid match group for entry " + entry); } pattern.setPriority(entry.priority); patterns.add(pattern); patternToEntry.put(pattern, entry); } return TokenSequencePattern.getMultiPatternMatcher(patterns); }
/**
 * Validates the arguments of {@code #nlp.findTokensRegexPattern(regex, text)}, compiles the
 * regex into {@code regexPattern}, initializes the pipeline and declares the output attributes.
 *
 * <p>The output always starts with a string attribute named {@code match}, followed by one
 * string attribute {@code groupPrefix + i} for each {@code i} from 1 up to (but excluding)
 * {@code regexPattern.getTotalGroups()}.
 *
 * @param abstractDefinition definition of the input stream; only read for debug logging here
 * @param attributeExpressionExecutors executors for the call arguments: index 0 must be a
 *     constant yielding the regex string, index 1 must be a variable
 * @param executionPlanContext not used by this method
 * @return the attributes this processor appends
 * @throws ExecutionPlanCreationException if fewer than two parameters were supplied, the first
 *     is not a string constant, the regex cannot be compiled, or the second is not a variable
 */
@Override
protected List<Attribute> init(
    AbstractDefinition abstractDefinition,
    ExpressionExecutor[] attributeExpressionExecutors,
    ExecutionPlanContext executionPlanContext) {
  // Shared tail of every argument-validation message.
  final String usage =
      "\nUsage: #nlp.findTokensRegexPattern(regex:string, text:string-variable)";

  if (logger.isDebugEnabled()) {
    logger.debug("Initializing Query ...");
  }

  if (attributeExpressionLength < 2) {
    throw new ExecutionPlanCreationException(
        "Query expects at least two parameters. Received only "
            + attributeExpressionLength
            + "."
            + usage);
  }

  if (!(attributeExpressionExecutors[0] instanceof ConstantExpressionExecutor)) {
    throw new ExecutionPlanCreationException("First parameter should be a constant." + usage);
  }
  // Check the constant's runtime type explicitly rather than catching ClassCastException.
  // A null value is passed through unchanged, exactly as the plain cast used to allow.
  Object regexValue = attributeExpressionExecutors[0].execute(null);
  if (regexValue != null && !(regexValue instanceof String)) {
    throw new ExecutionPlanCreationException(
        "First parameter should be of type string. Found "
            + attributeExpressionExecutors[0].getReturnType()
            + "."
            + usage);
  }
  String regex = (String) regexValue;

  try {
    regexPattern = TokenSequencePattern.compile(regex);
  } catch (Exception e) {
    // Any failure while compiling is reported as a plan-creation error, with the cause kept.
    throw new ExecutionPlanCreationException("Cannot parse given regex " + regex, e);
  }

  if (!(attributeExpressionExecutors[1] instanceof VariableExpressionExecutor)) {
    throw new ExecutionPlanCreationException("Second parameter should be a variable." + usage);
  }

  if (logger.isDebugEnabled()) {
    logger.debug(
        String.format(
            "Query parameters initialized. Regex: %s Stream Parameters: %s",
            regex, abstractDefinition.getAttributeList()));
  }

  initPipeline();

  attributeCount = regexPattern.getTotalGroups();
  // One slot for "match" plus one per group index 1 .. attributeCount - 1.
  List<Attribute> attributes = new ArrayList<Attribute>(Math.max(1, attributeCount));
  attributes.add(new Attribute("match", Attribute.Type.STRING));
  for (int i = 1; i < attributeCount; i++) {
    attributes.add(new Attribute(groupPrefix + i, Attribute.Type.STRING));
  }
  return attributes;
}