Ejemplo n.º 1
0
 /**
  * Compiles every entry in {@code entries} into a {@link TokenSequencePattern} and combines the
  * results into one multi-pattern matcher.
  *
  * <p>An entry with a non-null {@code tokensRegex} is compiled from that expression, using an
  * environment whose default string flags follow {@code ignoreCase}. Any other entry is assembled
  * token by token from {@code entry.regex}; when {@code validPosPattern} is set and
  * {@code posMatchType} is {@code MATCH_ALL_TOKENS}, each token pattern must additionally match
  * the part-of-speech pattern. Each compiled pattern receives its entry's priority.
  *
  * @param patternToEntry output map; receives one mapping from each compiled pattern to the entry
  *     it was built from
  * @return a matcher over all compiled patterns
  * @throws RuntimeException if an entry's {@code annotateGroup} does not name a group of its
  *     compiled pattern
  */
 private MultiPatternMatcher<CoreMap> createPatternMatcher(
     Map<SequencePattern<CoreMap>, Entry> patternToEntry) {
   // Convert to tokensregex pattern
   int patternFlags = ignoreCase ? Pattern.CASE_INSENSITIVE : 0;
   int stringMatchFlags = ignoreCase ? NodePattern.CASE_INSENSITIVE : 0;
   Env env = TokenSequencePattern.getNewEnv();
   env.setDefaultStringPatternFlags(patternFlags);
   env.setDefaultStringMatchFlags(stringMatchFlags);

   // Only built when every token of a match is required to carry a valid POS tag.
   NodePattern<String> posTagPattern =
       (validPosPattern != null && PosMatchType.MATCH_ALL_TOKENS.equals(posMatchType))
           ? new CoreMapNodePattern.StringAnnotationRegexPattern(validPosPattern)
           : null;

   List<TokenSequencePattern> patterns = new ArrayList<>(entries.size());
   for (Entry entry : entries) {
     TokenSequencePattern pattern;
     if (entry.tokensRegex != null) {
       // TODO: posTagPatterns...
       pattern = TokenSequencePattern.compile(env, entry.tokensRegex);
     } else {
       // One node pattern per token regex, matched in sequence.
       List<SequencePattern.PatternExpr> nodePatterns = new ArrayList<>();
       for (String p : entry.regex) {
         CoreMapNodePattern c = CoreMapNodePattern.valueOf(p, patternFlags);
         if (posTagPattern != null) {
           c.add(CoreAnnotations.PartOfSpeechAnnotation.class, posTagPattern);
         }
         nodePatterns.add(new SequencePattern.NodePatternExpr(c));
       }
       pattern =
           TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(nodePatterns));
     }

     // NOTE(review): getTotalGroups() is taken to include group 0 (the whole match), so the
     // valid indices are 0 .. totalGroups - 1. The previous '>' comparison let
     // annotateGroup == totalGroups through, which names a group that does not exist.
     if (entry.annotateGroup < 0 || entry.annotateGroup >= pattern.getTotalGroups()) {
       throw new RuntimeException("Invalid match group for entry " + entry);
     }
     pattern.setPriority(entry.priority);
     patterns.add(pattern);
     patternToEntry.put(pattern, entry);
   }
   return TokenSequencePattern.getMultiPatternMatcher(patterns);
 }
  /**
   * Validates the extension's parameters, compiles the TokensRegex pattern and declares the
   * output attributes.
   *
   * <p>The first parameter must be a constant string holding the regex; the second must be a
   * variable. The compiled pattern is stored in {@code regexPattern} and its total group count
   * in {@code attributeCount}. The returned list always starts with a {@code match} attribute,
   * followed by one string attribute named {@code groupPrefix + i} for each
   * {@code i} in {@code 1 .. attributeCount - 1}.
   *
   * @param abstractDefinition definition of the incoming stream (only used for debug logging)
   * @param attributeExpressionExecutors executors for the parameters passed to the extension
   * @param executionPlanContext execution plan context (not used by this method)
   * @return the attributes this processor appends to each event
   * @throws ExecutionPlanCreationException if fewer than two parameters are supplied, the first
   *     is not a constant string, the regex cannot be compiled, or the second is not a variable
   */
  @Override
  protected List<Attribute> init(
      AbstractDefinition abstractDefinition,
      ExpressionExecutor[] attributeExpressionExecutors,
      ExecutionPlanContext executionPlanContext) {
    // Shared tail of every parameter-validation message. It already starts with the sentence's
    // closing period, so the message text before it must not end with one.
    final String usage =
        ".\nUsage: #nlp.findTokensRegexPattern(regex:string, text:string-variable)";

    if (logger.isDebugEnabled()) {
      logger.debug("Initializing Query ...");
    }

    if (attributeExpressionLength < 2) {
      throw new ExecutionPlanCreationException(
          "Query expects at least two parameters. Received only "
              + attributeExpressionLength
              + usage);
    }

    if (!(attributeExpressionExecutors[0] instanceof ConstantExpressionExecutor)) {
      throw new ExecutionPlanCreationException("First parameter should be a constant" + usage);
    }

    String regex;
    try {
      // A constant executor needs no event to produce its value.
      regex = (String) attributeExpressionExecutors[0].execute(null);
    } catch (ClassCastException e) {
      throw new ExecutionPlanCreationException(
          "First parameter should be of type string. Found "
              + attributeExpressionExecutors[0].getReturnType()
              + usage,
          e);
    }

    try {
      regexPattern = TokenSequencePattern.compile(regex);
    } catch (Exception e) {
      throw new ExecutionPlanCreationException("Cannot parse given regex " + regex, e);
    }

    if (!(attributeExpressionExecutors[1] instanceof VariableExpressionExecutor)) {
      throw new ExecutionPlanCreationException("Second parameter should be a variable" + usage);
    }

    if (logger.isDebugEnabled()) {
      logger.debug(
          String.format(
              "Query parameters initialized. Regex: %s Stream Parameters: %s",
              regex, abstractDefinition.getAttributeList()));
    }

    initPipeline();

    attributeCount = regexPattern.getTotalGroups();

    // "match" plus one attribute per group index 1 .. attributeCount - 1.
    List<Attribute> attributes = new ArrayList<Attribute>(Math.max(attributeCount, 1));
    attributes.add(new Attribute("match", Attribute.Type.STRING));
    for (int i = 1; i < attributeCount; i++) {
      attributes.add(new Attribute(groupPrefix + i, Attribute.Type.STRING));
    }
    return attributes;
  }