Java TokenSequencePattern.compile示例

编程语言: Java

命名空间/包名称: edu.stanford.nlp.ling.tokensregex

方法/功能: compile

hotexamples.com的示例: 4

Java TokenSequencePattern.compile - 共找到4个示例。以下是从开源项目中提取的、评价最高的 edu.stanford.nlp.ling.tokensregex.TokenSequencePattern.compile 真实 Java 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

compile(4)

getMatcher(2)

getTotalGroups(2)

pattern(2)

getMultiPatternMatcher(1)

getNewEnv(1)

matcher(1)

setPriority(1)

示例#1

显示文件

文件： TokensRegexNERAnnotator.java 项目： automenta/corenlp

 /**
  * Compiles every entry of {@code entries} into a {@link TokenSequencePattern} and bundles the
  * compiled patterns into one multi-pattern matcher.
  *
  * <p>An entry that carries a {@code tokensRegex} is compiled from that expression using a fresh
  * environment whose default flags follow {@code ignoreCase}. Any other entry is compiled from one
  * node pattern per string in {@code entry.regex}; each node additionally has to satisfy the POS
  * tag pattern when {@code validPosPattern} is set and {@code posMatchType} is
  * {@code MATCH_ALL_TOKENS}.
  *
  * @param patternToEntry output map; receives one {@code pattern -> entry} mapping per entry
  * @return a matcher over all compiled patterns
  * @throws RuntimeException if an entry's {@code annotateGroup} is negative or greater than the
  *     compiled pattern's total group count
  */
 private MultiPatternMatcher<CoreMap> createPatternMatcher(
     Map<SequencePattern<CoreMap>, Entry> patternToEntry) {
   // Case-insensitivity is expressed separately for regex patterns and for plain string matches.
   final int regexFlags = ignoreCase ? Pattern.CASE_INSENSITIVE : 0;
   final int nodeMatchFlags = ignoreCase ? NodePattern.CASE_INSENSITIVE : 0;

   final Env env = TokenSequencePattern.getNewEnv();
   env.setDefaultStringPatternFlags(regexFlags);
   env.setDefaultStringMatchFlags(nodeMatchFlags);

   // Only built when a POS pattern is configured and it has to hold for every token.
   NodePattern<String> posTagPattern = null;
   if (validPosPattern != null && PosMatchType.MATCH_ALL_TOKENS.equals(posMatchType)) {
     posTagPattern = new CoreMapNodePattern.StringAnnotationRegexPattern(validPosPattern);
   }

   List<TokenSequencePattern> compiled = new ArrayList<>(entries.size());
   for (Entry entry : entries) {
     final TokenSequencePattern pattern;
     if (entry.tokensRegex == null) {
       // Plain per-token regexes: one node pattern per token, matched in sequence.
       List<SequencePattern.PatternExpr> tokenExprs = new ArrayList<>();
       for (String tokenRegex : entry.regex) {
         CoreMapNodePattern node = CoreMapNodePattern.valueOf(tokenRegex, regexFlags);
         if (posTagPattern != null) {
           node.add(CoreAnnotations.PartOfSpeechAnnotation.class, posTagPattern);
         }
         tokenExprs.add(new SequencePattern.NodePatternExpr(node));
       }
       pattern =
           TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(tokenExprs));
     } else {
       // TODO: posTagPatterns...
       pattern = TokenSequencePattern.compile(env, entry.tokensRegex);
     }

     // NOTE(review): the upper bound is inclusive here; confirm whether getTotalGroups() already
     // counts group 0, in which case an exclusive bound may have been intended.
     boolean groupInRange =
         entry.annotateGroup >= 0 && entry.annotateGroup <= pattern.getTotalGroups();
     if (!groupInRange) {
       throw new RuntimeException("Invalid match group for entry " + entry);
     }

     pattern.setPriority(entry.priority);
     compiled.add(pattern);
     patternToEntry.put(pattern, entry);
   }
   return TokenSequencePattern.getMultiPatternMatcher(compiled);
 }

示例#2

显示文件

文件： WordsToSentencesAnnotator.java 项目： StonyBrookNLP/stingysentiment

 /**
  * Creates an annotator by wrapping the given settings in a {@code WordToSentenceProcessor} and
  * delegating to another constructor of this class with its second argument fixed to
  * {@code false}.
  *
  * @param verbose forwarded unchanged to the delegate constructor
  * @param boundaryTokenRegex forwarded to the sentence processor
  * @param boundaryToDiscard forwarded to the sentence processor
  * @param htmlElementsToDiscard forwarded to the sentence processor
  * @param newlineIsSentenceBreak converted with
  *     {@code WordToSentenceProcessor.stringToNewlineIsSentenceBreak} before being forwarded
  * @param boundaryMultiTokenRegex TokensRegex expression compiled with
  *     {@link TokenSequencePattern#compile}; may be {@code null}, in which case {@code null} is
  *     forwarded instead of a compiled pattern
  * @param tokenRegexesToDiscard forwarded to the sentence processor
  */
 public WordsToSentencesAnnotator(
     boolean verbose,
     String boundaryTokenRegex,
     Set<String> boundaryToDiscard,
     Set<String> htmlElementsToDiscard,
     String newlineIsSentenceBreak,
     String boundaryMultiTokenRegex,
     Set<String> tokenRegexesToDiscard) {
   this(
       verbose,
       false,
       new WordToSentenceProcessor<CoreLabel>(
           boundaryTokenRegex,
           boundaryToDiscard,
           htmlElementsToDiscard,
           WordToSentenceProcessor.stringToNewlineIsSentenceBreak(newlineIsSentenceBreak),
           // Compile only when an expression was supplied; otherwise hand over null.
           (boundaryMultiTokenRegex == null)
               ? null
               : TokenSequencePattern.compile(boundaryMultiTokenRegex),
           tokenRegexesToDiscard));
 }

示例#3

显示文件

文件： StanfordCoreNLPServer.java 项目： BeSky/CoreNLP

    /**
     * Handles a TokensRegex query over HTTP.
     *
     * <p>The work runs on {@code corenlpExecutor}: the request document is obtained (and annotated
     * with {@code tokenize,ssplit,pos,lemma,ner} if it has no sentences yet), the required URL
     * parameter {@code pattern} is compiled, and the result is serialised as JSON under the key
     * {@code "sentences"}:
     *
     * <ul>
     *   <li>with {@code filter} set to {@code true} (or present but blank): one boolean per
     *       sentence, telling whether the whole sentence matches the pattern;
     *   <li>otherwise: one object per sentence holding each match under its running index (with
     *       {@code text}, {@code begin}, {@code end} and one entry per capture group) plus a
     *       {@code length} entry with the number of matches.
     * </ul>
     *
     * <p>The calling thread waits at most five seconds for the task. If the task does not deliver
     * in time (or fails, or the wait is interrupted) the task is cancelled and an error response is
     * sent instead.
     *
     * @param httpExchange the exchange to read the request from and write the response to
     * @throws IOException if sending the response or the error response fails
     */
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props =
                      new Properties() {
                        {
                          setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
                        }
                      };
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    // An empty result tells the caller that the response was already sent.
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  // A blank value (e.g. "?filter") counts as true; an absent key as false.
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
                  // (create the matcher)
                  final TokenSequencePattern regex = TokenSequencePattern.compile(pattern);

                  // Run TokensRegex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      CoreAnnotations.TokensAnnotation.class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                List<CoreLabel> tokens =
                                                    sentence.get(
                                                        CoreAnnotations.TokensAnnotation.class);
                                                TokenSequenceMatcher matcher =
                                                    regex.matcher(tokens);
                                                // Running index of the match within this sentence.
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            matchWriter.set(
                                                                "text", matcher.group());
                                                            matchWriter.set(
                                                                "begin", matcher.start());
                                                            matchWriter.set("end", matcher.end());
                                                            // Capture groups are queried 1-based.
                                                            for (int groupI = 0;
                                                                groupI < matcher.groupCount();
                                                                ++groupI) {
                                                              SequenceMatchResult.MatchedGroupInfo<
                                                                      CoreMap>
                                                                  info =
                                                                      matcher.groupInfo(groupI + 1);
                                                              // Named groups are keyed by name,
                                                              // unnamed ones by their number.
                                                              matchWriter.set(
                                                                  info.varName == null
                                                                      ? Integer.toString(groupI + 1)
                                                                      : info.varName,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        groupWriter.set(
                                                                            "text", info.text);
                                                                        if (info.nodes.size() > 0) {
                                                                          // begin: index of the
                                                                          // first node minus one.
                                                                          groupWriter.set(
                                                                              "begin",
                                                                              info.nodes
                                                                                      .get(0)
                                                                                      .get(
                                                                                          CoreAnnotations
                                                                                              .IndexAnnotation
                                                                                              .class)
                                                                                  - 1);
                                                                          // end: index of the last
                                                                          // node, unchanged.
                                                                          groupWriter.set(
                                                                              "end",
                                                                              info.nodes
                                                                                  .get(
                                                                                      info.nodes
                                                                                              .size()
                                                                                          - 1)
                                                                                  .get(
                                                                                      CoreAnnotations
                                                                                          .IndexAnnotation
                                                                                          .class));
                                                                        }
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                    // Nothing more can be done if even the error response cannot be sent.
                  }
                }
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        // Encode explicitly: Content-Length below must match the bytes actually written, and the
        // platform default charset is not guaranteed to be UTF-8.
        response = json.get(5, TimeUnit.SECONDS).getBytes(java.nio.charset.StandardCharsets.UTF_8);
      } catch (InterruptedException e) {
        // Keep the interrupt visible to the code that owns this thread.
        Thread.currentThread().interrupt();
        json.cancel(true);
        respondError("Timeout when executing TokensRegex query", httpExchange);
      } catch (ExecutionException | TimeoutException e) {
        // Do not let an abandoned query keep occupying an executor thread.
        json.cancel(true);
        respondError("Timeout when executing TokensRegex query", httpExchange);
      }
      // An empty response means an error response has already been sent.
      if (response.length > 0) {
        try {
          httpExchange.getResponseHeaders().add("Content-Type", "text/json");
          httpExchange
              .getResponseHeaders()
              .add("Content-Length", Integer.toString(response.length));
          httpExchange.sendResponseHeaders(HTTP_OK, response.length);
          httpExchange.getResponseBody().write(response);
        } finally {
          // Release the exchange even when writing the response fails.
          httpExchange.close();
        }
      }
    }

示例#4

显示文件

文件： TokensRegexPatternStreamProcessor.java 项目： thiliA/siddhi-1

  /**
   * Validates the query parameters, compiles the TokensRegex pattern and declares the output
   * attributes of this stream processor.
   *
   * <p>The first parameter has to be a constant string holding the TokensRegex expression; the
   * second has to be a variable. After validation the pipeline is initialised via
   * {@code initPipeline()}.
   *
   * @param abstractDefinition definition of the input stream; only used for debug logging here
   * @param attributeExpressionExecutors executors for the query parameters
   * @param executionPlanContext not used by this method
   * @return the attribute {@code "match"} followed by one string attribute
   *     {@code groupPrefix + i} for every {@code i} from 1 up to (but excluding) the pattern's
   *     total group count
   * @throws ExecutionPlanCreationException if fewer than two parameters are given, the first is
   *     not a constant string, the expression cannot be compiled, or the second is not a variable
   */
  @Override
  protected List<Attribute> init(
      AbstractDefinition abstractDefinition,
      ExpressionExecutor[] attributeExpressionExecutors,
      ExecutionPlanContext executionPlanContext) {
    // Shared tail of every validation message; it supplies the sentence-ending period itself.
    final String usage =
        ".\nUsage: #nlp.findTokensRegexPattern(regex:string, text:string-variable)";

    if (logger.isDebugEnabled()) {
      logger.debug("Initializing Query ...");
    }

    if (attributeExpressionLength < 2) {
      throw new ExecutionPlanCreationException(
          "Query expects at least two parameters. Received only "
              + attributeExpressionLength
              + usage);
    }

    if (!(attributeExpressionExecutors[0] instanceof ConstantExpressionExecutor)) {
      throw new ExecutionPlanCreationException("First parameter should be a constant" + usage);
    }

    String regex;
    try {
      // A constant executor can be evaluated without an event.
      regex = (String) attributeExpressionExecutors[0].execute(null);
    } catch (ClassCastException e) {
      throw new ExecutionPlanCreationException(
          "First parameter should be of type string. Found "
              + attributeExpressionExecutors[0].getReturnType()
              + usage,
          e);
    }

    try {
      regexPattern = TokenSequencePattern.compile(regex);
    } catch (Exception e) {
      throw new ExecutionPlanCreationException("Cannot parse given regex " + regex, e);
    }

    if (!(attributeExpressionExecutors[1] instanceof VariableExpressionExecutor)) {
      throw new ExecutionPlanCreationException("Second parameter should be a variable" + usage);
    }

    if (logger.isDebugEnabled()) {
      logger.debug(
          String.format(
              "Query parameters initialized. Regex: %s Stream Parameters: %s",
              regex, abstractDefinition.getAttributeList()));
    }

    initPipeline();

    List<Attribute> attributes = new ArrayList<Attribute>();

    // The first attribute is always "match"; group attributes follow, numbered from 1.
    attributes.add(new Attribute("match", Attribute.Type.STRING));
    attributeCount = regexPattern.getTotalGroups();
    for (int i = 1; i < attributeCount; i++) {
      attributes.add(new Attribute(groupPrefix + i, Attribute.Type.STRING));
    }
    return attributes;
  }