Java PosTagger 예제들

프로그래밍 언어: Java

네임스페이스/패키지 이름: pltag.util

클래스/타입: PosTagger

hotexamples.com에서의 예제들: 2

Java PosTagger - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 pltag.util.PosTagger에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

posTagLine(1)

posTagLineToArray(1)

tokensToLinePosTagged(1)

예제 #1

파일 보기

파일: Example.java 프로젝트: sinantie/PLTAG

  public Example(
      String name,
      String[] input,
      Lexicon lexicon,
      Map<ElementaryStringTree, ArrayList<Fringe>> shadowTreesMap,
      SuperTagger superTagger,
      FreqCounter freqCounter,
      Options opts) {
    this.opts = opts;
    this.name = name;
    this.lexicon = lexicon;
    this.shadowTreesMap = shadowTreesMap;
    this.superTagger = superTagger;
    this.freqCounter = freqCounter;

    if (input[0].equals(
        "NOT PARSED")) // we could not extract the lexicon or missing entirely from gold standard
                       // dataset
    {
      sentence = posTagged = parsed = goldStandardNoTraces = solution = "";
      numOfWords = 0;
      notParsed = true;
    } else {
      if (opts.estimateProcDifficulty) {
        if (opts.inputType == Options.InputType.dundee) {
          readDundeeInput(input[0]);
        } else if (opts.inputType == Options.InputType.posTagged
            || (opts.inputType == Options.InputType.pltag
                && input[0].contains("\t"))) // tab delimited POS-word pairs
        {
          readPosTagged(input[0]);
        } else if (opts.inputType == Options.InputType.pltag) {
          readPennTreebank(input, true);
          solution = parsed;
        } else // input is plain text without POS tags
        {
          if (opts.goldPosTags) {
            readPosTagged(PosTagger.posTagLine(input[0]));
          } else {
            StringBuilder str = new StringBuilder();
            //                    for(String word : removeQuotesPlain(input[0]).split(" "))
            for (String word : Utils.tokenizeStanford(input[0]).split(" ")) {
              str.append(String.format("N/A %s\t", word));
            }
            readPosTagged(str.toString().trim());
          }
        }
        solution = "";
      } else {
        readPennTreebank(input, true);
        solution = parsed;
      }
      this.numOfWords = posTagged.split("\t").length;
    }
  }

예제 #2

파일 보기

파일: Example.java 프로젝트: sinantie/PLTAG

 /**
  * Read input from Dundee corpus. The format is: RC_label|w_1 id_1 w_2 id_2 ...
  *
  * <p>Everything before the first {@code '|'} (when one is present) is stored in
  * {@code sentenceRc}; the remainder is the payload. A payload starting with {@code '('} is
  * treated as already-parsed tree input; anything else, including an empty payload, is treated
  * as a flat, space-separated sequence of alternating word and word-id tokens.
  *
  * <p>For flat input this method fills {@code wordIds} and {@code sentence}. When
  * {@code opts.goldPosTags} is set the sentence is run through {@link PosTagger} and handed to
  * {@code readPosTagged}; otherwise {@code posTagged} is built directly with the placeholder tag
  * {@code "N/A"} for every word.
  *
  * @param line one raw line of Dundee input, optionally prefixed with {@code RC_label|}
  */
 private void readDundeeInput(String line) {
   int index = line.indexOf("|");
   if (index > -1) {
     sentenceRc = line.substring(0, index);
   }
   // With no '|' present index is -1, so this keeps the whole line.
   line = line.substring(index + 1);
   StringBuilder sent = new StringBuilder();
   boolean usePosTagger = opts.goldPosTags;
   StringBuilder posTagDummy = new StringBuilder();
   // startsWith (rather than charAt(0)) so that an empty payload does not throw
   // StringIndexOutOfBoundsException; it is handled by the flat-token branch instead.
   if (line.startsWith("(")) // some examples may contain already parsed input in tree format
   {
     // NOTE(review): the buffers filled here are never copied into sentence/posTagged within
     // this method - confirm whether treeProcessDundeeIds or the caller is expected to cover
     // this case.
     List<Word> words = Tree.valueOf(treeProcessDundeeIds(line)).yieldWords();
     for (Word word : words) {
       sent.append(word).append(" ");
       if (!usePosTagger) {
         posTagDummy.append("N/A ").append(word).append("\t");
       }
     }
   } else {
     // remove quotes
     line = replaceParenthesesDundee(removeQuotesDundee(line).trim()).trim();
     String[] tokens = line.split(" ");
     // Tokens come in (word, id) pairs; a trailing unpaired token is ignored by the loop below.
     wordIds = new String[tokens.length / 2];
     for (int i = 0; i < tokens.length - 1; i += 2) {
       // Digits are stripped up front only on the dummy-tag path; on the tagger path the
       // digit-removal helpers are applied after tagging (see below).
       String word = !usePosTagger ? wordRemoveDigits(tokens[i]) : tokens[i];
       sent.append(word).append(" ");
       if (!usePosTagger) {
         posTagDummy.append("N/A ").append(word).append("\t");
       }
       wordIds[i / 2] = tokens[i + 1];
     }
     sentence = sent.toString().trim();
     if (usePosTagger) {
       Pair<String, String>[] posWords = PosTagger.posTagLineToArray(sentence);
       // One or more words has been expanded due to PTB-compliant splitting.
       // For each new constituent assign the word-id of the original word.
       if (posWords.length != wordIds.length) {
         adjustWordIdsDundee(posWords, sentence.split(" "));
       }
       sentence = sentRemoveDigits(sentence);
       readPosTagged(wordRemoveDigits(PosTagger.tokensToLinePosTagged(posWords)));
     } else {
       posTagged = posTagDummy.toString().trim();
     }
   }
 }