Ejemplos de CoreLabel.value en Java

Lenguaje de programación: Java

Namespace/Package Name: edu.stanford.nlp.ling

Clase / Tipo: CoreLabel

Método / Función: value

Ejemplos en hotexamples.com: 5

Java CoreLabel.value - 5 ejemplos encontrados. Estos son los ejemplos en Java del mundo real mejor valorados de edu.stanford.nlp.ling.CoreLabel.value extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

get(30)

word(18)

set(13)

tag(6)

value(5)

factory(4)

setValue(4)

setIndex(4)

containsKey(4)

setTag(3)

beginPosition(3)

lemma(3)

originalText(3)

index(3)

endPosition(3)

ner(2)

setLemma(2)

setNER(2)

setWord(2)

getString(2)

toShorterString(2)

toString(2)

remove(1)

setSentIndex(1)

setDocID(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: SpanishXMLTreeReader.java Proyecto: automenta/corenlp

  /**
   * Determine if the given tree contains a leaf which matches the part-of-speech and lexical
   * criteria.
   *
   * <p>Each preterminal node encountered while iterating over {@code tree} is tested: the value of
   * its own label is treated as the part of speech, and the value of its first child's label as
   * the word. Both labels are cast to {@code CoreLabel}, so a tree carrying other label types will
   * fail with a {@code ClassCastException}.
   *
   * @param tree Tree whose nodes are scanned
   * @param pos Regular expression to match part of speech (may be null, in which case any POS is
   *     allowed)
   * @param word Regular expression to match word (may be null, in which case any word is allowed)
   * @return {@code true} as soon as one preterminal satisfies both criteria; {@code false} if none
   *     does
   */
  public static boolean shouldPrintTree(Tree tree, Pattern pos, Pattern word) {
    for (Tree t : tree) {
      if (!t.isPreTerminal()) {
        continue;
      }

      CoreLabel label = (CoreLabel) t.label();
      String tpos = label.value();

      Tree wordNode = t.firstChild();
      CoreLabel wordLabel = (CoreLabel) wordNode.label();
      String tword = wordLabel.value();

      // Short-circuit order matters: the word pattern is only consulted once the POS test passed.
      if ((pos == null || pos.matcher(tpos).find())
          && (word == null || word.matcher(tword).find())) {
        return true;
      }
    }
    return false;
  }

Ejemplo n.º 2

Mostrar archivo

Archivo: LeafAncestorEval.java Proyecto: automenta/corenlp

  /**
   * Renders a lineage as one string.
   *
   * @param lineage labels to render, visited in the list's iteration order
   * @return the value of every label, each followed by {@code " <-- "}, concatenated; the empty
   *     string for an empty list
   */
  private static String toString(final List<CoreLabel> lineage) {
    final StringBuilder rendered = new StringBuilder();
    for (final CoreLabel ancestor : lineage) {
      rendered.append(ancestor.value()).append(" <-- ");
    }
    return rendered.toString();
  }

Ejemplo n.º 3

Mostrar archivo

Archivo: LeafAncestorEval.java Proyecto: automenta/corenlp

  /**
   * Walks the tree depth-first with an explicit stack and, each time a preterminal node is
   * popped, records a snapshot of the labels currently on the ancestor stack as that node's
   * lineage.
   *
   * <p>Depth is kept in the index of each node's label: the root is set to 0 and every child is
   * set to its parent's depth plus one before being pushed. The labels must therefore implement
   * {@code HasIndex}; otherwise the casts below fail. Note that this overwrites the index stored
   * on the labels of the tree that is passed in. Children are pushed in order, so they are popped
   * (and their lineages recorded) last child first.
   *
   * @param t The tree
   * @return A list of lineages, or {@code null} when {@code t} is {@code null}
   */
  private static List<List<CoreLabel>> makeLineages(final Tree t) {
    if (t == null) {
      return null;
    }

    // The root sits at depth 0.
    ((HasIndex) t.label()).setIndex(0);

    final Stack<Tree> pending = new Stack<>();
    pending.push(t);

    final Stack<CoreLabel> ancestors = new Stack<>();
    final CoreLabel rootCopy = new CoreLabel(t.label());
    rootCopy.setIndex(0);
    ancestors.push(rootCopy);

    final List<List<CoreLabel>> lineages = new ArrayList<>();

    while (!pending.isEmpty()) {
      final Tree current = pending.pop();
      final int depth = ((HasIndex) current.label()).index();

      // Unwind until the top of the ancestor stack is exactly one level above the current node.
      while (!ancestors.isEmpty() && ancestors.peek().index() != depth - 1) {
        ancestors.pop();
      }

      if (current.isPreTerminal()) {
        // Copy the stack: it keeps changing while the traversal continues.
        final List<CoreLabel> snapshot = new ArrayList<>(ancestors);
        lineages.add(snapshot);
        continue;
      }

      for (Tree child : current.children()) {
        ((HasIndex) child.label()).setIndex(depth + 1);
        pending.push(child);
      }
      final CoreLabel currentCopy = new CoreLabel(current.label());
      currentCopy.setIndex(depth);
      ancestors.push(currentCopy);
    }

    if (DEBUG) {
      System.out.println("Lineages:");
      for (List<CoreLabel> lineage : lineages) {
        for (CoreLabel ancestor : lineage) {
          System.out.print(ancestor.value() + " <- ");
        }
        System.out.println();
      }
    }

    return lineages;
  }

Ejemplo n.º 4

Mostrar archivo

Archivo: StanfordTokenize.java Proyecto: kperi/varaha

  /**
   * Tokenizes the string form of the first field of {@code input} and collects the tokens into a
   * bag of single-field tuples.
   *
   * <p>Only tokens whose {@code value()} is longer than two characters are kept; the text stored
   * in the bag is the token's {@code word()}. Every kept token is also echoed to standard error.
   *
   * @param input tuple whose field 0 holds the text to tokenize
   * @return a bag with one tuple per kept token, or {@code null} when {@code input} is
   *     {@code null}, empty, or has a null first field
   * @throws IOException if reading the field from the tuple fails
   */
  public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() < 1 || input.isNull(0)) {
      return null;
    }

    // Output bag
    DataBag bagOfTokens = bagFactory.newDefaultBag();

    StringReader textInput = new StringReader(input.get(0).toString());
    // Parameterised tokenizer: next() yields CoreLabel directly, no raw type or unchecked cast.
    PTBTokenizer<CoreLabel> ptbt =
        new PTBTokenizer<CoreLabel>(textInput, new CoreLabelTokenFactory(), "");

    while (ptbt.hasNext()) {
      CoreLabel label = ptbt.next();
      if (label.value().length() > 2) {
        System.err.println(label.toString());
        Tuple termText = tupleFactory.newTuple(label.word());
        bagOfTokens.add(termText);
      }
    }
    return bagOfTokens;
  }

Ejemplo n.º 5

Mostrar archivo

Archivo: ChineseTreebankParserParams.java Proyecto: PartYoga/stanford-word-segmenter-server

  /**
   * transformTree does all language-specific tree transformations. Any parameterizations should be
   * inside the specific TreebankLangParserParams class.
   *
   * <p>The node {@code t} is relabelled in place with a new {@code CategoryWordTag}. For a
   * preterminal node the POS tag is extended with the enabled tag-split suffixes and used as both
   * category and tag of the new label; for any other internal node the category is extended with
   * the enabled category-split suffixes. Which splits apply is controlled by option fields of the
   * enclosing class (e.g. {@code chineseSplitPunct}, {@code markCC}) that are declared outside
   * this method.
   *
   * <p>The label of {@code t} is cast to {@code CoreLabel}; other label types will fail with a
   * {@code ClassCastException}.
   *
   * @param t the node to transform; returned unchanged when {@code null} or a leaf
   * @param root the root of the tree containing {@code t}, used to look up the parent and
   *     grandparent of {@code t}; when {@code null} or equal to {@code t}, the node is treated as
   *     having no parent
   * @return {@code t} itself, after its label has been replaced
   */
  @Override
  public Tree transformTree(Tree t, Tree root) {
    if (t == null || t.isLeaf()) {
      return t;
    }

    // Resolve parent and grandparent; a missing ancestor is represented by null and an empty
    // category string.
    String parentStr;
    String grandParentStr;
    Tree parent;
    Tree grandParent;
    if (root == null || t.equals(root)) {
      parent = null;
      parentStr = "";
    } else {
      // NOTE(review): if t.parent(root) can return null (t not found under root), the next line
      // throws a NullPointerException -- confirm callers always pass the real root.
      parent = t.parent(root);
      parentStr = parent.label().value();
    }
    if (parent == null || parent.equals(root)) {
      grandParent = null;
      grandParentStr = "";
    } else {
      grandParent = parent.parent(root);
      grandParentStr = grandParent.label().value();
    }

    String baseParentStr = ctlp.basicCategory(parentStr);
    String baseGrandParentStr = ctlp.basicCategory(grandParentStr);

    CoreLabel lab = (CoreLabel) t.label();
    String word = lab.word();
    String tag = lab.tag();
    String baseTag = ctlp.basicCategory(tag);
    String category = lab.value();
    String baseCategory = ctlp.basicCategory(category);

    if (t.isPreTerminal()) { // it's a POS tag
      List<String> leftAunts =
          listBasicCategories(SisterAnnotationStats.leftSisterLabels(parent, grandParent));
      // NOTE(review): rightAunts is computed but never read in this method.
      List<String> rightAunts =
          listBasicCategories(SisterAnnotationStats.rightSisterLabels(parent, grandParent));

      // Chinese-specific punctuation splits
      if (chineseSplitPunct && baseTag.equals("PU")) {
        // The filters are tried in order; the first one accepting the word decides the suffix.
        if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(word)) {
          tag = tag + "-DOU";
          // System.out.println("Punct: Split dou hao"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseCommaAcceptFilter().accept(word)) {
          tag = tag + "-COMMA";
          // System.out.println("Punct: Split comma"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseColonAcceptFilter().accept(word)) {
          tag = tag + "-COLON";
          // System.out.println("Punct: Split colon"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseQuoteMarkAcceptFilter().accept(word)) {
          if (chineseSplitPunctLR) {
            if (ChineseTreebankLanguagePack.chineseLeftQuoteMarkAcceptFilter().accept(word)) {
              tag += "-LQUOTE";
            } else {
              tag += "-RQUOTE";
            }
          } else {
            tag = tag + "-QUOTE";
          }
          // System.out.println("Punct: Split quote"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseEndSentenceAcceptFilter().accept(word)) {
          tag = tag + "-ENDSENT";
          // System.out.println("Punct: Split end sent"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseParenthesisAcceptFilter().accept(word)) {
          if (chineseSplitPunctLR) {
            if (ChineseTreebankLanguagePack.chineseLeftParenthesisAcceptFilter().accept(word)) {
              tag += "-LPAREN";
            } else {
              tag += "-RPAREN";
            }
          } else {
            tag += "-PAREN";
            // printlnErr("Just used -PAREN annotation");
            // printlnErr(word);
            // throw new RuntimeException();
          }
          // System.out.println("Punct: Split paren"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseDashAcceptFilter().accept(word)) {
          tag = tag + "-DASH";
          // System.out.println("Punct: Split dash"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseOtherAcceptFilter().accept(word)) {
          tag = tag + "-OTHER";
        } else {
          printlnErr("Unknown punct (you should add it to CTLP): " + tag + " |" + word + "|");
        }
      } else if (chineseSplitDouHao) { // only split DouHao
        if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(word)
            && baseTag.equals("PU")) {
          tag = tag + "-DOU";
        }
      }

      // Chinese-specific POS tag splits (non-punctuation)

      if (tagWordSize) {
        int l = word.length();
        tag += "-" + l + "CHARS";
      }

      // NOTE(review): this overwrites the whole tag, discarding any suffix appended above
      // (e.g. the -nCHARS suffix from tagWordSize) -- confirm that is intended.
      if (mergeNNVV && baseTag.equals("NN")) {
        tag = "VV";
      }

      if ((chineseSelectiveTagPA || chineseVerySelectiveTagPA)
          && (baseTag.equals("CC") || baseTag.equals("P"))) {
        tag += "-" + baseParentStr;
      }
      if (chineseSelectiveTagPA && (baseTag.equals("VV"))) {
        tag += "-" + baseParentStr;
      }

      // NOTE(review): the three sister checks below dereference parent without a null check;
      // parent is null when t is the root (or root is null). Confirm a preterminal is never
      // passed as the root with these options enabled.
      if (markMultiNtag && tag.startsWith("N")) {
        // Appends "=N" once per sibling (other than t itself) whose label starts with "N".
        for (int i = 0; i < parent.numChildren(); i++) {
          if (parent.children()[i].label().value().startsWith("N") && parent.children()[i] != t) {
            tag += "=N";
            // System.out.println("Found multi=N rewrite");
          }
        }
      }

      if (markVVsisterIP && baseTag.equals("VV")) {
        boolean seenIP = false;
        for (int i = 0; i < parent.numChildren(); i++) {
          if (parent.children()[i].label().value().startsWith("IP")) {
            seenIP = true;
          }
        }
        if (seenIP) {
          tag += "-IP";
          // System.out.println("Found VV with IP sister"); // testing
        }
      }

      if (markPsisterIP && baseTag.equals("P")) {
        boolean seenIP = false;
        for (int i = 0; i < parent.numChildren(); i++) {
          if (parent.children()[i].label().value().startsWith("IP")) {
            seenIP = true;
          }
        }
        if (seenIP) {
          tag += "-IP";
        }
      }

      if (markADgrandchildOfIP && baseTag.equals("AD") && baseGrandParentStr.equals("IP")) {
        tag += "~IP";
        // System.out.println("Found AD with IP grandparent"); // testing
      }

      if (gpaAD && baseTag.equals("AD")) {
        tag += "~" + baseGrandParentStr;
        // System.out.println("Found AD with grandparent " + grandParentStr); // testing
      }

      if (markPostverbalP && leftAunts.contains("VV") && baseTag.equals("P")) {
        // System.out.println("Found post-verbal P");
        tag += "^=lVV";
      }

      // end Chinese-specific tag splits

      // The annotated tag serves as both the category and the tag of the new label.
      Label label = new CategoryWordTag(tag, word, tag);
      t.setLabel(label);
    } else {
      // it's a phrasal category
      Tree[] kids = t.children();

      // Chinese-specific category splits
      List<String> leftSis = listBasicCategories(SisterAnnotationStats.leftSisterLabels(t, parent));
      List<String> rightSis =
          listBasicCategories(SisterAnnotationStats.rightSisterLabels(t, parent));

      if (paRootDtr && baseParentStr.equals("ROOT")) {
        category += "^ROOT";
      }

      if (markIPsisterBA && baseCategory.equals("IP")) {
        if (leftSis.contains("BA")) {
          category += "=BA";
          // System.out.println("Found IP sister of BA");
        }
      }

      if (dominatesV && hasV(t.preTerminalYield())) {
        // mark categories containing a verb
        category += "-v";
      }

      if (markIPsisterVVorP && baseCategory.equals("IP")) {
        // todo: cdm: is just looking for "P" here selective enough??
        if (leftSis.contains("VV") || leftSis.contains("P")) {
          category += "=VVP";
        }
      }

      if (markIPsisDEC && baseCategory.equals("IP")) {
        if (rightSis.contains("DEC")) {
          category += "=DEC";
          // System.out.println("Found prenominal IP");
        }
      }

      if (baseCategory.equals("VP")) {
        // cdm 2008: this used to just check that it startsWith("VP"), but
        // I think that was bad because it also matched VPT verb compounds
        if (chineseSplitVP == 3) {
          // Three-way split of VP into -CRD, -COMP or -ADJT based on the kinds of children found.
          boolean hasCC = false;
          boolean hasPU = false;
          boolean hasLexV = false;
          for (Tree kid : kids) {
            if (kid.label().value().startsWith("CC")) {
              hasCC = true;
            } else if (kid.label().value().startsWith("PU")) {
              hasPU = true;
            } else if (StringUtils.lookingAt(
                kid.label().value(), "(V[ACEV]|VCD|VCP|VNV|VPT|VRD|VSB)")) {
              hasLexV = true;
            }
          }
          if (hasCC || (hasPU && !hasLexV)) {
            category += "-CRD";
            // System.out.println("Found coordinate VP"); // testing
          } else if (hasLexV) {
            category += "-COMP";
            // System.out.println("Found complementing VP"); // testing
          } else {
            category += "-ADJT";
            // System.out.println("Found adjoining VP"); // testing
          }
        } else if (chineseSplitVP >= 1) {
          // Levels 1 and 2: mark VPs with a BA child; level 2 also looks one level deeper,
          // inside VP children.
          boolean hasBA = false;
          for (Tree kid : kids) {
            if (kid.label().value().startsWith("BA")) {
              hasBA = true;
              // NOTE(review): this uses tlp where the rest of the method mostly uses ctlp --
              // presumably the same language pack; verify.
            } else if (chineseSplitVP == 2 && tlp.basicCategory(kid.label().value()).equals("VP")) {
              for (Tree kidkid : kid.children()) {
                if (kidkid.label().value().startsWith("BA")) {
                  hasBA = true;
                }
              }
            }
          }
          if (hasBA) {
            category += "-BA";
          }
        }
      }

      if (markVPadjunct && baseParentStr.equals("VP")) {
        // cdm 2008: This used to use startsWith("VP") but changed to baseCat
        Tree[] sisters = parent.children();
        boolean hasVPsister = false;
        boolean hasCC = false;
        boolean hasPU = false;
        boolean hasLexV = false;
        // The scan covers all children of the parent, including t itself.
        for (Tree sister : sisters) {
          if (tlp.basicCategory(sister.label().value()).equals("VP")) {
            hasVPsister = true;
          }
          if (sister.label().value().startsWith("CC")) {
            hasCC = true;
          }
          if (sister.label().value().startsWith("PU")) {
            hasPU = true;
          }
          if (StringUtils.lookingAt(sister.label().value(), "(V[ACEV]|VCD|VCP|VNV|VPT|VRD|VSB)")) {
            hasLexV = true;
          }
        }
        if (hasVPsister && !(hasCC || hasPU || hasLexV)) {
          category += "-VPADJ";
          // System.out.println("Found adjunct of VP"); // testing
        }
      }

      if (markNPmodNP && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.contains("NP")) {
          category += "=MODIFIERNP";
          // System.out.println("Found NP modifier of NP"); // testing
        }
      }

      if (markModifiedNP && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.isEmpty()
            && (leftSis.contains("ADJP")
                || leftSis.contains("NP")
                || leftSis.contains("DNP")
                || leftSis.contains("QP")
                || leftSis.contains("CP")
                || leftSis.contains("PP"))) {
          category += "=MODIFIEDNP";
          // System.out.println("Found modified NP"); // testing
        }
      }

      if (markNPconj && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.contains("CC")
            || rightSis.contains("PU")
            || leftSis.contains("CC")
            || leftSis.contains("PU")) {
          category += "=CONJ";
          // System.out.println("Found NP conjunct"); // testing
        }
      }

      if (markIPconj && baseCategory.equals("IP") && baseParentStr.equals("IP")) {
        Tree[] sisters = parent.children();
        boolean hasCommaSis = false;
        boolean hasIPSis = false;
        for (Tree sister : sisters) {
          // A PU sister counts as a comma when its first child's label text passes the comma
          // filter.
          if (ctlp.basicCategory(sister.label().value()).equals("PU")
              && ChineseTreebankLanguagePack.chineseCommaAcceptFilter()
                  .accept(sister.children()[0].label().toString())) {
            hasCommaSis = true;
            // System.out.println("Found CommaSis"); // testing
          }
          if (ctlp.basicCategory(sister.label().value()).equals("IP") && sister != t) {
            hasIPSis = true;
          }
        }
        if (hasCommaSis && hasIPSis) {
          category += "-CONJ";
          // System.out.println("Found IP conjunct"); // testing
        }
      }

      if (unaryIP && baseCategory.equals("IP") && t.numChildren() == 1) {
        category += "-U";
        // System.out.println("Found unary IP"); //testing
      }
      if (unaryCP && baseCategory.equals("CP") && t.numChildren() == 1) {
        category += "-U";
        // System.out.println("Found unary CP"); //testing
      }

      if (splitBaseNP && baseCategory.equals("NP")) {
        if (t.isPrePreTerminal()) {
          category = category + "-B";
        }
      }

      // if (Test.verbose) printlnErr(baseCategory + " " + leftSis.toString()); //debugging

      if (markPostverbalPP && leftSis.contains("VV") && baseCategory.equals("PP")) {
        // System.out.println("Found post-verbal PP");
        category += "=lVV";
      }

      if ((markADgrandchildOfIP || gpaAD)
          && listBasicCategories(SisterAnnotationStats.kidLabels(t)).contains("AD")) {
        category += "^ADVP";
      }

      if (markCC) {
        // was: for (int i = 0; i < kids.length; i++) {
        // This second version takes an idea from Collins: don't count
        // marginal conjunctions which don't conjoin 2 things.
        // The first and last children are skipped; "-CC" is appended once per matching child.
        for (int i = 1; i < kids.length - 1; i++) {
          String cat2 = kids[i].label().value();
          if (cat2.startsWith("CC")) {
            category += "-CC";
          }
        }
      }

      // word and tag are carried over unchanged from the original label.
      Label label = new CategoryWordTag(category, word, tag);
      t.setLabel(label);
    }
    return t;
  }