コード例 #1
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 /**
  * Returns the index of <code>daughter</code> in <code>parent</code> by ==. Returns -1 if <code>
  * daughter</code> not found.
  */
 public static int objectEqualityIndexOf(Tree parent, Tree daughter) {
   for (int i = 0; i < parent.children().length; i++) {
     if (daughter == parent.children()[i]) {
       return i;
     }
   }
   return -1;
 }
コード例 #2
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 /**
  * returns the syntactic category of the tree as a list of the syntactic categories of the mother
  * and the daughters
  */
 public static List<String> localTreeAsCatList(Tree t) {
   List<String> l = new ArrayList<String>(t.children().length + 1);
   l.add(t.label().value());
   for (int i = 0; i < t.children().length; i++) {
     l.add(t.children()[i].label().value());
   }
   return l;
 }
コード例 #3
0
ファイル: RNTN.java プロジェクト: jpquiroga/java-deeplearning
 public FloatMatrix getWForNode(Tree node) {
   if (node.children().size() == 2) {
     String leftLabel = node.children().get(0).value();
     String leftBasic = basicCategory(leftLabel);
     String rightLabel = node.children().get(1).value();
     String rightBasic = basicCategory(rightLabel);
     return binaryTransform.get(leftBasic, rightBasic);
   } else if (node.children().size() == 1) {
     throw new AssertionError("No unary transform matrices, only unary classification");
   } else {
     throw new AssertionError("Unexpected tree children size of " + node.children().size());
   }
 }
コード例 #4
0
ファイル: FactoredParser.java プロジェクト: renaud/maven_repo
 protected Rule ltToRule(Tree lt) {
   if (lt.children().length == 1) {
     UnaryRule ur = new UnaryRule();
     ur.parent = stateNumberer.number(lt.label().value());
     ur.child = stateNumberer.number(lt.children()[0].label().value());
     return ur;
   } else {
     BinaryRule br = new BinaryRule();
     br.parent = stateNumberer.number(lt.label().value());
     br.leftChild = stateNumberer.number(lt.children()[0].label().value());
     br.rightChild = stateNumberer.number(lt.children()[1].label().value());
     return br;
   }
 }
コード例 #5
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 private static void leafLabels(Tree t, List<Label> l) {
   if (t.isLeaf()) {
     l.add(t.label());
   } else {
     Tree[] kids = t.children();
     for (int j = 0, n = kids.length; j < n; j++) {
       leafLabels(kids[j], l);
     }
   }
 }
コード例 #6
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 private static void preTerminals(Tree t, List<Tree> l) {
   if (t.isPreTerminal()) {
     l.add(t);
   } else {
     Tree[] kids = t.children();
     for (int j = 0, n = kids.length; j < n; j++) {
       preTerminals(kids[j], l);
     }
   }
 }
コード例 #7
0
ファイル: Trees.java プロジェクト: ricojansen/degraphmalizer
	public static <A> void printTree(Tree<A> tree, PrintStream ps)
	{
		ps.printf("(%s (", tree.value());
		for (Tree<A> t : tree.children())
		{
			printTree(t, ps);
			ps.print(",");
		}
		ps.print("))");
	}
コード例 #8
0
 /**
  * Add -TMP when not present within an NP
  *
  * @param tree The tree to add temporal info to.
  */
 private void addTMP9(final Tree tree) {
   // do the head chain under it
   Tree ht = headFinder.determineHead(tree);
   // special fix for possessives! -- make noun before head
   if (ht.value().equals("POS")) {
     int j = tree.objectIndexOf(ht);
     if (j > 0) {
       ht = tree.getChild(j - 1);
     }
   }
   // Note: this next bit changes the tree label, rather
   // than creating a new tree node.  Beware!
   if (ht.isPreTerminal()
       || ht.value().startsWith("NP")
       || ht.value().startsWith("PP")
       || ht.value().startsWith("ADVP")) {
     if (!TmpPattern.matcher(ht.value()).matches()) {
       LabelFactory lf = ht.labelFactory();
       // System.err.println("TMP: Changing " + ht.value() + " to " +
       //                   ht.value() + "-TMP");
       ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
     }
     if (ht.value().startsWith("NP")
         || ht.value().startsWith("PP")
         || ht.value().startsWith("ADVP")) {
       addTMP9(ht);
     }
   }
   // do the NPs under it (which may or may not be the head chain
   Tree[] kidlets = tree.children();
   for (int k = 0; k < kidlets.length; k++) {
     ht = kidlets[k];
     LabelFactory lf;
     if (tree.isPrePreTerminal() && !TmpPattern.matcher(ht.value()).matches()) {
       // System.err.println("TMP: Changing " + ht.value() + " to " +
       //                   ht.value() + "-TMP");
       lf = ht.labelFactory();
       // Note: this next bit changes the tree label, rather
       // than creating a new tree node.  Beware!
       ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
     } else if (ht.value().startsWith("NP")) {
       // don't add -TMP twice!
       if (!TmpPattern.matcher(ht.value()).matches()) {
         lf = ht.labelFactory();
         // System.err.println("TMP: Changing " + ht.value() + " to " +
         //                   ht.value() + "-TMP");
         // Note: this next bit changes the tree label, rather
         // than creating a new tree node.  Beware!
         ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
       }
       addTMP9(ht);
     }
   }
 }
コード例 #9
0
 private static boolean includesEmptyNPSubj(Tree t) {
   if (t == null) {
     return false;
   }
   Tree[] kids = t.children();
   if (kids == null) {
     return false;
   }
   boolean foundNullSubj = false;
   for (Tree kid : kids) {
     Tree[] kidkids = kid.children();
     if (NPSbjPattern.matcher(kid.value()).matches()) {
       kid.setValue("NP");
       if (kidkids != null && kidkids.length == 1 && kidkids[0].value().equals("-NONE-")) {
         // only set flag, since there are 2 a couple of times (errors)
         foundNullSubj = true;
       }
     }
   }
   return foundNullSubj;
 }
コード例 #10
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
   if (t.isPreTerminal()) {
     CoreLabel fl = (CoreLabel) t.getChild(0).label();
     fl.set(TagLabelAnnotation.class, t.label());
     l.add(fl);
   } else {
     Tree[] kids = t.children();
     for (int j = 0, n = kids.length; j < n; j++) {
       taggedLeafLabels(kids[j], l);
     }
   }
 }
コード例 #11
0
ファイル: RNTN.java プロジェクト: jpquiroga/java-deeplearning
  /**
   * This is the method to call for assigning labels and node vectors to the Tree. After calling
   * this, each of the non-leaf nodes will have the node vector and the predictions of their classes
   * assigned to that subtree's node.
   */
  public void forwardPropagateTree(Tree tree) {
    FloatMatrix nodeVector;
    FloatMatrix classification;

    if (tree.isLeaf()) {
      // We do nothing for the leaves.  The preterminals will
      // calculate the classification for this word/tag.  In fact, the
      // recursion should not have gotten here (unless there are
      // degenerate trees of just one leaf)
      throw new AssertionError("We should not have reached leaves in forwardPropagate");
    } else if (tree.isPreTerminal()) {
      classification = getUnaryClassification(tree.label());
      String word = tree.children().get(0).value();
      FloatMatrix wordVector = getFeatureVector(word);
      if (wordVector == null) {
        wordVector = featureVectors.get(UNKNOWN_FEATURE);
      }

      nodeVector = activationFunction.apply(wordVector);
    } else if (tree.children().size() == 1) {
      throw new AssertionError(
          "Non-preterminal nodes of size 1 should have already been collapsed");
    } else if (tree.children().size() == 2) {
      Tree left = tree.firstChild(), right = tree.lastChild();
      forwardPropagateTree(left);
      forwardPropagateTree(right);

      String leftCategory = tree.children().get(0).label();
      String rightCategory = tree.children().get(1).label();
      FloatMatrix W = getBinaryTransform(leftCategory, rightCategory);
      classification = getBinaryClassification(leftCategory, rightCategory);

      FloatMatrix leftVector = tree.children().get(0).vector();
      FloatMatrix rightVector = tree.children().get(1).vector();

      FloatMatrix childrenVector = appendBias(leftVector, rightVector);

      if (useFloatTensors) {
        FloatTensor floatT = getBinaryFloatTensor(leftCategory, rightCategory);
        FloatMatrix floatTensorIn = FloatMatrix.concatHorizontally(leftVector, rightVector);
        FloatMatrix floatTensorOut = floatT.bilinearProducts(floatTensorIn);
        nodeVector = activationFunction.apply(W.mmul(childrenVector).add(floatTensorOut));
      } else nodeVector = activationFunction.apply(W.mmul(childrenVector));

    } else {
      throw new AssertionError("Tree not correctly binarized");
    }

    FloatMatrix inputWithBias = appendBias(nodeVector);
    FloatMatrix preAct = classification.mmul(inputWithBias);
    FloatMatrix predictions = outputActivation.apply(preAct);

    tree.setPrediction(predictions);
    tree.setVector(nodeVector);
  }
コード例 #12
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 private static int treeToLatexHelper(
     Tree t, StringBuilder c, StringBuilder h, int n, int nextN, int indent) {
   StringBuilder sb = new StringBuilder();
   for (int i = 0; i < indent; i++) sb.append("  ");
   h.append('\n').append(sb);
   h.append("{\\")
       .append(t.isLeaf() ? "" : "n")
       .append("tnode{z")
       .append(n)
       .append("}{")
       .append(t.label())
       .append('}');
   if (!t.isLeaf()) {
     for (int k = 0; k < t.children().length; k++) {
       h.append(", ");
       c.append("\\nodeconnect{z").append(n).append("}{z").append(nextN).append("}\n");
       nextN = treeToLatexHelper(t.children()[k], c, h, nextN, nextN + 1, indent + 1);
     }
   }
   h.append('}');
   return nextN;
 }
コード例 #13
0
ファイル: Trees.java プロジェクト: ricojansen/degraphmalizer
	// TODO all this is very memory inefficient and will lead to stack overflows for very deep trees
	public static ObjectNode toJsonTree(ObjectMapper objectMapper, Tree<? extends JsonNode> tree)
	{
		final ObjectNode node = objectMapper.createObjectNode();
		node.put("_value", tree.value());
		
		final ArrayNode children = objectMapper.createArrayNode();
		for (Tree<? extends JsonNode> child : tree.children())
			children.add(toJsonTree(objectMapper, child));
		
		node.put("_children", children);
		
		return node;
	}
コード例 #14
0
ファイル: FactoredParser.java プロジェクト: renaud/maven_repo
 protected void tallyTree(Tree t, LinkedList<String> parents) {
   // traverse tree, building parent list
   String str = t.label().value();
   boolean strIsPassive = (str.indexOf('@') == -1);
   if (strIsPassive) {
     parents.addFirst(str);
   }
   if (!t.isLeaf()) {
     if (!t.children()[0].isLeaf()) {
       tallyInternalNode(t, parents);
       for (int c = 0; c < t.children().length; c++) {
         Tree child = t.children()[c];
         tallyTree(child, parents);
       }
     } else {
       tagNumberer.number(t.label().value());
     }
   }
   if (strIsPassive) {
     parents.removeFirst();
   }
 }
コード例 #15
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 /**
  * traceTo() values that are contained in the tree are assigned to new objects. traceTo() values
  * that are not contained in the tree are given the old value.
  */
 public static void fixEmptyTreeLeafs(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
   Tree[] kids = t.children();
   for (int i = 0, n = kids.length; i < n; i++) {
     fixEmptyTreeLeafs(kids[i], newToOld, oldToNew);
   }
   if (t instanceof EmptyTreeLeaf) {
     EmptyTreeLeaf oldT = (EmptyTreeLeaf) newToOld.get(t);
     ((EmptyTreeLeaf) t).setEmptyType(oldT.emptyType());
     Tree oldTraceTo = oldT.traceTo();
     Tree newTraceTo = oldToNew.get(oldTraceTo);
     if (newTraceTo != null) ((EmptyTreeLeaf) t).setTraceTo(newTraceTo);
     else ((EmptyTreeLeaf) t).setTraceTo(oldTraceTo);
   }
 }
コード例 #16
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 static Tree getPreTerminal(Tree tree, MutableInteger i, int n) {
   if (i.intValue() == n) {
     if (tree.isPreTerminal()) {
       return tree;
     } else {
       return getPreTerminal(tree.children()[0], i, n);
     }
   } else {
     if (tree.isPreTerminal()) {
       i.set(i.intValue() + tree.yield().size());
       return null;
     } else {
       Tree[] kids = tree.children();
       for (int j = 0; j < kids.length; j++) {
         Tree result = getPreTerminal(kids[j], i, n);
         if (result != null) {
           return result;
         }
       }
       return null;
     }
   }
 }
コード例 #17
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 /** replaces all instances (by ==) of node with node1. Doesn't affect the node t itself */
 public static void replaceNode(Tree node, Tree node1, Tree t) {
   if (t.isLeaf()) return;
   Tree[] kids = t.children();
   List<Tree> newKids = new ArrayList<Tree>(kids.length);
   for (int i = 0, n = kids.length; i < n; i++) {
     if (kids[i] != node) {
       newKids.add(kids[i]);
       replaceNode(node, node1, kids[i]);
     } else {
       newKids.add(node1);
     }
   }
   t.setChildren(newKids);
 }
コード例 #18
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 private static int treeToLatexEvenHelper(
     Tree t,
     StringBuilder c,
     StringBuilder h,
     int n,
     int nextN,
     int indent,
     int curDepth,
     int maxDepth) {
   StringBuilder sb = new StringBuilder();
   for (int i = 0; i < indent; i++) sb.append("  ");
   h.append('\n').append(sb);
   int tDepth = t.depth();
   if (tDepth == 0 && tDepth + curDepth < maxDepth) {
     for (int pad = 0; pad < maxDepth - tDepth - curDepth; pad++) {
       h.append("{\\ntnode{pad}{}, ");
     }
   }
   h.append("{\\ntnode{z").append(n).append("}{").append(t.label()).append('}');
   if (!t.isLeaf()) {
     for (int k = 0; k < t.children().length; k++) {
       h.append(", ");
       c.append("\\nodeconnect{z").append(n).append("}{z").append(nextN).append("}\n");
       nextN =
           treeToLatexEvenHelper(
               t.children()[k], c, h, nextN, nextN + 1, indent + 1, curDepth + 1, maxDepth);
     }
   }
   if (tDepth == 0 && tDepth + curDepth < maxDepth) {
     for (int pad = 0; pad < maxDepth - tDepth - curDepth; pad++) {
       h.append('}');
     }
   }
   h.append('}');
   return nextN;
 }
コード例 #19
0
  public static ArrayList<ArrayList<TaggedWord>> getPhrases(Tree parse, int phraseSizeLimit) {
    ArrayList<ArrayList<TaggedWord>> newList = new ArrayList<ArrayList<TaggedWord>>();
    List<Tree> leaves = parse.getLeaves();

    if (leaves.size() <= phraseSizeLimit) {
      // ArrayList<TaggedWord> phraseElements = PreprocessPhrase(parse.taggedYield());
      ArrayList<TaggedWord> phraseElements = Preprocess(parse.taggedYield());
      if (phraseElements.size() > 0) newList.add(phraseElements);
    } else {
      Tree[] childrenNodes = parse.children();
      for (int i = 0; i < childrenNodes.length; i++) {
        Tree currentParse = childrenNodes[i];
        newList.addAll(getPhrases(currentParse, phraseSizeLimit));
      }
    }
    return newList;
  }
コード例 #20
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 static boolean rightEdge(Tree t, Tree t1, MutableInteger i) {
   if (t == t1) {
     return true;
   } else if (t1.isLeaf()) {
     int j = t1.yield().size(); // so that empties don't add size
     i.set(i.intValue() - j);
     return false;
   } else {
     Tree[] kids = t1.children();
     for (int j = kids.length - 1; j >= 0; j--) {
       if (rightEdge(t, kids[j], i)) {
         return true;
       }
     }
     return false;
   }
 }
コード例 #21
0
ファイル: Trees.java プロジェクト: hercky/undergradCourses
 public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
   Tree[] kids = t.children();
   Tree[] newKids = new Tree[kids.length];
   for (int i = 0, n = kids.length; i < n; i++) {
     newKids[i] = copyHelper(kids[i], newToOld, oldToNew);
   }
   TreeFactory tf = t.treeFactory();
   if (kids.length == 0) {
     Tree newLeaf = tf.newLeaf(t.label());
     newToOld.put(newLeaf, t);
     oldToNew.put(newLeaf, t);
     return newLeaf;
   }
   Tree newNode = tf.newTreeNode(t.label(), Arrays.asList(newKids));
   newToOld.put(newNode, t);
   oldToNew.put(t, newNode);
   return newNode;
 }
コード例 #22
0
ファイル: Trees.java プロジェクト: ricojansen/degraphmalizer
    /**
	 * Map a function over a tree
	 * 
	 * @param fn Function
	 * @param tree Tree of {@code A}'s
	 * @return Tree of {@code B}'s
	 */
	public static <A,B> Tree<B> map(final Function<A,B> fn, Tree<A> tree)
	{
		final B value = fn.apply(tree.value());
		
		if (isLeaf(tree))
            return new ImmutableTree<B>(value);

		else
		{
			final Function<Tree<A>,Tree<B>> tmap = new Function<Tree<A>,Tree<B>>()
				{
					public Tree<B> apply(Tree<A> tree)
					{
						return map(fn, tree);
					}
				};
			final Iterable<Tree<B>> tb = Iterables.transform(tree.children(), tmap);
			return new ImmutableTree<B>(value, tb);
		}
	}
コード例 #23
0
ファイル: RNTN.java プロジェクト: jpquiroga/java-deeplearning
 public FloatMatrix getClassWForNode(Tree node) {
   if (combineClassification) {
     return unaryClassification.get("");
   } else if (node.children().size() == 2) {
     String leftLabel = node.children().get(0).value();
     String leftBasic = basicCategory(leftLabel);
     String rightLabel = node.children().get(1).value();
     String rightBasic = basicCategory(rightLabel);
     return binaryClassification.get(leftBasic, rightBasic);
   } else if (node.children().size() == 1) {
     String unaryLabel = node.children().get(0).value();
     String unaryBasic = basicCategory(unaryLabel);
     return unaryClassification.get(unaryBasic);
   } else {
     throw new AssertionError("Unexpected tree children size of " + node.children().size());
   }
 }
コード例 #24
0
ファイル: RNTN.java プロジェクト: jpquiroga/java-deeplearning
  private void backpropDerivativesAndError(
      Tree tree,
      MultiDimensionalMap<String, String, FloatMatrix> binaryTD,
      MultiDimensionalMap<String, String, FloatMatrix> binaryCD,
      MultiDimensionalMap<String, String, FloatTensor> binaryFloatTensorTD,
      Map<String, FloatMatrix> unaryCD,
      Map<String, FloatMatrix> wordVectorD,
      FloatMatrix deltaUp) {
    if (tree.isLeaf()) {
      return;
    }

    FloatMatrix currentVector = tree.vector();
    String category = tree.label();
    category = basicCategory(category);

    // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
    FloatMatrix goldLabel = new FloatMatrix(numOuts, 1);
    int goldClass = tree.goldLabel();
    if (goldClass >= 0) {
      goldLabel.put(goldClass, 1.0f);
    }

    Float nodeWeight = classWeights.get(goldClass);
    if (nodeWeight == null) nodeWeight = 1.0f;
    FloatMatrix predictions = tree.prediction();

    // If this is an unlabeled class, set deltaClass to 0.  We could
    // make this more efficient by eliminating various of the below
    // calculations, but this would be the easiest way to handle the
    // unlabeled class
    FloatMatrix deltaClass =
        goldClass >= 0
            ? SimpleBlas.scal(nodeWeight, predictions.sub(goldLabel))
            : new FloatMatrix(predictions.rows, predictions.columns);
    FloatMatrix localCD = deltaClass.mmul(appendBias(currentVector).transpose());

    float error = -(MatrixFunctions.log(predictions).muli(goldLabel).sum());
    error = error * nodeWeight;
    tree.setError(error);

    if (tree.isPreTerminal()) { // below us is a word vector
      unaryCD.put(category, unaryCD.get(category).add(localCD));

      String word = tree.children().get(0).label();
      word = getVocabWord(word);

      FloatMatrix currentVectorDerivative = activationFunction.apply(currentVector);
      FloatMatrix deltaFromClass = getUnaryClassification(category).transpose().mmul(deltaClass);
      deltaFromClass =
          deltaFromClass.get(interval(0, numHidden), interval(0, 1)).mul(currentVectorDerivative);
      FloatMatrix deltaFull = deltaFromClass.add(deltaUp);
      wordVectorD.put(word, wordVectorD.get(word).add(deltaFull));

    } else {
      // Otherwise, this must be a binary node
      String leftCategory = basicCategory(tree.children().get(0).label());
      String rightCategory = basicCategory(tree.children().get(1).label());
      if (combineClassification) {
        unaryCD.put("", unaryCD.get("").add(localCD));
      } else {
        binaryCD.put(
            leftCategory, rightCategory, binaryCD.get(leftCategory, rightCategory).add(localCD));
      }

      FloatMatrix currentVectorDerivative = activationFunction.applyDerivative(currentVector);
      FloatMatrix deltaFromClass =
          getBinaryClassification(leftCategory, rightCategory).transpose().mmul(deltaClass);

      FloatMatrix mult = deltaFromClass.get(interval(0, numHidden), interval(0, 1));
      deltaFromClass = mult.muli(currentVectorDerivative);
      FloatMatrix deltaFull = deltaFromClass.add(deltaUp);

      FloatMatrix leftVector = tree.children().get(0).vector();
      FloatMatrix rightVector = tree.children().get(1).vector();

      FloatMatrix childrenVector = appendBias(leftVector, rightVector);

      // deltaFull 50 x 1, childrenVector: 50 x 2
      FloatMatrix add = binaryTD.get(leftCategory, rightCategory);

      FloatMatrix W_df = deltaFromClass.mmul(childrenVector.transpose());
      binaryTD.put(leftCategory, rightCategory, add.add(W_df));

      FloatMatrix deltaDown;
      if (useFloatTensors) {
        FloatTensor Wt_df = getFloatTensorGradient(deltaFull, leftVector, rightVector);
        binaryFloatTensorTD.put(
            leftCategory,
            rightCategory,
            binaryFloatTensorTD.get(leftCategory, rightCategory).add(Wt_df));
        deltaDown =
            computeFloatTensorDeltaDown(
                deltaFull,
                leftVector,
                rightVector,
                getBinaryTransform(leftCategory, rightCategory),
                getBinaryFloatTensor(leftCategory, rightCategory));
      } else {
        deltaDown = getBinaryTransform(leftCategory, rightCategory).transpose().mmul(deltaFull);
      }

      FloatMatrix leftDerivative = activationFunction.apply(leftVector);
      FloatMatrix rightDerivative = activationFunction.apply(rightVector);
      FloatMatrix leftDeltaDown = deltaDown.get(interval(0, deltaFull.rows), interval(0, 1));
      FloatMatrix rightDeltaDown =
          deltaDown.get(interval(deltaFull.rows, deltaFull.rows * 2), interval(0, 1));
      backpropDerivativesAndError(
          tree.children().get(0),
          binaryTD,
          binaryCD,
          binaryFloatTensorTD,
          unaryCD,
          wordVectorD,
          leftDerivative.mul(leftDeltaDown));
      backpropDerivativesAndError(
          tree.children().get(1),
          binaryTD,
          binaryCD,
          binaryFloatTensorTD,
          unaryCD,
          wordVectorD,
          rightDerivative.mul(rightDeltaDown));
    }
  }
コード例 #25
0
ファイル: Trees.java プロジェクト: ricojansen/degraphmalizer
 public static final <_> boolean isLeaf(final Tree<_> tree)
 {
     return Iterables.isEmpty(tree.children());
 }
  /**
   * transformTree does all language-specific tree transformations. Any parameterizations should be
   * inside the specific TreebankLangParserParams class.
   */
  @Override
  public Tree transformTree(Tree t, Tree root) {
    if (t == null || t.isLeaf()) {
      return t;
    }

    String parentStr;
    String grandParentStr;
    Tree parent;
    Tree grandParent;
    if (root == null || t.equals(root)) {
      parent = null;
      parentStr = "";
    } else {
      parent = t.parent(root);
      parentStr = parent.label().value();
    }
    if (parent == null || parent.equals(root)) {
      grandParent = null;
      grandParentStr = "";
    } else {
      grandParent = parent.parent(root);
      grandParentStr = grandParent.label().value();
    }

    String baseParentStr = ctlp.basicCategory(parentStr);
    String baseGrandParentStr = ctlp.basicCategory(grandParentStr);

    CoreLabel lab = (CoreLabel) t.label();
    String word = lab.word();
    String tag = lab.tag();
    String baseTag = ctlp.basicCategory(tag);
    String category = lab.value();
    String baseCategory = ctlp.basicCategory(category);

    if (t.isPreTerminal()) { // it's a POS tag
      List<String> leftAunts =
          listBasicCategories(SisterAnnotationStats.leftSisterLabels(parent, grandParent));
      List<String> rightAunts =
          listBasicCategories(SisterAnnotationStats.rightSisterLabels(parent, grandParent));

      // Chinese-specific punctuation splits
      if (chineseSplitPunct && baseTag.equals("PU")) {
        if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(word)) {
          tag = tag + "-DOU";
          // System.out.println("Punct: Split dou hao"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseCommaAcceptFilter().accept(word)) {
          tag = tag + "-COMMA";
          // System.out.println("Punct: Split comma"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseColonAcceptFilter().accept(word)) {
          tag = tag + "-COLON";
          // System.out.println("Punct: Split colon"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseQuoteMarkAcceptFilter().accept(word)) {
          if (chineseSplitPunctLR) {
            if (ChineseTreebankLanguagePack.chineseLeftQuoteMarkAcceptFilter().accept(word)) {
              tag += "-LQUOTE";
            } else {
              tag += "-RQUOTE";
            }
          } else {
            tag = tag + "-QUOTE";
          }
          // System.out.println("Punct: Split quote"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseEndSentenceAcceptFilter().accept(word)) {
          tag = tag + "-ENDSENT";
          // System.out.println("Punct: Split end sent"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseParenthesisAcceptFilter().accept(word)) {
          if (chineseSplitPunctLR) {
            if (ChineseTreebankLanguagePack.chineseLeftParenthesisAcceptFilter().accept(word)) {
              tag += "-LPAREN";
            } else {
              tag += "-RPAREN";
            }
          } else {
            tag += "-PAREN";
            // printlnErr("Just used -PAREN annotation");
            // printlnErr(word);
            // throw new RuntimeException();
          }
          // System.out.println("Punct: Split paren"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseDashAcceptFilter().accept(word)) {
          tag = tag + "-DASH";
          // System.out.println("Punct: Split dash"); // debugging
        } else if (ChineseTreebankLanguagePack.chineseOtherAcceptFilter().accept(word)) {
          tag = tag + "-OTHER";
        } else {
          printlnErr("Unknown punct (you should add it to CTLP): " + tag + " |" + word + "|");
        }
      } else if (chineseSplitDouHao) { // only split DouHao
        if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(word)
            && baseTag.equals("PU")) {
          tag = tag + "-DOU";
        }
      }

      // Chinese-specific POS tag splits (non-punctuation)

      if (tagWordSize) {
        int l = word.length();
        tag += "-" + l + "CHARS";
      }

      if (mergeNNVV && baseTag.equals("NN")) {
        tag = "VV";
      }

      if ((chineseSelectiveTagPA || chineseVerySelectiveTagPA)
          && (baseTag.equals("CC") || baseTag.equals("P"))) {
        tag += "-" + baseParentStr;
      }
      if (chineseSelectiveTagPA && (baseTag.equals("VV"))) {
        tag += "-" + baseParentStr;
      }

      if (markMultiNtag && tag.startsWith("N")) {
        for (int i = 0; i < parent.numChildren(); i++) {
          if (parent.children()[i].label().value().startsWith("N") && parent.children()[i] != t) {
            tag += "=N";
            // System.out.println("Found multi=N rewrite");
          }
        }
      }

      if (markVVsisterIP && baseTag.equals("VV")) {
        boolean seenIP = false;
        for (int i = 0; i < parent.numChildren(); i++) {
          if (parent.children()[i].label().value().startsWith("IP")) {
            seenIP = true;
          }
        }
        if (seenIP) {
          tag += "-IP";
          // System.out.println("Found VV with IP sister"); // testing
        }
      }

      if (markPsisterIP && baseTag.equals("P")) {
        boolean seenIP = false;
        for (int i = 0; i < parent.numChildren(); i++) {
          if (parent.children()[i].label().value().startsWith("IP")) {
            seenIP = true;
          }
        }
        if (seenIP) {
          tag += "-IP";
        }
      }

      if (markADgrandchildOfIP && baseTag.equals("AD") && baseGrandParentStr.equals("IP")) {
        tag += "~IP";
        // System.out.println("Found AD with IP grandparent"); // testing
      }

      if (gpaAD && baseTag.equals("AD")) {
        tag += "~" + baseGrandParentStr;
        // System.out.println("Found AD with grandparent " + grandParentStr); // testing
      }

      if (markPostverbalP && leftAunts.contains("VV") && baseTag.equals("P")) {
        // System.out.println("Found post-verbal P");
        tag += "^=lVV";
      }

      // end Chinese-specific tag splits

      Label label = new CategoryWordTag(tag, word, tag);
      t.setLabel(label);
    } else {
      // it's a phrasal category
      Tree[] kids = t.children();

      // Chinese-specific category splits
      List<String> leftSis = listBasicCategories(SisterAnnotationStats.leftSisterLabels(t, parent));
      List<String> rightSis =
          listBasicCategories(SisterAnnotationStats.rightSisterLabels(t, parent));

      if (paRootDtr && baseParentStr.equals("ROOT")) {
        category += "^ROOT";
      }

      if (markIPsisterBA && baseCategory.equals("IP")) {
        if (leftSis.contains("BA")) {
          category += "=BA";
          // System.out.println("Found IP sister of BA");
        }
      }

      if (dominatesV && hasV(t.preTerminalYield())) {
        // mark categories containing a verb
        category += "-v";
      }

      if (markIPsisterVVorP && baseCategory.equals("IP")) {
        // todo: cdm: is just looking for "P" here selective enough??
        if (leftSis.contains("VV") || leftSis.contains("P")) {
          category += "=VVP";
        }
      }

      if (markIPsisDEC && baseCategory.equals("IP")) {
        if (rightSis.contains("DEC")) {
          category += "=DEC";
          // System.out.println("Found prenominal IP");
        }
      }

      if (baseCategory.equals("VP")) {
        // cdm 2008: this used to just check that it startsWith("VP"), but
        // I think that was bad because it also matched VPT verb compounds
        if (chineseSplitVP == 3) {
          boolean hasCC = false;
          boolean hasPU = false;
          boolean hasLexV = false;
          for (Tree kid : kids) {
            if (kid.label().value().startsWith("CC")) {
              hasCC = true;
            } else if (kid.label().value().startsWith("PU")) {
              hasPU = true;
            } else if (StringUtils.lookingAt(
                kid.label().value(), "(V[ACEV]|VCD|VCP|VNV|VPT|VRD|VSB)")) {
              hasLexV = true;
            }
          }
          if (hasCC || (hasPU && !hasLexV)) {
            category += "-CRD";
            // System.out.println("Found coordinate VP"); // testing
          } else if (hasLexV) {
            category += "-COMP";
            // System.out.println("Found complementing VP"); // testing
          } else {
            category += "-ADJT";
            // System.out.println("Found adjoining VP"); // testing
          }
        } else if (chineseSplitVP >= 1) {
          boolean hasBA = false;
          for (Tree kid : kids) {
            if (kid.label().value().startsWith("BA")) {
              hasBA = true;
            } else if (chineseSplitVP == 2 && tlp.basicCategory(kid.label().value()).equals("VP")) {
              for (Tree kidkid : kid.children()) {
                if (kidkid.label().value().startsWith("BA")) {
                  hasBA = true;
                }
              }
            }
          }
          if (hasBA) {
            category += "-BA";
          }
        }
      }

      if (markVPadjunct && baseParentStr.equals("VP")) {
        // cdm 2008: This used to use startsWith("VP") but changed to baseCat
        Tree[] sisters = parent.children();
        boolean hasVPsister = false;
        boolean hasCC = false;
        boolean hasPU = false;
        boolean hasLexV = false;
        for (Tree sister : sisters) {
          if (tlp.basicCategory(sister.label().value()).equals("VP")) {
            hasVPsister = true;
          }
          if (sister.label().value().startsWith("CC")) {
            hasCC = true;
          }
          if (sister.label().value().startsWith("PU")) {
            hasPU = true;
          }
          if (StringUtils.lookingAt(sister.label().value(), "(V[ACEV]|VCD|VCP|VNV|VPT|VRD|VSB)")) {
            hasLexV = true;
          }
        }
        if (hasVPsister && !(hasCC || hasPU || hasLexV)) {
          category += "-VPADJ";
          // System.out.println("Found adjunct of VP"); // testing
        }
      }

      if (markNPmodNP && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.contains("NP")) {
          category += "=MODIFIERNP";
          // System.out.println("Found NP modifier of NP"); // testing
        }
      }

      if (markModifiedNP && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.isEmpty()
            && (leftSis.contains("ADJP")
                || leftSis.contains("NP")
                || leftSis.contains("DNP")
                || leftSis.contains("QP")
                || leftSis.contains("CP")
                || leftSis.contains("PP"))) {
          category += "=MODIFIEDNP";
          // System.out.println("Found modified NP"); // testing
        }
      }

      if (markNPconj && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.contains("CC")
            || rightSis.contains("PU")
            || leftSis.contains("CC")
            || leftSis.contains("PU")) {
          category += "=CONJ";
          // System.out.println("Found NP conjunct"); // testing
        }
      }

      if (markIPconj && baseCategory.equals("IP") && baseParentStr.equals("IP")) {
        Tree[] sisters = parent.children();
        boolean hasCommaSis = false;
        boolean hasIPSis = false;
        for (Tree sister : sisters) {
          if (ctlp.basicCategory(sister.label().value()).equals("PU")
              && ChineseTreebankLanguagePack.chineseCommaAcceptFilter()
                  .accept(sister.children()[0].label().toString())) {
            hasCommaSis = true;
            // System.out.println("Found CommaSis"); // testing
          }
          if (ctlp.basicCategory(sister.label().value()).equals("IP") && sister != t) {
            hasIPSis = true;
          }
        }
        if (hasCommaSis && hasIPSis) {
          category += "-CONJ";
          // System.out.println("Found IP conjunct"); // testing
        }
      }

      if (unaryIP && baseCategory.equals("IP") && t.numChildren() == 1) {
        category += "-U";
        // System.out.println("Found unary IP"); //testing
      }
      if (unaryCP && baseCategory.equals("CP") && t.numChildren() == 1) {
        category += "-U";
        // System.out.println("Found unary CP"); //testing
      }

      if (splitBaseNP && baseCategory.equals("NP")) {
        if (t.isPrePreTerminal()) {
          category = category + "-B";
        }
      }

      // if (Test.verbose) printlnErr(baseCategory + " " + leftSis.toString()); //debugging

      if (markPostverbalPP && leftSis.contains("VV") && baseCategory.equals("PP")) {
        // System.out.println("Found post-verbal PP");
        category += "=lVV";
      }

      if ((markADgrandchildOfIP || gpaAD)
          && listBasicCategories(SisterAnnotationStats.kidLabels(t)).contains("AD")) {
        category += "^ADVP";
      }

      if (markCC) {
        // was: for (int i = 0; i < kids.length; i++) {
        // This second version takes an idea from Collins: don't count
        // marginal conjunctions which don't conjoin 2 things.
        for (int i = 1; i < kids.length - 1; i++) {
          String cat2 = kids[i].label().value();
          if (cat2.startsWith("CC")) {
            category += "-CC";
          }
        }
      }

      Label label = new CategoryWordTag(category, word, tag);
      t.setLabel(label);
    }
    return t;
  }