예제 #1
0
 /**
  * Concatenates a child vector with the vectors of two context words selected by {@code span}.
  *
  * <p>The first context vector is looked up for the word at index {@code span.getSource()}; when
  * that index is negative, the model's start-word vector is used instead. The second context
  * vector is looked up for the word at index {@code span.getTarget()}; when that index is at or
  * beyond {@code words.size()}, the model's end-word vector is used instead.
  *
  * <p>NOTE(review): presumably the caller passes the indices of the words immediately outside the
  * constituent, so these are its left and right neighbours; confirm against the call sites.
  *
  * @param childVec the vector to which the two context vectors are appended
  * @param span pair of word indices selecting the two context words
  * @param words the words of the sentence, indexed by position
  * @return the result of {@code NeuralUtils.concatenate(childVec, first, second)}
  */
 private SimpleMatrix concatenateContextWords(
     SimpleMatrix childVec, IntPair span, List<String> words) {
   // TODO: factor out getting the words
   final int leftIndex = span.getSource();
   final SimpleMatrix leftContext;
   if (leftIndex < 0) {
     // No word at this position: fall back to the start-word vector.
     leftContext = dvModel.getStartWordVector();
   } else {
     leftContext = dvModel.getWordVector(words.get(leftIndex));
   }

   final int rightIndex = span.getTarget();
   final SimpleMatrix rightContext;
   if (rightIndex >= words.size()) {
     // Index runs past the last word: fall back to the end-word vector.
     rightContext = dvModel.getEndWordVector();
   } else {
     rightContext = dvModel.getWordVector(words.get(rightIndex));
   }

   return NeuralUtils.concatenate(childVec, leftContext, rightContext);
 }
예제 #2
0
  /**
   * Builds a Stanford {@link Tree} for the given ROOT annotation and stores every annotation
   * covered by the root that is not parser-related in the label of its best-fitting tree node.
   *
   * <p>Annotations of type POS, Constituent, Dependency, PennTree, Lemma, Token and
   * DocumentMetaData are treated as parser-related and are not attached. Tokens are collected
   * separately because they are needed for the span calculations. All remaining covered
   * annotations are attached to the node returned by {@code TreeWithTokens.getBestFit}, i.e. the
   * deepest node in the tree that still completely contains the annotation. Annotations for which
   * no such node is found are skipped.
   *
   * <p><strong>CAUTION: </strong><i>This method is intended for use by CAS Multipliers that
   * create new CASes from the returned tree. The begin/end offsets of the attached annotations in
   * the source CAS are overwritten with offsets relative to the start of their best-fitting node.
   * Do NOT use the source CAS after this method has been called. If the annotations do not have
   * to be recovered or accessed through the tree, use createStanfordTree() instead, which does
   * not change the CAS.</i>
   *
   * <p>TODO: This behavior could be avoided by modifying COPIES of the annotations instead of the
   * originals. Making copies would require a dummy CAS to copy the annotations into; when they
   * are recovered they are copied to the new destination CAS anyway.
   *
   * @param root the ROOT annotation
   * @return a {@link Tree} object representing the syntax structure of the sentence
   * @throws CASException if the JCas cannot be accessed.
   */
  public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException {
    JCas jcas = root.getCAS().getJCas();

    // Build the plain syntax tree first; the annotations are attached to its labels afterwards.
    Tree tree = createStanfordTree(root);

    List<Annotation> annotationsToAttach = new ArrayList<Annotation>();
    List<Token> tokens = new ArrayList<Token>();

    // selectCovered is used instead of an iterator, because subiterators did not work in all
    // cases
    for (Annotation covered : JCasUtil.selectCovered(jcas, Annotation.class, root)) {
      if (covered instanceof Token) {
        // Tokens are not attached themselves, but are required for span calculations.
        tokens.add((Token) covered);
        continue;
      }

      boolean parserRelated =
          covered instanceof POS
              || covered instanceof Constituent
              || covered instanceof Dependency
              || covered instanceof PennTree
              || covered instanceof Lemma
              || covered instanceof DocumentMetaData;
      if (!parserRelated) {
        annotationsToAttach.add(covered);
      }
    }

    // Wrapper that relates tree nodes to the collected tokens
    TreeWithTokens treeWithTokens = new TreeWithTokens(tree, tokens);

    for (Annotation anno : annotationsToAttach) {
      Tree node = treeWithTokens.getBestFit(anno);
      if (node == null) {
        // No node fits this annotation; it is not attached.
        continue;
      }

      // Rewrite the annotation offsets so that they are relative to the start of the node span.
      // This modifies the annotation in the source CAS.
      int nodeStart = treeWithTokens.getSpan(node).getSource();
      anno.setBegin(anno.getBegin() - nodeStart);
      anno.setEnd(anno.getEnd() - nodeStart);

      // Fetch the collection of UIMA annotations stored in the node label, creating it on first
      // use, then add the annotation and write the collection back to the label.
      CoreLabel nodeLabel = (CoreLabel) node.label();
      Collection<Annotation> attached = nodeLabel.get(UIMAAnnotations.class);
      if (attached == null) {
        attached = new ArrayList<Annotation>();
      }
      attached.add(anno);
      nodeLabel.set(UIMAAnnotations.class, attached);
    }

    return tree;
  }