private SimpleMatrix concatenateContextWords( SimpleMatrix childVec, IntPair span, List<String> words) { // TODO: factor out getting the words SimpleMatrix left = (span.getSource() < 0) ? dvModel.getStartWordVector() : dvModel.getWordVector(words.get(span.getSource())); SimpleMatrix right = (span.getTarget() >= words.size()) ? dvModel.getEndWordVector() : dvModel.getWordVector(words.get(span.getTarget())); return NeuralUtils.concatenate(childVec, left, right); }
/** * Recreates a Stanford Tree from the StanfordParser annotations and saves all * non-StanfordParser-Annotations within the scope of the sentence in the label of the best * fitting node. * * <p><strong>CAUTION: </strong><i>This method is intended for the use by CAS Multipliers, which * create new CASes from this tree. The annotation-spans in the source-CAS will be changed!!!!!! * You do NOT want to use the source CAS after this method has been called. The * createStanfordTree()-method does not change the CAS, so use this instead, if the annotations do * not have to be recovered or accessed in the tree.</i> * * <p>TODO: This behavior could be changed by making COPIES of the annotations and changing the * copied instead of the originals. However, in order to being able to make copies, a dummy CAS * must be introduced to which the annotations can be copied. When they are recovered, they will * be copied to the new destination CAS anyway. * * @param root the ROOT annotation * @return an {@link Tree} object representing the syntax structure of the sentence * @throws CASException if the JCas cannot be accessed. */ public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException { JCas aJCas = root.getCAS().getJCas(); // Create tree Tree tree = createStanfordTree(root); // Get all non-parser related annotations // and all tokens (needed for span-calculations later on) List<Annotation> nonParserAnnotations = new ArrayList<Annotation>(); List<Token> tokens = new ArrayList<Token>(); // Using getCoveredAnnotations instead of iterate, because subiterators did not work in all // cases List<Annotation> annosWithinRoot = JCasUtil.selectCovered(aJCas, Annotation.class, root); for (Annotation curAnno : annosWithinRoot) { if (!(curAnno instanceof POS) && !(curAnno instanceof Constituent) && !(curAnno instanceof Dependency) && !(curAnno instanceof PennTree) && !(curAnno instanceof Lemma) && !(curAnno instanceof Token) && !(curAnno instanceof DocumentMetaData)) { nonParserAnnotations.add(curAnno); } else if (curAnno instanceof Token) { tokens.add((Token) curAnno); } } // create wrapper for tree and its tokens TreeWithTokens annoTree = new TreeWithTokens(tree, tokens); /* * Add annotations to the best-fitting nodes. The best-fitting node for an annotation is the * deepest node in the tree that still completely contains the annotation. */ for (Annotation curAnno : nonParserAnnotations) { // get best fitting node Tree bestFittingNode = annoTree.getBestFit(curAnno); // Add annotation to node if (bestFittingNode != null) { // translate annotation span to a value relative to the // node-span IntPair span = annoTree.getSpan(bestFittingNode); curAnno.setBegin(curAnno.getBegin() - span.getSource()); curAnno.setEnd(curAnno.getEnd() - span.getSource()); // get the collection from the label of the best-fitting node in which we store UIMA // annotations or create it, if it does not exist Collection<Annotation> annotations = ((CoreLabel) bestFittingNode.label()).get(UIMAAnnotations.class); if (annotations == null) { annotations = new ArrayList<Annotation>(); } // add annotation + checksum of annotated text to list and write it back to node // label annotations.add(curAnno); ((CoreLabel) bestFittingNode.label()).set(UIMAAnnotations.class, annotations); } } return tree; }