/**
 * Looks up the label of every non-wall node of {@code tree} in {@code alphabet} and fails
 * if any label cannot be resolved to an index.
 *
 * <p>NOTE(review): judging by the method name, {@code alphabet.lookupIndex} presumably
 * registers previously unseen labels while the alphabet is still allowed to grow; confirm
 * against the alphabet implementation.
 *
 * @param tree the dependency tree whose node labels are looked up
 * @throws RuntimeException if {@code alphabet.lookupIndex} returns {@code -1} for a label
 */
private void addTreeToAlphabet(DepTree tree) {
    for (DepTreeNode node : tree) {
        String label = node.getLabel();
        // Compare by content, not by reference: "!=" on Strings only works when both sides
        // happen to be the very same object. Constant-first equals is also null-safe.
        if (WallDepTreeNode.WALL_LABEL.equals(label)) {
            continue;
        }
        int idx = alphabet.lookupIndex(label);
        if (idx == -1) {
            throw new RuntimeException("Unknown label: " + label);
        }
    }
}
/**
 * Gathers the distinct labels of all nodes in all trees of this collection, leaving out
 * the wall label.
 *
 * @return a newly created set holding every node label except
 *         {@code WallDepTreeNode.WALL_LABEL}
 */
public Set<String> getTypes() {
    final Set<String> labels = new HashSet<String>();
    for (final DepTree currentTree : this) {
        for (final DepTreeNode currentNode : currentTree) {
            labels.add(currentNode.getLabel());
        }
    }
    // The wall label is collected along with the rest and dropped once at the end.
    labels.remove(WallDepTreeNode.WALL_LABEL);
    return labels;
}
/** * @param dn * @param prevRelType * @param listFeatIndsOfCurInp * @param listFeatCountOfCurInp * @throws IOException */ private void createFeaturesFromInputGraph( DependencyTree dt, DepTreeNode dn, String prevRelType, ArrayList<Integer> listFeatIndsOfCurInp, ArrayList<Integer> listFeatCountOfCurInp, ArrayList<Integer> listOfNodeTraversed, int[] e1Boundaries, int[] e2Boundaries) throws IOException { // TODO: what will happen if there is multiple root? int weight = 1; // cc++; listOfNodeTraversed.add(dn.wordIndex); // if ( isEntityButNotOneOfTheTargetEntities(dn, e1Boundaries, e2Boundaries) ) // dn.lemma = "ENTITYother"; // System.out.println(cc); String[] feature = new String[0]; // */ // BFS traversal for (int i = 0; i < dn.getChildrenWordIndexes().size(); i++) { // * /* // v-walk feature = new String[] { dn.lemma + "@" + dn.relNamesWithChildren.get(i) + "@" + dn.children.get(i).lemma, dn.children.get(i).lemma + "@" + dn.relNamesWithChildren.get(i) + "@" + dn.lemma }; //addNewFeatureInList(feature, listFeatIndsOfCurInp, listFeatCountOfCurInp); */ // FileUtility.writeInFile( vectOutFile, feature[0] + "\n", true); feature = new String[] { dn.pos + "@" + dn.getRelNamesWithChildren().get(i) + "@" + dn.getChildren().get(i).pos, dn.getChildren().get(i).pos + "@" + dn.getRelNamesWithChildren().get(i) + "@" + dn.pos }; GenericFeatVect.addNewFeatureInList( feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight); // */ // e-walk if (!prevRelType.isEmpty()) { feature = new String[] { prevRelType + "#" + dn.pos + "#" + dn.getRelNamesWithChildren().get(i), dn.getRelNamesWithChildren().get(i) + "#" + dn.pos + "#" + prevRelType }; GenericFeatVect.addNewFeatureInList( feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight); feature = new String[] { prevRelType + "#" + dn.lemma + "#" + dn.getRelNamesWithChildren().get(i), dn.getRelNamesWithChildren().get(i) + "#" + dn.lemma + "#" + prevRelType }; GenericFeatVect.addNewFeatureInList( feature, 1, 
listFeatIndsOfCurInp, listFeatCountOfCurInp, weight); } for (int k = i + 1; k < dn.getChildrenWordIndexes().size(); k++) { // add e-walk for the siblings WITHOUT taking lexical order in consideration feature = new String[] { dn.getRelNamesWithChildren().get(i) + "#" + dn.pos + "#" + dn.getRelNamesWithChildren().get(k), dn.getRelNamesWithChildren().get(k) + "#" + dn.pos + "#" + dn.getRelNamesWithChildren().get(i) }; GenericFeatVect.addNewFeatureInList( feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight); feature = new String[] { dn.getRelNamesWithChildren().get(i) + "#" + dn.lemma + "#" + dn.getRelNamesWithChildren().get(k), dn.getRelNamesWithChildren().get(k) + "#" + dn.lemma + "#" + dn.getRelNamesWithChildren().get(i), }; GenericFeatVect.addNewFeatureInList( feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight); /* // 3-gram of siblings and parent feature = new String[] { dn.children.get(i).lemma + "#" + dn.lemma + "#" + dn.children.get(k).lemma, dn.children.get(k).lemma + "#" + dn.lemma + "#" + dn.children.get(i).lemma }; addNewFeatureInList(feature, listFeatIndsOfCurInp, listFeatCountOfCurInp); /* feature = new String[] { dn.children.get(i).pos + "#" + dn.pos + "#" + dn.children.get(k).pos, dn.children.get(k).pos + "#" + dn.pos + "#" + dn.children.get(i).pos }; addNewFeatureInList(feature, listFeatIndsOfCurInp, listFeatCountOfCurInp); */ } } // for grand children for (int i = 0; i < dn.getChildrenWordIndexes().size(); i++) { if (!listOfNodeTraversed.contains(dn.getChildrenWordIndexes().get(i))) createFeaturesFromInputGraph( dt, dn.getChildren().get(i), dn.getRelNamesWithChildren().get(i), listFeatIndsOfCurInp, listFeatCountOfCurInp, listOfNodeTraversed, e1Boundaries, e2Boundaries); } // for parents for (int i = 0; dn.getParents() != null && i < dn.getParents().size(); i++) { /* for ( int c=0; c< dn.childrenWordIndex.size(); c++ ){ // 3-gram feature = new String[] { dn.getParents().get(i).lemma + "#" + dn.lemma + "#" + 
dn.children.get(c).lemma, //dn.children.get(c).lemma + "#" + dn.lemma + "#" + dn.getParents().get(i).lemma }; addNewFeatureInList(feature, listFeatIndsOfCurInp, listFeatCountOfCurInp); } */ if (!listOfNodeTraversed.contains(dn.getParents().get(i).wordIndex)) { createFeaturesFromInputGraph( dt, dn.getParents().get(i), "", listFeatIndsOfCurInp, listFeatCountOfCurInp, listOfNodeTraversed, e1Boundaries, e2Boundaries); } } }