예제 #1
0
 /**
  * Looks up the label of every non-wall node of {@code tree} in {@code alphabet}.
  *
  * <p>The wall node is recognised by comparing label contents with {@code equals}, so a wall
  * label held in a different object instance than {@code WallDepTreeNode.WALL_LABEL} is still
  * skipped.
  *
  * @param tree the dependency tree whose node labels are looked up
  * @throws RuntimeException if {@code alphabet.lookupIndex} returns -1 for a label
  */
 private void addTreeToAlphabet(DepTree tree) {
   for (DepTreeNode node : tree) {
     // Compare by content: != on object references only tests identity.
     if (!WallDepTreeNode.WALL_LABEL.equals(node.getLabel())) {
       int idx = alphabet.lookupIndex(node.getLabel());
       if (idx == -1) {
         throw new RuntimeException("Unknown label: " + node.getLabel());
       }
     }
   }
 }
예제 #2
0
 /**
  * Gathers the distinct node labels found across every tree of this collection.
  *
  * @return a newly created set of labels, with {@code WallDepTreeNode.WALL_LABEL} removed
  */
 public Set<String> getTypes() {
   final Set<String> labels = new HashSet<String>();
   for (final DepTree depTree : this) {
     for (final DepTreeNode treeNode : depTree) {
       final String label = treeNode.getLabel();
       labels.add(label);
     }
   }
   // Drop the wall label once at the end instead of filtering node by node.
   labels.remove(WallDepTreeNode.WALL_LABEL);
   return labels;
 }
예제 #3
0
  /**
   * Recursively walks the dependency graph starting at {@code dn} and registers walk features
   * for every node reached.
   *
   * <p>For the current node, each child contributes a part-of-speech "v-walk" pair
   * ({@code pos@relation@pos}, in both directions). When {@code prevRelType} is non-empty, an
   * "e-walk" pair ({@code relation#node#relation}, in both directions) is registered twice: once
   * using the node's POS and once using its lemma. Each pair of outgoing relations of the node
   * (first index lower than the second) likewise yields a POS-based and a lemma-based e-walk
   * pair. Afterwards the method recurses first into the children and then into the parents whose
   * word index is not yet contained in {@code listOfNodeTraversed}.
   *
   * @param dt dependency tree; not read here, only forwarded to the recursive calls
   * @param dn node to process; its word index is appended to {@code listOfNodeTraversed}
   * @param prevRelType relation name under which {@code dn} was reached from its parent; the
   *     empty string disables the incoming-relation e-walk features (parents are always entered
   *     with the empty string)
   * @param listFeatIndsOfCurInp handed to {@code GenericFeatVect.addNewFeatureInList}
   * @param listFeatCountOfCurInp handed to {@code GenericFeatVect.addNewFeatureInList}
   * @param listOfNodeTraversed word indexes of the nodes visited so far; updated in place
   * @param e1Boundaries not read here, only forwarded to the recursive calls
   * @param e2Boundaries not read here, only forwarded to the recursive calls
   * @throws IOException as declared; this method does not throw it directly
   */
  private void createFeaturesFromInputGraph(
      DependencyTree dt,
      DepTreeNode dn,
      String prevRelType,
      ArrayList<Integer> listFeatIndsOfCurInp,
      ArrayList<Integer> listFeatCountOfCurInp,
      ArrayList<Integer> listOfNodeTraversed,
      int[] e1Boundaries,
      int[] e2Boundaries)
      throws IOException {
    // TODO: what will happen if there are multiple roots?

    final int weight = 1;
    listOfNodeTraversed.add(dn.wordIndex);

    // Features between the current node and its children / among its outgoing relations.
    for (int child = 0; child < dn.getChildrenWordIndexes().size(); child++) {
      final String childRel = dn.getRelNamesWithChildren().get(child);
      final DepTreeNode childNode = dn.getChildren().get(child);

      // v-walk over part-of-speech tags
      addWalkFeaturePair(
          dn.pos + "@" + childRel + "@" + childNode.pos,
          childNode.pos + "@" + childRel + "@" + dn.pos,
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          weight);

      // e-walk: incoming relation -> node -> outgoing relation
      if (!prevRelType.isEmpty()) {
        addWalkFeaturePair(
            prevRelType + "#" + dn.pos + "#" + childRel,
            childRel + "#" + dn.pos + "#" + prevRelType,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp,
            weight);
        addWalkFeaturePair(
            prevRelType + "#" + dn.lemma + "#" + childRel,
            childRel + "#" + dn.lemma + "#" + prevRelType,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp,
            weight);
      }

      // e-walk across siblings WITHOUT taking lexical order into consideration
      for (int sibling = child + 1; sibling < dn.getChildrenWordIndexes().size(); sibling++) {
        final String siblingRel = dn.getRelNamesWithChildren().get(sibling);
        addWalkFeaturePair(
            childRel + "#" + dn.pos + "#" + siblingRel,
            siblingRel + "#" + dn.pos + "#" + childRel,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp,
            weight);
        addWalkFeaturePair(
            childRel + "#" + dn.lemma + "#" + siblingRel,
            siblingRel + "#" + dn.lemma + "#" + childRel,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp,
            weight);
      }
    }

    // Descend into children that have not been visited yet.
    for (int child = 0; child < dn.getChildrenWordIndexes().size(); child++) {
      if (listOfNodeTraversed.contains(dn.getChildrenWordIndexes().get(child))) {
        continue;
      }
      createFeaturesFromInputGraph(
          dt,
          dn.getChildren().get(child),
          dn.getRelNamesWithChildren().get(child),
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          listOfNodeTraversed,
          e1Boundaries,
          e2Boundaries);
    }

    // Climb to parents that have not been visited yet; no incoming relation is passed on.
    for (int up = 0; dn.getParents() != null && up < dn.getParents().size(); up++) {
      final DepTreeNode parent = dn.getParents().get(up);
      if (listOfNodeTraversed.contains(parent.wordIndex)) {
        continue;
      }
      createFeaturesFromInputGraph(
          dt,
          parent,
          "",
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          listOfNodeTraversed,
          e1Boundaries,
          e2Boundaries);
    }
  }

  /** Registers a feature string together with its mirrored counterpart in a single call. */
  private void addWalkFeaturePair(
      String forward,
      String backward,
      ArrayList<Integer> listFeatIndsOfCurInp,
      ArrayList<Integer> listFeatCountOfCurInp,
      int weight)
      throws IOException {
    GenericFeatVect.addNewFeatureInList(
        new String[] {forward, backward}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight);
  }