Пример #1
0
  /**
   * Adds lemma unigram features for the words immediately surrounding an entity.
   *
   * <p>At most two words before the entity's start word index and at most two words after its end
   * word index are visited; both windows stop at the bounds of {@code dt.allNodesByWordIndex}.
   * Every visited word produces one single-element feature of the form {@code lemma$offset}.
   *
   * <p>NOTE(review): the offset of a following word is computed as {@code end - i}, so it is
   * negative exactly like the offset of a preceding word. A lemma one position before and the same
   * lemma one position after the entity therefore yield the same feature string. Confirm that this
   * is intended before changing it, since it affects the emitted feature names.
   *
   * @param dt dependency tree whose {@code allNodesByWordIndex} array supplies the lemmas
   * @param weight value forwarded as the last argument of {@code
   *     GenericFeatVect.addNewFeatureInList}
   * @param ent entity whose start and end word indexes anchor the two context windows
   * @param listFeatIndsOfCurInp feature index list handed to {@code addNewFeatureInList}
   * @param listFeatCountOfCurInp feature count list handed to {@code addNewFeatureInList}
   * @throws IOException declared by the signature; presumably raised by {@code
   *     addNewFeatureInList}
   */
  private void createNgramFeatures(
      DependencyTree dt,
      int weight,
      Entity ent,
      ArrayList<Integer> listFeatIndsOfCurInp,
      ArrayList<Integer> listFeatCountOfCurInp)
      throws IOException {

    // Preceding context: start at the word just before the entity and walk backwards.
    final int startWI = ent.getStartWordIndex();
    final int firstLeftWI = Math.max(0, startWI - 2);
    int pos = startWI - 1;
    while (pos >= firstLeftWI) {
      String leftFeature = dt.allNodesByWordIndex[pos].lemma + "$" + (pos - startWI);
      GenericFeatVect.addNewFeatureInList(
          new String[] {leftFeature}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight);
      pos--;
    }

    // Following context: start at the word just after the entity and walk forwards.
    final int endWI = ent.getEndWordIndex();
    final int lastRightWI = Math.min(endWI + 2, dt.allNodesByWordIndex.length - 1);
    pos = endWI + 1;
    while (pos <= lastRightWI) {
      String rightFeature = dt.allNodesByWordIndex[pos].lemma + "$" + (endWI - pos);
      GenericFeatVect.addNewFeatureInList(
          new String[] {rightFeature}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, weight);
      pos++;
    }
  }
Пример #2
0
  /**
   * Adds features for every entity of category "DISO" (compared case-insensitively) found in the
   * sentence.
   *
   * <p>For each such entity the method:
   *
   * <ul>
   *   <li>adds the flag feature {@code DISOinsideSentence};
   *   <li>looks up the entity's head word and the governor index returned by {@code
   *       getNonConjGovernorIndex}; the entity is skipped when that index is negative;
   *   <li>adds {@code nearestVerbGovernorOfTheDISO=<lemma>} when the index returned by {@code
   *       getNearestVerbGovernor} is non-negative and that node governs all word indexes of both
   *       target entities;
   *   <li>adds {@code immediateGovernorOfTheDISO=<lemma>} when the immediate governor node governs
   *       all word indexes of both target entities;
   *   <li>adds {@code immediateGovernorIsVerbGovernorOfTheDISO} when both governor indexes are
   *       equal.
   * </ul>
   *
   * @param curSen sentence providing the entity list, dependency tree and parser lemmas
   * @param listFeatIndsOfCurInp feature index list handed to {@code addNewFeatureInList}
   * @param listFeatCountOfCurInp feature count list handed to {@code addNewFeatureInList}
   * @param e1 first target entity
   * @param e2 second target entity
   */
  private void extractNonTargetEntityFeatures(
      Sentence curSen,
      ArrayList<Integer> listFeatIndsOfCurInp,
      ArrayList<Integer> listFeatCountOfCurInp,
      Entity e1,
      Entity e2) {

    for (int entIdx = 0; entIdx < curSen.listOfEntities.size(); entIdx++) {

      Entity candidate = curSen.listOfEntities.get(entIdx);
      if (!candidate.getNEcategory().equalsIgnoreCase("DISO")) {
        continue;
      }

      GenericFeatVect.addNewFeatureInList(
          new String[] {"DISOinsideSentence"}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);

      DepTreeNode disoHead =
          curSen.depTree.getHeadWordFromWordBoundaries(
              candidate.getAllWordIndexes(), true, curSen);

      int immediateGovWI = getNonConjGovernorIndex(curSen, disoHead.wordIndex);
      if (immediateGovWI < 0) {
        continue;
      }

      int verbGovWI = getNearestVerbGovernor(immediateGovWI, curSen);

      // Word indexes of both target entities, in e1-then-e2 order.
      ArrayList<Integer> targetWordIndexes = new ArrayList<Integer>(e1.getAllWordIndexes());
      targetWordIndexes.addAll(e2.getAllWordIndexes());

      // Are both target entities governed by the verb governor of this DISO entity?
      boolean verbGovCoversTargets =
          verbGovWI >= 0
              && curSen.depTree.allNodesByWordIndex[verbGovWI].governAllWIsInList(
                  targetWordIndexes);
      if (verbGovCoversTargets) {
        String verbGovFeature =
            "nearestVerbGovernorOfTheDISO=" + curSen.arrLemmasByParser[verbGovWI].toLowerCase();
        GenericFeatVect.addNewFeatureInList(
            new String[] {verbGovFeature}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);
      }

      // immediateGovWI is known to be non-negative here because of the guard above.
      boolean immediateGovCoversTargets =
          curSen.depTree.allNodesByWordIndex[immediateGovWI].governAllWIsInList(
              targetWordIndexes);
      if (immediateGovCoversTargets) {
        String immediateGovFeature =
            "immediateGovernorOfTheDISO="
                + curSen.arrLemmasByParser[immediateGovWI].toLowerCase();
        GenericFeatVect.addNewFeatureInList(
            new String[] {immediateGovFeature}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);
      }

      if (verbGovWI == immediateGovWI) {
        GenericFeatVect.addNewFeatureInList(
            new String[] {"immediateGovernorIsVerbGovernorOfTheDISO"},
            1,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp,
            1);
      }
    }
  }
Пример #3
0
  /**
   * Adds features describing how a negation word relates to the two target entities.
   *
   * <p>The governor index of the negation word is obtained from {@code getNonConjGovernorIndex}.
   * Nothing is added when that index is negative or is not contained in {@code
   * listOfReducedGraph}. Otherwise the method may add:
   *
   * <ul>
   *   <li>{@code bothEntDependOnImmediateGovernor} when the immediate governor node governs all
   *       word indexes of both target entities;
   *   <li>{@code immediateGovernorIsVerbGovernor} when the immediate governor index equals the
   *       index returned by {@code getNearestVerbGovernor};
   *   <li>{@code nearestVerbGovernor=<lemma>} when a verb governor index is available.
   * </ul>
   *
   * <p>A verb governor index that is not contained in {@code listOfReducedGraph} is treated as
   * missing (-1).
   *
   * @param curSen sentence providing the dependency tree and parser lemmas
   * @param listFeatIndsOfCurInp feature index list handed to {@code addNewFeatureInList}
   * @param listFeatCountOfCurInp feature count list handed to {@code addNewFeatureInList}
   * @param e1 first target entity
   * @param e2 second target entity
   * @param negWI word index of the negation word
   * @param listOfReducedGraph word indexes against which both governor indexes are checked
   */
  private void extractNegationScopeFeatures(
      Sentence curSen,
      ArrayList<Integer> listFeatIndsOfCurInp,
      ArrayList<Integer> listFeatCountOfCurInp,
      Entity e1,
      Entity e2,
      int negWI,
      ArrayList<Integer> listOfReducedGraph) {

    final int immediateGovWI = getNonConjGovernorIndex(curSen, negWI);

    // The membership test is deliberately evaluated first, as in the original condition.
    boolean immediateGovUsable =
        listOfReducedGraph.contains(immediateGovWI) && immediateGovWI >= 0;
    if (!immediateGovUsable) {
      return;
    }

    // A verb governor that is not listed in listOfReducedGraph counts as absent.
    int verbGovWI = getNearestVerbGovernor(immediateGovWI, curSen);
    if (!listOfReducedGraph.contains(verbGovWI)) {
      verbGovWI = -1;
    }

    // Word indexes of both target entities, in e1-then-e2 order.
    ArrayList<Integer> targetWordIndexes = new ArrayList<Integer>(e1.getAllWordIndexes());
    targetWordIndexes.addAll(e2.getAllWordIndexes());

    // immediateGovWI is known to be non-negative here because of the guard above.
    boolean immediateGovCoversTargets =
        curSen.depTree.allNodesByWordIndex[immediateGovWI].governAllWIsInList(targetWordIndexes);
    if (immediateGovCoversTargets) {
      GenericFeatVect.addNewFeatureInList(
          new String[] {"bothEntDependOnImmediateGovernor"},
          1,
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          1);
    }

    if (verbGovWI == immediateGovWI) {
      GenericFeatVect.addNewFeatureInList(
          new String[] {"immediateGovernorIsVerbGovernor"},
          1,
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          1);
    }

    if (verbGovWI >= 0) {
      String verbGovFeature =
          "nearestVerbGovernor=" + curSen.arrLemmasByParser[verbGovWI].toLowerCase();
      GenericFeatVect.addNewFeatureInList(
          new String[] {verbGovFeature}, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);
    }
  }
Пример #4
0
  /**
   * Builds the feature vector for one candidate entity pair and registers it in the static
   * collections of {@code GenericFeatVect}.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Find the minimal subtree connecting the head words of both entities on a clone of the
   *       sentence's dependency tree. If none exists, the pair and the sentence text are appended
   *       to {@code GenericFeatVect.vectOutFile} and {@code false} is returned.
   *   <li>Record the instance description in {@code GenericFeatVect.listOfAllInstances}.
   *   <li>Optionally add context n-gram and graph walk features, regular-expression pattern
   *       features, dependency pattern features, trigger word features and negation cue features.
   *   <li>Add the features produced by {@code ExtAceFeatVect.getZhouEtAl2005FeatVal} and by {@code
   *       extractNonTargetEntityFeatures}.
   *   <li>Sort the features by index and append both lists to {@code
   *       GenericFeatVect.listOfAllInstancesWithFeat} and {@code listOfAllInstancesWithFeatCount}.
   * </ol>
   *
   * @param objRel relation forwarded to {@code
   *     PatternsDepRelFromGraph.extractDepRelsInReducedGraph}
   * @param objCurSen sentence containing both entities
   * @param medtType value forwarded to {@code findMinimalSubTreeWithEntities}
   * @param discardDepRelUsingProbabilityInReducedGraph flag forwarded to {@code
   *     extractDepRelsInReducedGraph}
   * @param useWalkFeatures whether to add context n-gram features and features from the minimal
   *     subtree
   * @param useRegExPatterns whether to run {@code matchPPIpatternOnSentence} in both entity orders
   * @param useDepPatterns whether to add features from the dependency relations of the reduced
   *     graph
   * @param useTriggers whether to add trigger word features
   * @param triggersFromWholeRGinsteadOfLCP if {@code true}, trigger words are taken from all nodes
   *     of the reduced graph (element 0 of the node lists); otherwise from the least common parents
   *     (element 1)
   * @param useNegativeCues whether to add negation cue features
   * @param e1 first target entity
   * @param e2 second target entity
   * @return {@code false} when no minimal subtree connects the entities and no instance was
   *     created, {@code true} otherwise
   * @throws IOException declared by the signature; presumably raised by the file writing and
   *     feature helpers
   */
  public boolean setInpVectFromDepGraphOfPairsAndTrigger(
      Relation objRel,
      Sentence objCurSen,
      int medtType,
      boolean discardDepRelUsingProbabilityInReducedGraph,
      boolean useWalkFeatures,
      boolean useRegExPatterns,
      boolean useDepPatterns,
      boolean useTriggers,
      boolean triggersFromWholeRGinsteadOfLCP,
      boolean useNegativeCues,
      Entity e1,
      Entity e2)
      throws IOException {

    ArrayList<Integer> listFeatIndsOfCurInp = new ArrayList<Integer>(),
        listFeatCountOfCurInp = new ArrayList<Integer>();

    DepTreeNode
        headOfEnt1 =
            objCurSen.depTree.getHeadWordFromWordBoundaries(
                e1.getAllWordIndexes(), true, objCurSen),
        headOfEnt2 =
            objCurSen.depTree.getHeadWordFromWordBoundaries(
                e2.getAllWordIndexes(), true, objCurSen);

    // All nodes in the shortest path connecting the target pairs must be retained
    // All nodes satisfying the 3 rules of MEDT kernel must be retained
    DepTreeNode dn =
        objCurSen
            .depTree
            .clone()
            .findMinimalSubTreeWithEntities(false, headOfEnt1, medtType, headOfEnt2);

    /*
     *  If there is no minimal subtree/path between target entities then we do not consider to generate
     *  instance for training/testing.
     */
    if (dn == null) {
      FileUtility.writeInFile(
          GenericFeatVect.vectOutFile,
          e1.id + " " + e1.name + "\t" + e2.id + " " + e2.name + "\n\n",
          true);

      FileUtility.writeInFile(GenericFeatVect.vectOutFile, objCurSen.text + "\n\n", true);

      return false;
    }

    GenericFeatVect.listOfAllInstances.add(
        e1.id + " " + e1.name + "\t" + e2.id + " " + e2.name + "\n\n" + objCurSen.text + "\n\n");

    // Construct feature set using e-walks and v-walks (dn is known to be non-null here)
    if (useWalkFeatures) {

      createNgramFeatures(objCurSen.depTree, 1, e1, listFeatIndsOfCurInp, listFeatCountOfCurInp);
      createNgramFeatures(objCurSen.depTree, 1, e2, listFeatIndsOfCurInp, listFeatCountOfCurInp);

      objCurSen.depTree.replaceEntitiesWithDummies(
          dn, e1.boundaries, e2.boundaries, new ArrayList<Integer>());
      createFeaturesFromInputGraph(
          objCurSen.depTree,
          dn,
          "",
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          new ArrayList<Integer>(),
          e1.boundaries,
          e2.boundaries);
    }

    if (useRegExPatterns) {
      matchPPIpatternOnSentence(
          objCurSen.text, e1.name, e2.name, listFeatIndsOfCurInp, listFeatCountOfCurInp);
      matchPPIpatternOnSentence(
          objCurSen.text, e2.name, e1.name, listFeatIndsOfCurInp, listFeatCountOfCurInp);
    }

    ArrayList<String> listOfDepRelsInReducedGraph = new ArrayList<String>();

    ArrayList<ArrayList<Integer>> listNodesAndLCP =
        new PatternsDepRelFromGraph()
            .extractDepRelsInReducedGraph(
                objRel,
                objCurSen,
                listOfDepRelsInReducedGraph,
                discardDepRelUsingProbabilityInReducedGraph,
                false);

    if (useDepPatterns) {
      extractDepPatternFeatures(
          listOfDepRelsInReducedGraph, listFeatIndsOfCurInp, listFeatCountOfCurInp);
    }

    //  NOTE: listNodesAndLCP has two elements - (0) all the nodes in Reduced graph, and (1) the
    // least common parents (LCPs) in Reduced graph.

    if (listNodesAndLCP == null) {
      listNodesAndLCP = new ArrayList<ArrayList<Integer>>();
    }

    if (useTriggers) {
      // Element 0 holds every reduced-graph node, element 1 only the LCPs. Only read the element
      // that is actually present so a shorter list cannot raise IndexOutOfBoundsException.
      int triggerListIdx = triggersFromWholeRGinsteadOfLCP ? 0 : 1;

      if (triggerListIdx < listNodesAndLCP.size()) {
        ArrayList<Integer> triggerNodes = listNodesAndLCP.get(triggerListIdx);

        for (int i = 0; i < triggerNodes.size(); i++) {
          addTriggerWordFeatures(
              objCurSen.depGraph.allNodesByWordIndex[triggerNodes.get(i)].word,
              listFeatIndsOfCurInp,
              listFeatCountOfCurInp);
        }
      }
    }

    if (useNegativeCues && listNodesAndLCP.size() > 0) {

      ArrayList<Integer> reducedGraphNodes = listNodesAndLCP.get(0);
      int totalWords = objCurSen.depGraph.allNodesByWordIndex.length;

      for (int i = 0; i < reducedGraphNodes.size(); i++) {

        int negWI = reducedGraphNodes.get(i);
        String negLemma = objCurSen.depGraph.allNodesByWordIndex[negWI].lemma;

        if (!Triggers.listOfNegativeWords.contains(negLemma)) {
          continue;
        }

        // add a feature indicating that there is a negative word in the reduced graph
        GenericFeatVect.addNewFeatureInList(
            new String[] {"HasNegWord@$" + negLemma},
            1,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp,
            1);

        // Word following the negation cue. A directly following "be" or a word tagged IN is
        // skipped. Both look-ahead positions are bounds-checked: a cue at (or right before) the
        // end of the sentence has no such word and simply produces no WordNextToNegCue feature
        // instead of failing with ArrayIndexOutOfBoundsException.
        int nextWI = negWI + 1;
        if (nextWI < totalWords
            && ("be".equalsIgnoreCase(objCurSen.depGraph.allNodesByWordIndex[nextWI].lemma)
                || "IN".equalsIgnoreCase(objCurSen.depGraph.allNodesByWordIndex[nextWI].pos))) {
          nextWI++;
        }

        if (nextWI < totalWords) {
          GenericFeatVect.addNewFeatureInList(
              new String[] {
                "WordNextToNegCue@$" + objCurSen.depGraph.allNodesByWordIndex[nextWI].lemma
              },
              1,
              listFeatIndsOfCurInp,
              listFeatCountOfCurInp,
              1);
        }

        // extract negation scope features
        if (negLemma.matches("(no|not)")) {
          extractNegationScopeFeatures(
              objCurSen,
              listFeatIndsOfCurInp,
              listFeatCountOfCurInp,
              e1,
              e2,
              negWI,
              reducedGraphNodes);
        }
      }
    }

    // add Zhou et al. 2005 features
    new ExtAceFeatVect()
        .getZhouEtAl2005FeatVal(objCurSen, e1, e2, listFeatIndsOfCurInp, listFeatCountOfCurInp);

    extractNonTargetEntityFeatures(objCurSen, listFeatIndsOfCurInp, listFeatCountOfCurInp, e1, e2);

    GenericFeatVect.sortFeatValByIndx(listFeatIndsOfCurInp, listFeatCountOfCurInp);

    GenericFeatVect.listOfAllInstancesWithFeat.add(listFeatIndsOfCurInp);
    GenericFeatVect.listOfAllInstancesWithFeatCount.add(listFeatCountOfCurInp);

    return true;
  }