コード例 #1
0
  /**
   * Collects candidate arg1 head positions as (sentence index, token index) pairs.
   *
   * <p>Candidates are gathered from two places:
   *
   * <ol>
   *   <li>The connective's own sentence ({@code arg2Line}): every index returned by {@code
   *       getReachableIndices} for the connective head whose POS tag matches {@code
   *       VB.*|NNS?|JJ.*|MD}.
   *   <li>Each preceding sentence, walking backwards while the distance budget is non-negative:
   *       every index returned by {@code getReachableIndices} for that sentence's Collins head,
   *       filtered by the same POS pattern.
   * </ol>
   *
   * <p>The distance budget starts at 10. After the connective's sentence it is reduced by one plus
   * the length of the dependency path from the connective head to the sentence's main head (or by
   * one when no such path exists). Each preceding sentence then costs 2, or 1 when it yields no
   * head or no reachable indices.
   *
   * @param document source of sentences, parse trees and dependency graphs
   * @param arg2Line index of the sentence containing the connective
   * @param connStart connective span start, as passed to {@code connAnalyzer.getHeadWord}
   * @param connEnd connective span end, as passed to {@code connAnalyzer.getHeadWord}
   * @param arg1Line currently unused by this method
   * @return the candidates in discovery order; never {@code null}
   */
  List<Util.Pair<Integer, Integer>> getCandidates(
      Document document, int arg2Line, int connStart, int connEnd, int arg1Line) {
    List<Util.Pair<Integer, Integer>> candidates = new ArrayList<Util.Pair<Integer, Integer>>();
    // Compiled once here rather than re-parsed for every token through String.matches.
    java.util.regex.Pattern candidatePos = java.util.regex.Pattern.compile("VB.*|NNS?|JJ.*|MD");

    int distance = 10;
    Sentence arg2Sentence = document.getSentence(arg2Line);
    int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd);
    SimpleDepGraph arg2DepGraph = document.getDepGraph(arg2Line);
    List<Integer> reachable = arg2DepGraph.getReachableIndices(connHeadPos, false, distance);
    // The loop over preceding sentences already treats a null result as "nothing reachable";
    // apply the same guard here instead of failing with a NullPointerException.
    if (reachable != null) {
      for (Integer i : reachable) {
        if (candidatePos.matcher(arg2Sentence.get(i).getTag("POS")).matches()) {
          candidates.add(new Util.Pair<Integer, Integer>(arg2Line, i));
        }
      }
    }

    // Charge the budget for getting from the connective head to the sentence's main head.
    Tree mainHead = headAnalyzer.getCollinsHead(arg2Sentence.getParseTree().getChild(0));
    if (mainHead != null) {
      int mainHeadPos = treeAnalyzer.getLeafPosition(arg2Sentence.getParseTree(), mainHead);
      List<String> pathAsList = arg2DepGraph.getPathAsList(connHeadPos, mainHeadPos, false);
      if (pathAsList != null) {
        distance = distance - (1 + pathAsList.size());
      } else {
        distance--;
      }
    }

    // Walk backwards through the preceding sentences until the budget is exhausted.
    for (int i = arg2Line - 1; i >= 0 && distance >= 0; i--) {
      Sentence sentence = document.getSentence(i);
      SimpleDepGraph depGraph = document.getDepGraph(i);
      mainHead = headAnalyzer.getCollinsHead(sentence.getParseTree().getChild(0));
      if (mainHead == null) {
        distance--;
        continue;
      }
      int mainHeadPos = treeAnalyzer.getLeafPosition(sentence.getParseTree(), mainHead);
      reachable = depGraph.getReachableIndices(mainHeadPos, false, distance);
      if (reachable == null) {
        distance--;
        continue;
      }
      for (Integer j : reachable) {
        if (candidatePos.matcher(sentence.get(j).getTag("POS")).matches()) {
          candidates.add(new Util.Pair<Integer, Integer>(i, j));
        }
      }
      distance -= 2;
    }
    return candidates;
  }
コード例 #2
0
  /**
   * Adds dependency-path features that link the connective head to the candidate arg1 head.
   *
   * <p>If the candidate lies in the connective's sentence, the path is the dependency path from
   * the connective head to the candidate head. Otherwise the path is built from three parts: the
   * path from the connective head to the Collins head of the arg2 sentence, one {@code "SENT"}
   * step per sentence separating the two lines, and the path from the Collins head of the arg1
   * sentence to the candidate head. A part whose head position is {@code -1} is skipped; a part
   * whose path lookup returns {@code null} contributes the literal step {@code "null"}.
   *
   * <p>Emitted features: {@code M} (full path), {@code Q} (connective and path), {@code T}
   * (connective category and path), {@code O} (path without {@code cc}/{@code -cc} steps) and
   * {@code P} (path with consecutive repetitions collapsed).
   *
   * @param pl property list to extend
   * @param doc document holding the sentences, trees and dependency graphs
   * @param candidate (sentence index, token index) of the candidate arg1 head
   * @param arg2Line index of the sentence containing the connective
   * @param arg2HeadPos not used by this method
   * @param connStart connective span start
   * @param connEnd connective span end
   * @return the extended property list
   */
  private PropertyList addDependencyFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {
    Sentence arg2Sentence = doc.getSentence(arg2Line);
    String conn = arg2Sentence.toString(connStart, connEnd);
    String category = connAnalyzer.getCategory(conn.toLowerCase());
    int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd);

    int arg1Line = candidate.first();
    Tree arg1Root = doc.getTree(arg1Line);
    int arg1HeadPos = candidate.second();

    List<String> steps = new ArrayList<String>();
    if (arg1Line != arg2Line) {
      // Part 1: connective head -> main head of the arg2 sentence.
      Tree arg2Root = arg2Sentence.getParseTree();
      Tree arg2MainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0));
      int arg2MainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, arg2MainHead);
      if (arg2MainHeadPos != -1) {
        appendDependencyPathOrNull(
            steps, doc.getDepGraph(arg2Line), connHeadPos, arg2MainHeadPos);
      }

      // Part 2: one marker per sentence boundary crossed.
      int sentenceGap = Math.abs(arg1Line - arg2Line);
      for (int k = 0; k < sentenceGap; k++) {
        steps.add("SENT");
      }

      // Part 3: main head of the arg1 sentence -> candidate head.
      Tree arg1MainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0));
      int arg1MainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, arg1MainHead);
      if (arg1MainHeadPos != -1) {
        appendDependencyPathOrNull(
            steps, doc.getDepGraph(arg1Line), arg1MainHeadPos, arg1HeadPos);
      }
    } else {
      appendDependencyPathOrNull(steps, doc.getDepGraph(arg1Line), connHeadPos, arg1HeadPos);
    }

    StringBuilder everyStep = new StringBuilder();
    StringBuilder withoutCc = new StringBuilder();
    StringBuilder withoutRepeats = new StringBuilder();
    String last = "";
    for (String step : steps) {
      everyStep.append(step).append(":");
      boolean isCc = step.equals("cc") || step.equals("-cc");
      if (!isCc) {
        withoutCc.append(step).append(":");
      }
      if (!step.equals(last)) {
        withoutRepeats.append(step).append(":");
      }
      last = step;
    }
    String depPath = everyStep.toString();

    // M: dependency path
    pl = PropertyList.add("M=" + depPath, 1.0, pl);
    // Q: M combined with the connective
    pl = PropertyList.add("Q=CONN-" + conn + "&M-" + depPath, 1.0, pl);
    // T: M combined with the connective category
    pl = PropertyList.add("T=CAT-" + category + "&M-" + depPath, 1.0, pl);
    // O: path without cc steps
    pl = PropertyList.add("O=" + withoutCc.toString(), 1.0, pl);
    // P: path without consecutive repetitions
    pl = PropertyList.add("P=" + withoutRepeats.toString(), 1.0, pl);

    return pl;
  }

  /**
   * Appends the result of {@code graph.getPathAsList(from, to, false)} to {@code steps}, or the
   * literal {@code "null"} when that call returns {@code null}.
   */
  private void appendDependencyPathOrNull(
      List<String> steps, SimpleDepGraph graph, int from, int to) {
    List<String> found = graph.getPathAsList(from, to, false);
    if (found == null) {
      steps.add("null");
    } else {
      steps.addAll(found);
    }
  }
コード例 #3
0
  /**
   * Adds constituent-tree path features that link the connective head to the candidate arg1 head.
   *
   * <p>If the candidate lies in the connective's sentence, the path is the tree path between the
   * two leaves. Otherwise it is built from three parts: the path from the connective head leaf to
   * the Collins head leaf of the arg2 sentence, one {@code "SENT"} step per sentence separating
   * the two lines, and the path from the Collins head leaf of the arg1 sentence to the candidate
   * leaf. A part whose head position is {@code -1}, or whose tree path is {@code null}, is
   * skipped. Leaf nodes never contribute a label.
   *
   * <p>Emitted features: {@code H} (full path), {@code L} (connective and full path), {@code I}
   * (path length), {@code J} (path without pre-terminal labels) and {@code K} (the {@code J}
   * path with consecutive repetitions collapsed).
   *
   * @param pl property list to extend
   * @param doc document holding the sentences and parse trees
   * @param candidate (sentence index, token index) of the candidate arg1 head
   * @param arg2Line index of the sentence containing the connective
   * @param arg2HeadPos not used by this method
   * @param connStart connective span start
   * @param connEnd connective span end
   * @return the extended property list
   */
  private PropertyList addConstituentFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {
    Sentence arg2Sentence = doc.getSentence(arg2Line);
    String conn = arg2Sentence.toString(connStart, connEnd);
    int connHeadPos = connAnalyzer.getHeadWord(arg2Sentence.getParseTree(), connStart, connEnd);

    int arg1Line = candidate.first();
    Tree arg1Root = doc.getTree(arg1Line);
    int arg1HeadPos = candidate.second();

    List<String> labels = new ArrayList<String>();
    List<String> phraseLabels = new ArrayList<String>();

    if (arg1Line != arg2Line) {
      // Part 1: connective head -> main head of the arg2 sentence.
      Tree arg2Root = arg2Sentence.getParseTree();
      Tree arg2MainHead = headAnalyzer.getCollinsHead(arg2Root.getChild(0));
      List<Tree> arg2Leaves = arg2Root.getLeaves();
      int arg2MainHeadPos = treeAnalyzer.getLeafPosition(arg2Root, arg2MainHead);
      if (arg2MainHeadPos != -1) {
        appendConstituentLabels(
            arg2Root.pathNodeToNode(arg2Leaves.get(connHeadPos), arg2Leaves.get(arg2MainHeadPos)),
            labels,
            phraseLabels);
      }

      // Part 2: one marker per sentence boundary crossed.
      int sentenceGap = Math.abs(arg1Line - arg2Line);
      for (int k = 0; k < sentenceGap; k++) {
        labels.add("SENT");
        phraseLabels.add("SENT");
      }

      // Part 3: main head of the arg1 sentence -> candidate head.
      Tree arg1MainHead = headAnalyzer.getCollinsHead(arg1Root.getChild(0));
      List<Tree> arg1Leaves = arg1Root.getLeaves();
      int arg1MainHeadPos = treeAnalyzer.getLeafPosition(arg1Root, arg1MainHead);
      if (arg1MainHeadPos != -1) {
        appendConstituentLabels(
            arg1Root.pathNodeToNode(arg1Leaves.get(arg1MainHeadPos), arg1Leaves.get(arg1HeadPos)),
            labels,
            phraseLabels);
      }
    } else {
      List<Tree> sentenceLeaves = arg1Root.getLeaves();
      appendConstituentLabels(
          arg1Root.pathNodeToNode(
              sentenceLeaves.get(connHeadPos), sentenceLeaves.get(arg1HeadPos)),
          labels,
          phraseLabels);
    }

    // H: full path, L: connective combined with H
    StringBuilder joined = new StringBuilder();
    for (String label : labels) {
      joined.append(label).append(":");
    }
    String fullPath = joined.toString();
    pl = PropertyList.add("H=" + fullPath, 1.0, pl);
    pl = PropertyList.add("L=CONN-" + conn + "&H-" + fullPath, 1.0, pl);

    // I: length of the path
    pl = PropertyList.add("I=" + labels.size(), 1.0, pl);

    // J: path without part-of-speech labels
    // K: J with consecutive repetitions collapsed
    StringBuilder phrasePath = new StringBuilder();
    StringBuilder collapsed = new StringBuilder();
    String last = "";
    for (String label : phraseLabels) {
      phrasePath.append(label).append(":");
      if (!label.equals(last)) {
        collapsed.append(label).append(":");
      }
      last = label;
    }
    pl = PropertyList.add("J=" + phrasePath.toString(), 1.0, pl);
    pl = PropertyList.add("K=" + collapsed.toString(), 1.0, pl);

    return pl;
  }

  /**
   * Appends the label of every non-leaf node on {@code treePath} to {@code labels}, and also to
   * {@code phraseLabels} when the node is not a pre-terminal. Does nothing for a {@code null}
   * path.
   */
  private void appendConstituentLabels(
      List<Tree> treePath, List<String> labels, List<String> phraseLabels) {
    if (treePath == null) {
      return;
    }
    for (Tree node : treePath) {
      if (node.isLeaf()) {
        continue;
      }
      labels.add(node.value());
      if (!node.isPreTerminal()) {
        phraseLabels.add(node.value());
      }
    }
  }
コード例 #4
0
  /**
   * Adds the baseline (surface-level) features for one arg1 candidate.
   *
   * <p>Emitted features, in order:
   *
   * <ul>
   *   <li>{@code R}: connective category, looked up from the lower-cased connective.
   *   <li>{@code A}: connective position. {@code Initial} when {@code connStart < 4}; otherwise
   *       {@code Terminal} when {@code connEnd} is within the last three positions of the
   *       sentence that contains the connective; otherwise {@code Medial}.
   *   <li>{@code S}: {@code A} combined with {@code R}.
   *   <li>{@code C} / {@code D}: the connective phrase as written / lower-cased.
   *   <li>{@code E}: the word at the candidate head position.
   *   <li>{@code B}: whether the candidate is in the connective's sentence.
   *   <li>{@code A=..&B=..}: {@code A} combined with {@code B}.
   *   <li>{@code F}: {@code <} when the candidate is in an earlier sentence or precedes the
   *       connective start, otherwise {@code >}.
   *   <li>{@code Z}: relative order of ARG1, ARG2 and CONN, and {@code CONN=..&Z=..}: {@code Z}
   *       combined with the connective.
   * </ul>
   *
   * @param pl property list to extend
   * @param doc document holding the sentences
   * @param candidate (sentence index, token index) of the candidate arg1 head
   * @param arg2Line index of the sentence containing the connective
   * @param arg2HeadPos token index of the arg2 head within {@code arg2Line}
   * @param connStart connective span start within {@code arg2Line}
   * @param connEnd connective span end within {@code arg2Line}
   * @return the extended property list
   */
  private PropertyList addBaselineFeatures(
      PropertyList pl,
      Document doc,
      Pair<Integer, Integer> candidate,
      int arg2Line,
      int arg2HeadPos,
      int connStart,
      int connEnd) {
    Sentence arg2Sentence = doc.getSentence(arg2Line);
    int arg1Line = candidate.first();
    Sentence arg1Sentence = doc.getSentence(arg1Line);
    int arg1HeadPos = candidate.second();
    String conn = arg2Sentence.toString(connStart, connEnd);
    String lowerConn = conn.toLowerCase();
    boolean sameSentence = arg1Line == arg2Line;

    // R-connective type
    String category = connAnalyzer.getCategory(lowerConn);
    pl = PropertyList.add("R=" + category, 1.0, pl);

    // A-position of the connective.
    // The connective span indexes into the arg2 sentence, so "Terminal" must be measured against
    // that sentence's length; the candidate's sentence may be a different, unrelated length.
    String position = "Medial";
    if (connStart < 4) {
      position = "Initial";
    } else if (connEnd >= (arg2Sentence.size() - 3)) {
      position = "Terminal";
    }
    pl = PropertyList.add("A=" + position, 1.0, pl);

    // S-A & R
    pl = PropertyList.add("S=" + position + "&" + category, 1.0, pl);

    // C-connective phrase
    pl = PropertyList.add("C=" + conn, 1.0, pl);

    // D-downcase conn phrase
    pl = PropertyList.add("D=" + lowerConn, 1.0, pl);

    // E-argument head word
    pl = PropertyList.add("E=" + arg1Sentence.get(arg1HeadPos).word(), 1.0, pl);

    // B-same sentence or not
    pl = PropertyList.add("B=" + sameSentence, 1.0, pl);
    // G-A&B (emitted under the name "A=..&B=..")
    pl = PropertyList.add("A=" + position + "&" + "B=" + sameSentence, 1.0, pl);

    // F-arg1 head prior or after conn
    if (arg1Line < arg2Line || arg1HeadPos < connStart) {
      pl = PropertyList.add("F=<", 1.0, pl);
    } else {
      pl = PropertyList.add("F=>", 1.0, pl);
    }

    // Z1-relative position of arg1-conn-arg2
    String z;
    if (arg1Line < arg2Line) {
      // arg1 is in an earlier sentence, so it precedes both the connective and arg2.
      if (arg2HeadPos < connStart) {
        z = "ARG1-ARG2-CONN";
      } else {
        z = "ARG1-CONN-ARG2";
      }
    } else if (arg1HeadPos < connStart) {
      if (arg2HeadPos < arg1HeadPos) {
        z = "ARG2-ARG1-CONN";
      } else if (arg2HeadPos < connStart) {
        z = "ARG1-ARG2-CONN";
      } else {
        z = "ARG1-CONN-ARG2";
      }
    } else if (arg2HeadPos < connStart) {
      z = "ARG2-CONN-ARG1";
    } else if (arg2HeadPos < arg1HeadPos) {
      z = "CONN-ARG2-ARG1";
    } else {
      z = "CONN-ARG1-ARG2";
    }
    pl = PropertyList.add("Z=" + z, 1.0, pl);

    // Z2-Conn&Z1
    pl = PropertyList.add("CONN=" + conn + "&" + "Z=" + z, 1.0, pl);

    return pl;
  }
コード例 #5
0
  /**
   * Builds an {@code Arg1RankInstance} for one PDTB relation and adds it to {@code instanceList}
   * through its pipe.
   *
   * <p>Nothing is added when the connective head leaves cannot be located, when either argument
   * head position is {@code -1}, or when the gold arg1 head is not among the generated
   * candidates; the latter two cases print a diagnostic to standard output.
   *
   * @param relation the annotated relation supplying connective and argument Gorn addresses
   * @param document document holding the sentences, trees and dependency graphs
   * @param arg1Line index of the sentence used for arg1
   * @param arg2Line index of the sentence containing the connective and arg2
   * @param instanceList list that receives the new instance
   */
  private void addInstancesThroughPipe(
      PDTBRelation relation,
      Document document,
      int arg1Line,
      int arg2Line,
      InstanceList instanceList) {
    // Locate the connective span inside the arg2 sentence.
    String connGornAddress = relation.getConnectiveGornAddress();
    Tree arg2Tree = document.getTree(arg2Line);
    List<Tree> connLeaves =
        connAnalyzer.getConnHeadLeaves(arg2Tree, connGornAddress, relation.getConnHead());
    if (connLeaves.isEmpty()) {
      return;
    }

    int connStart = treeAnalyzer.getLeafPosition(arg2Tree, connLeaves.get(0));
    int lastConnPos = treeAnalyzer.getLeafPosition(arg2Tree, connLeaves.get(connLeaves.size() - 1));
    // handle if..else, etc.: a span wider than 4 is reduced to its first token
    int connEnd = (lastConnPos - connStart) > 4 ? connStart : lastConnPos;

    // consider only the first sentence in case of multi-line argument1
    String arg1Address = relation.getArg1GornAddress();
    Tree arg1Tree = document.getTree(arg1Line);
    List<Tree> arg1Nodes = getArgGornNodes(arg1Tree, arg1Line, arg1Address);
    Tree arg1Head = headAnalyzer.getSyntacticHead(arg1Tree, arg1Nodes);
    int arg1HeadPos = treeAnalyzer.getLeafPosition(arg1Tree, arg1Head);

    String arg2Address = relation.getArg2GornAddress();
    List<Tree> arg2Nodes = getArgGornNodes(arg2Tree, arg2Line, arg2Address);
    Tree arg2Head = headAnalyzer.getSyntacticHead(arg2Tree, arg2Nodes);
    int arg2HeadPos = treeAnalyzer.getLeafPosition(arg2Tree, arg2Head);

    if (arg2HeadPos == -1) {
      System.out.println("arg2Head == -1");
      return;
    }
    if (arg1HeadPos == -1) {
      System.out.println("arg1Head == -1");
      return;
    }

    // Find the index of the gold arg1 head among the generated candidates.
    List<Pair<Integer, Integer>> candidates =
        getCandidates(document, arg2Line, connStart, connEnd, arg1Line);
    int goldIndex = -1;
    for (int idx = 0; idx < candidates.size() && goldIndex == -1; idx++) {
      Pair<Integer, Integer> current = candidates.get(idx);
      if (current.first() == arg1Line && current.second() == arg1HeadPos) {
        goldIndex = idx;
      }
    }

    if (goldIndex == -1) {
      System.out.println("true candidate == -1!!!");
      System.out.println(arg1Head.value());
      return;
    }

    // The extractor's result is only used for this diagnostic; the instance is built from the
    // gold arg2 head position.
    int extractedArg2 =
        ARG2_EXTRACTOR.extractArg2(
            document.getSentence(arg2Line),
            document.getTree(arg2Line),
            document.getDepGraph(arg2Line),
            connStart,
            connEnd);
    if (extractedArg2 == -1) {
      System.out.println("Arg2 == -1!!!!!!!!!!!!!!!!!");
    }

    Arg1RankInstance instance =
        new Arg1RankInstance(
            document, candidates, arg2Line, arg2HeadPos, connStart, connEnd, goldIndex);
    instanceList.addThruPipe(instance);
  }
コード例 #6
0
  /**
   * Shows accuracy according to Ben Wellner's definition of accuracy.
   *
   * <p>Every instance is classified; an instance counts as correct when the classifier's best
   * label is correct. Each misclassified instance is described in the file {@code arg1Error.log}
   * (overwritten on every call) and counted against its lower-cased connective. Afterwards the
   * per-connective error counts are printed to standard output in descending order, followed by
   * the total, the number correct and the accuracy.
   *
   * @param classifier the classifier to evaluate
   * @param instanceList the instances to classify; each misclassified instance must be an {@code
   *     Arg1RankInstance}
   * @throws IOException if the error log cannot be opened, written or closed
   */
  private void showAccuracy(Classifier classifier, InstanceList instanceList) throws IOException {
    int total = instanceList.size();
    int correct = 0;
    HashMap<String, Integer> errorMap = new HashMap<String, Integer>();
    FileWriter errorWriter = new FileWriter("arg1Error.log");

    // Close the log even when classification or writing throws, so the file handle never leaks.
    try {
      for (Instance instance : instanceList) {
        Classification classification = classifier.classify(instance);
        if (classification.bestLabelIsCorrect()) {
          correct++;
          continue;
        }

        Arg1RankInstance rankInstance = (Arg1RankInstance) instance;
        Document doc = rankInstance.getDocument();
        Sentence s = doc.getSentence(rankInstance.getArg2Line());
        String conn =
            s.toString(rankInstance.getConnStart(), rankInstance.getConnEnd()).toLowerCase();

        Integer seen = errorMap.get(conn);
        errorMap.put(conn, seen == null ? 1 : seen + 1);

        int arg2Line = rankInstance.getArg2Line();
        int arg1Line =
            rankInstance.getCandidates().get(rankInstance.getTrueArg1Candidate()).first();
        int arg1HeadPos =
            rankInstance.getCandidates().get(rankInstance.getTrueArg1Candidate()).second();
        // The best label's text is the index of the predicted candidate.
        int predictedCandidateIndex =
            Integer.parseInt(classification.getLabeling().getBestLabel().toString());

        errorWriter.write("FileName: " + doc.getFileName() + "\n");
        if (arg1Line == arg2Line) {
          errorWriter.write("Sentential\n");
          errorWriter.write("Conn: " + conn + "\n");
          errorWriter.write("Arg1Head: " + s.get(arg1HeadPos).word() + "\n");
          errorWriter.write(s.toString() + "\n\n");
        } else {
          errorWriter.write("Inter-Sentential\n");
          errorWriter.write("Arg1 in : " + arg1Line + "\n");
          errorWriter.write("Arg2 in : " + arg2Line + "\n");
          errorWriter.write("Conn: " + conn + "\n");
          errorWriter.write(s.toString() + "\n");
          Sentence s1 = doc.getSentence(arg1Line);
          // Log the head word itself, as the sentential branch and the predicted-head line do.
          errorWriter.write("Arg1Head: " + s1.get(arg1HeadPos).word() + "\n");
          errorWriter.write(s1.toString() + "\n\n");
        }

        int predictedArg1Line = rankInstance.getCandidates().get(predictedCandidateIndex).first();
        int predictedArg1HeadPos =
            rankInstance.getCandidates().get(predictedCandidateIndex).second();
        Sentence pSentence = doc.getSentence(predictedArg1Line);
        errorWriter.write(
            "Predicted arg1 sentence: "
                + pSentence.toString()
                + " [Correct: "
                + (predictedArg1Line == arg1Line)
                + "]\n");
        errorWriter.write("Predicted head: " + pSentence.get(predictedArg1HeadPos).word() + "\n\n");
      }
    } finally {
      errorWriter.close();
    }

    // Report connectives by error count, most frequent first.
    Set<Entry<String, Integer>> entrySet = errorMap.entrySet();
    List<Entry<String, Integer>> list = new ArrayList<Entry<String, Integer>>(entrySet);
    Collections.sort(
        list,
        new Comparator<Entry<String, Integer>>() {

          @Override
          public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
            return o2.getValue().compareTo(o1.getValue());
          }
        });

    for (Entry<String, Integer> item : list) {
      System.out.println(item.getKey() + "-" + item.getValue());
    }

    System.out.println("Total: " + total);
    System.out.println("Correct: " + correct);
    System.out.println("Accuracy: " + (1.0 * correct) / total);
  }