/**
 * Runs sieve-based coreference resolution over one document, or over every {@code .cnlp} file
 * found under a directory, and writes each reconstruction to a {@code .reconstructed} file.
 *
 * <p>The resolver is built from an English {@code SieveSystemCongiuration} with all mention
 * detectors and all default sieves switched on.
 *
 * @param input path of the document to process, or of the directory to scan when
 *     {@code isDirectory} is {@code true}
 * @param output output path prefix; it is concatenated as-is (no separator is inserted) with the
 *     input file's base name in directory mode, and then with {@code ".reconstructed"}
 * @param isDirectory whether {@code input} names a directory rather than a single document
 * @param excludeCommon forwarded unchanged to the string-producing {@code reconstruct} overload
 */
public static void reconstruct(
      String input, String output, boolean isDirectory, boolean excludeCommon) {
    /* Coref Configuration */
    SieveSystemCongiuration config = new SieveSystemCongiuration(TLanguage.ENGLISH);
    config.loadMentionDetectors(true, true, true);
    config.loadDefaultSieves(true, true, true, true, true, true, true, true);
    AbstractCoreferenceResolution coref = new SieveSystemCoreferenceResolution(config);
    /* ************* */

    if (isDirectory) {
      // NOTE(review): the third argument of getFileList presumably enables recursion - confirm.
      for (String filePath : FileUtils.getFileList(input, ".cnlp", true)) {
        writeReconstruction(
            coref, filePath, output + FileUtils.getBaseName(filePath), excludeCommon);
      }
    } else {
      writeReconstruction(coref, input, output, excludeCommon);
    }
  }

  /**
   * Resolves coreference for a single document and writes its reconstruction to
   * {@code outputPrefix + ".reconstructed"}.
   *
   * @param coref resolver used to extract mentions and coreference links
   * @param inputPath path of the document to read
   * @param outputPrefix output path without the {@code ".reconstructed"} suffix
   * @param excludeCommon forwarded unchanged to the string-producing {@code reconstruct} overload
   */
  private static void writeReconstruction(
      AbstractCoreferenceResolution coref,
      String inputPath,
      String outputPrefix,
      boolean excludeCommon) {
    // NOTE(review): the meaning of the literal 9 passed to getTestDocuments is not visible here.
    List<DEPTree> trees = CoreferenceTestUtil.getTestDocuments(inputPath, 9);
    Pair<List<AbstractMention>, CoreferantSet> resolution = coref.getEntities(trees);

    // try-with-resources closes (and flushes) the writer even when reconstruction throws,
    // so a failing document no longer leaks an open file handle.
    try (PrintWriter writer =
        new PrintWriter(IOUtils.createBufferedPrintStream(outputPrefix + ".reconstructed"))) {
      writer.println(reconstruct(trees, resolution.o1, resolution.o2, excludeCommon));
    }
  }
 /**
  * Writes the reconstruction of already-resolved documents to
  * {@code output + ".reconstructed"}.
  *
  * @param trees dependency trees of the document
  * @param mentions mentions passed through to the string-producing {@code reconstruct} overload
  * @param links coreference links passed through to the same overload
  * @param output output path without the {@code ".reconstructed"} suffix
  * @param excludeCommon forwarded unchanged to the string-producing {@code reconstruct} overload
  */
 public static void reconstruct(
     List<DEPTree> trees,
     List<AbstractMention> mentions,
     CoreferantSet links,
     String output,
     boolean excludeCommon) {
   // try-with-resources guarantees the writer is closed (and flushed) even if building the
   // reconstruction throws; previously the stream stayed open on failure.
   try (PrintWriter writer =
       new PrintWriter(IOUtils.createBufferedPrintStream(output + ".reconstructed"))) {
     writer.println(reconstruct(trees, mentions, links, excludeCommon));
   }
 }
예제 #3
0
  /**
   * Checks antecedent linking after {@code CTLibEn.preprocess}: for the i-th tree read from
   * {@code links.parse}, the node at {@code nulls[i]} must report the node at {@code antes[i]}
   * as its antecedent.
   */
  @Test
  public void testLinks() {
    String filename = "src/test/resources/constituent/links.parse";

    // Expected antecedent location for each tree, in file order.
    PBLocation[] antes = {
      new PBLocation(3, 1),
      new PBLocation(9, 0),
      new PBLocation(6, 1),
      new PBLocation(33, 1),
      new PBLocation(2, 1),
      new PBLocation(6, 4),
      new PBLocation(8, 1),
      new PBLocation(11, 1),
      new PBLocation(9, 1),
      new PBLocation(22, 1),
      new PBLocation(6, 1),
      new PBLocation(0, 1),
      new PBLocation(8, 1),
      new PBLocation(6, 1),
      new PBLocation(10, 1)
    };

    // Location of the node whose antecedent is checked, index-aligned with antes.
    PBLocation[] nulls = {
      new PBLocation(7, 0),
      new PBLocation(11, 0),
      new PBLocation(11, 0),
      new PBLocation(36, 0),
      new PBLocation(5, 0),
      new PBLocation(1, 0),
      new PBLocation(20, 0),
      new PBLocation(13, 0),
      new PBLocation(11, 0),
      new PBLocation(23, 0),
      new PBLocation(8, 0),
      new PBLocation(3, 0),
      new PBLocation(11, 0),
      new PBLocation(9, 0),
      new PBLocation(14, 0)
    };

    CTReader reader = new CTReader(IOUtils.createFileInputStream(filename));

    // Close the reader in finally so a failing assertion does not leak the input stream.
    try {
      for (int i = 0; i < antes.length; i++) {
        CTTree tree = reader.nextTree();
        CTLibEn.preprocess(tree);
        assertEquals(tree.getNode(antes[i]), tree.getNode(nulls[i]).getAntecedent());
      }
    } finally {
      reader.close();
    }
  }
예제 #4
0
  /**
   * Checks {@code CTLibEn.fixFunctionTags}: each tree read from {@code functionTags.parse} must,
   * after the fix is applied, serialize via {@code toStringLine()} to the expected bracketed
   * parse at the same index.
   */
  @Test
  public void testFixFunctionTags() {
    String filename = "src/test/resources/constituent/functionTags.parse";

    // Expected single-line serializations, in file order.
    String[] parses = {
      "(TOP (S (NP-SBJ (NP (CC both) (NNP Bush) (CC and) (NNP Rice))) (VP (VBP have) (VP (VBN delivered) (NP (NP (NNS speeches)) (, ,) (SBAR (WHNP-1 (WDT which)) (S (NP-SBJ (-NONE- *T*-1)) (VP (VBP are) (ADJP-PRD (RB very) (JJ clear))))))))))",
      "(TOP (S (NP-SBJ-1 (NNP Mr.) (NNP Clinton)) (VP (VBD was) (VP (VBN joined) (NP (-NONE- *-1)) (PP-LGS (IN by) (NP (JJ several) (JJ key) (NN republican) (NNS leaders))))) (. .)))",
      "(TOP (SBARQ (WHNP-1 (WP Who)) (SQ (VBZ is) (NP-SBJ (PRP it)) (NP-PRD (-NONE- *T*-1)) (SBAR-CLF (WHNP-2 (WDT that)) (S (NP-SBJ-3 (-NONE- *T*-2)) (NP-TMP (NN today)) (VP (VBZ wants) (S (NP-SBJ (-NONE- *PRO*-3)) (VP (TO to) (VP (VB blow) (NP (NNS things)) (PRT (RP up)) (PP-LOC (IN in) (NP (NNP Lebanon))))))))) (, ,) (NP-VOC (NNP Doctor))) (. ?)))"
    };

    CTReader reader = new CTReader(IOUtils.createFileInputStream(filename));

    // Close the reader in finally so a failing assertion does not leak the input stream.
    try {
      for (int i = 0; i < parses.length; i++) {
        CTTree tree = reader.nextTree();
        CTLibEn.fixFunctionTags(tree);
        assertEquals(parses[i], tree.toStringLine());
      }
    } finally {
      reader.close();
    }
  }
예제 #5
0
 /**
  * Creates a {@code DTHtml} and initializes it by passing the result of
  * {@code IOUtils.getInputStreamsFromClasspath(PathTokenizer.HTML_TAGS)} to {@code init}.
  */
 public DTHtml() {
   // NOTE(review): presumably HTML_TAGS names a bundled HTML tag resource that init loads -
   // confirm against init and PathTokenizer.
   init(IOUtils.getInputStreamsFromClasspath(PathTokenizer.HTML_TAGS));
 }