예제 #1
0
  /**
   * Trains the configured parser on a data set and then evaluates it on that data set's test
   * trees.
   *
   * <p>Options and their defaults (values returned by
   * {@code CommandLineUtils.simpleCommandLineParser(args)} override the defaults):
   *
   * <ul>
   *   <li>{@code --path} ({@code ../data/parser/}) - base directory; a trailing {@code /} is added
   *       if missing and {@code parser/} is appended before loading
   *   <li>{@code --data} ({@code masc}) - data set name, {@code miniTest} or {@code masc}
   *   <li>{@code --parser} - fully qualified name of the {@code Parser} class to instantiate
   *   <li>{@code --maxLength} ({@code 20}) - parsed as an int and stored in {@code MAX_LENGTH}
   * </ul>
   *
   * @param args command-line arguments handed to {@code CommandLineUtils.simpleCommandLineParser}
   * @throws RuntimeException if the parser class cannot be loaded or instantiated, or if the data
   *     set name is neither {@code miniTest} nor {@code masc}
   * @throws NumberFormatException if the {@code --maxLength} value is not an integer
   */
  public static void main(String[] args) {

    // set up default options ..............................................
    Map<String, String> options = new HashMap<String, String>();
    options.put("--path", "../data/parser/");
    options.put("--data", "masc");
    options.put("--parser", "nlpclass.assignments.PCFGParserTester$BaselineParser");
    options.put("--maxLength", "20");

    // let command-line options supersede defaults .........................
    options.putAll(CommandLineUtils.simpleCommandLineParser(args));
    System.out.println("PCFGParserTester options:");
    for (Map.Entry<String, String> entry : options.entrySet()) {
      System.out.printf("  %-12s: %s%n", entry.getKey(), entry.getValue());
    }
    System.out.println();

    MAX_LENGTH = Integer.parseInt(options.get("--maxLength"));

    // Instantiate the parser reflectively through its no-argument constructor.
    // Class.newInstance() is deprecated, so go through the Constructor object instead.
    String parserClassName = options.get("--parser");
    Parser parser;
    try {
      Class<?> parserClass = Class.forName(parserClassName);
      parser = (Parser) parserClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new RuntimeException("Could not instantiate parser class: " + parserClassName, e);
    }
    System.out.println("Using parser: " + parser);

    String basePath = options.get("--path");
    String dataSet = options.get("--data");
    if (!basePath.endsWith("/")) {
      basePath += "/";
    }
    System.out.println("Data will be loaded from: " + basePath + "\n");

    List<Tree<String>> trainTrees = new ArrayList<Tree<String>>();
    List<Tree<String>> testTrees = new ArrayList<Tree<String>>();

    // NOTE(review): the default --path already ends in "parser/" and both branches append
    // "parser/" again - confirm this matches the on-disk layout.
    if (dataSet.equals("miniTest")) {
      // training data: first 3 of 4 datums
      basePath += "parser/" + dataSet;
      System.out.println("Loading training trees...");
      trainTrees = readTrees(basePath, 1, 3);
      System.out.println("done.");

      // test data: last of 4 datums
      System.out.println("Loading test trees...");
      testTrees = readTrees(basePath, 4, 4);
      System.out.println("done.");
    } else if (dataSet.equals("masc")) {
      basePath += "parser/";
      // training data: MASC train
      System.out.println("Loading MASC training trees... from: " + basePath + "masc/train");
      trainTrees.addAll(readMASCTrees(basePath + "masc/train", 0, 38));
      System.out.println("done.");
      System.out.println("Train trees size: " + trainTrees.size());

      // Only render sample trees when something was loaded; get(0) on an empty list would throw.
      if (!trainTrees.isEmpty()) {
        System.out.println("First train tree: " + Trees.PennTreeRenderer.render(trainTrees.get(0)));
        System.out.println(
            "Last train tree: "
                + Trees.PennTreeRenderer.render(trainTrees.get(trainTrees.size() - 1)));
      }

      // test data: MASC devtest
      System.out.println("Loading MASC test trees...");
      testTrees.addAll(readMASCTrees(basePath + "masc/devtest", 0, 11));
      // Alternative evaluation set (disabled):
      // testTrees.addAll(readMASCTrees(basePath+"masc/blindtest", 0, 8));
      System.out.println("Test trees size: " + testTrees.size());
      System.out.println("done.");

      if (!testTrees.isEmpty()) {
        System.out.println("First test tree: " + Trees.PennTreeRenderer.render(testTrees.get(0)));
        System.out.println(
            "Last test tree: "
                + Trees.PennTreeRenderer.render(testTrees.get(testTrees.size() - 1)));
      }
    } else {
      // Any other name is unsupported; no further condition is needed here.
      throw new RuntimeException("Bad data set: " + dataSet + ": use miniTest or masc.");
    }

    System.out.println("\nTraining parser...");
    parser.train(trainTrees);

    System.out.println("\nTesting parser...");
    testParser(parser, testTrees);
  }
예제 #2
0
  /**
   * Trains the configured parser on a data set and then evaluates it on that data set's test
   * trees.
   *
   * <p>Options and their defaults (values returned by
   * {@code CommandLineUtils.simpleCommandLineParser(args)} override the defaults):
   *
   * <ul>
   *   <li>{@code -path} ({@code /afs/ir/class/cs224n/pa2/data/}) - base directory; a trailing
   *       {@code /} is added if missing and the data set name is appended before loading
   *   <li>{@code -data} ({@code miniTest}) - data set name, {@code miniTest} or {@code treebank}
   *   <li>{@code -parser} - fully qualified name of the {@code Parser} class to instantiate
   *   <li>{@code -maxLength} ({@code 20}) - parsed as an int and stored in {@code MAX_LENGTH}
   * </ul>
   *
   * @param args command-line arguments handed to {@code CommandLineUtils.simpleCommandLineParser}
   * @throws RuntimeException if the parser class cannot be loaded or instantiated, or if the data
   *     set name is neither {@code miniTest} nor {@code treebank}
   * @throws NumberFormatException if the {@code -maxLength} value is not an integer
   */
  public static void main(String[] args) {

    // set up default options ..............................................
    Map<String, String> options = new HashMap<String, String>();
    options.put("-path", "/afs/ir/class/cs224n/pa2/data/");
    options.put("-data", "miniTest");
    options.put("-parser", "cs224n.assignments.PCFGParserTester$BaselineParser");
    options.put("-maxLength", "20");

    // let command-line options supersede defaults .........................
    options.putAll(CommandLineUtils.simpleCommandLineParser(args));
    System.out.println("PCFGParserTester options:");
    for (Map.Entry<String, String> entry : options.entrySet()) {
      System.out.printf("  %-12s: %s%n", entry.getKey(), entry.getValue());
    }
    System.out.println();

    MAX_LENGTH = Integer.parseInt(options.get("-maxLength"));

    // Instantiate the parser reflectively through its no-argument constructor.
    // Class.newInstance() is deprecated, so go through the Constructor object instead.
    String parserClassName = options.get("-parser");
    Parser parser;
    try {
      Class<?> parserClass = Class.forName(parserClassName);
      parser = (Parser) parserClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new RuntimeException("Could not instantiate parser class: " + parserClassName, e);
    }
    System.out.println("Using parser: " + parser);

    String basePath = options.get("-path");
    String dataSet = options.get("-data");
    // Normalise the base directory once; the path stays slash-terminated until the
    // data set name is appended below.
    if (!basePath.endsWith("/")) {
      basePath += "/";
    }
    System.out.println("Data will be loaded from: " + basePath + "\n");

    List<Tree<String>> trainTrees = new ArrayList<Tree<String>>();
    // Validation trees are loaded for the treebank data set but not consumed in this method.
    List<Tree<String>> validationTrees = new ArrayList<Tree<String>>();
    List<Tree<String>> testTrees = new ArrayList<Tree<String>>();

    basePath += dataSet;
    if (dataSet.equals("miniTest")) {
      System.out.print("Loading training trees...");
      trainTrees = readTrees(basePath, 1, 3);
      System.out.println("done.");
      System.out.print("Loading test trees...");
      testTrees = readTrees(basePath, 4, 4);
      System.out.println("done.");
    } else if (dataSet.equals("treebank")) {
      System.out.print("Loading training trees...");
      trainTrees = readTrees(basePath, 200, 2199);
      System.out.println("done.");
      System.out.print("Loading validation trees...");
      validationTrees = readTrees(basePath, 2200, 2202);
      System.out.println("done.");
      System.out.print("Loading test trees...");
      testTrees = readTrees(basePath, 2300, 2319); // 2301);
      System.out.println("done.");
    } else {
      throw new RuntimeException("Bad data set mode: " + dataSet + ", use miniTest, or treebank.");
    }
    parser.train(trainTrees);
    testParser(parser, testTrees);
  }