示例#1
0
  /**
   * Drives a part-of-speech tagging run: reads the training, dev and blind test files found under
   * the base path, trains a {@code POSTagger}, runs the validation step, evaluates on both dev
   * sets and finally hands the blind test sentences to {@code labelTestSet}.
   *
   * <p>Recognized flags:
   *
   * <ul>
   *   <li>{@code -path <dir>}: directory containing the data files (defaults to {@code "."}).
   *   <li>{@code -verbose}: when present, {@code true} is passed as the verbose argument of
   *       {@code evaluateTagger} (per the original comment: print the individual errors).
   * </ul>
   *
   * @param args command line flags and arguments
   * @throws Exception propagated unchanged from any of the helper calls made here
   */
  public static void main(String[] args) throws Exception {
    Map<String, String> flags = CommandLineUtils.simpleCommandLineParser(args);

    // Resolve settings, falling back to the defaults when a flag is absent.
    String dataDir = flags.containsKey("-path") ? flags.get("-path") : ".";
    System.out.println("Using base path: " + dataDir);
    boolean printErrors = flags.containsKey("-verbose");

    // Training data, plus the vocabulary extracted from it.
    System.out.print("Loading training sentences...");
    List<TaggedSentence> trainSet = readTaggedSentences(dataDir + "/en-wsj-train.pos", true);
    Set<String> knownWords = extractVocabulary(trainSet);
    System.out.println("done.");

    System.out.print("Loading in-domain dev sentences...");
    List<TaggedSentence> inDomainDev = readTaggedSentences(dataDir + "/en-wsj-dev.pos", true);
    System.out.println("done.");

    System.out.print("Loading out-of-domain dev sentences...");
    List<TaggedSentence> outOfDomainDev =
        readTaggedSentences(dataDir + "/en-web-weblogs-dev.pos", true);
    System.out.println("done.");

    System.out.print("Loading out-of-domain blind test sentences...");
    List<TaggedSentence> blindTest = readTaggedSentences(dataDir + "/en-web-test.blind", false);
    System.out.println("done.");

    // Assemble the tagger from its scorer and decoder.
    // TODO : improve on the MostFrequentTagScorer
    LocalTrigramScorer scorer = new MostFrequentTagScorer(false);
    // TODO : improve on the GreedyDecoder
    TrellisDecoder<State> decoder = new GreedyDecoder<State>();
    POSTagger tagger = new POSTagger(scorer, decoder);

    // Fit on the training set, then give the tagger the in-domain dev set for validation.
    tagger.train(trainSet);
    tagger.validate(inDomainDev);

    // Report on both dev sets, then process the blind test set.
    System.out.println("Evaluating on in-domain data:.");
    evaluateTagger(tagger, inDomainDev, knownWords, printErrors);
    System.out.println("Evaluating on out-of-domain data:.");
    evaluateTagger(tagger, outOfDomainDev, knownWords, printErrors);
    labelTestSet(tagger, blindTest, dataDir + "/en-web-test.tagged");
  }
  /**
   * Builds the configuration file, runs the configured executable on it from the base directory
   * and, unless quiet mode is on, copies everything the process printed into the Maven log with
   * a {@code "doxygen: "} prefix.
   *
   * @param unusedLocale the wanted locale (actually unused).
   * @throws MavenReportException if the process returns a non-zero exit code or a
   *     {@code CommandLineException} is raised while executing it
   */
  protected void executeReport(Locale unusedLocale) throws MavenReportException {

    File configFile = buildConfigurationFile();

    Commandline commandLine = new Commandline();
    commandLine.setWorkingDirectory(getBasedir().getAbsolutePath());
    commandLine.setExecutable(getExecutablePath());
    commandLine.createArgument().setValue(configFile.getAbsolutePath());

    // Standard output and standard error are both collected into the same writer.
    Writer captured = new StringWriter();
    StreamConsumer stdoutSink = new WriterStreamConsumer(captured);
    StreamConsumer stderrSink = new WriterStreamConsumer(captured);

    try {
      int exitCode = CommandLineUtils.executeCommandLine(commandLine, stdoutSink, stderrSink);

      if (!isQuiet()) {
        // Relay the captured output line by line, stripping any leftover line terminators.
        for (String rawLine : captured.toString().split("\n")) {
          String cleaned = rawLine.replaceAll("\n|\r", "");
          getLog().info("doxygen: " + cleaned);
        }
      }

      // The output is logged first so that a failing run still shows its diagnostics.
      if (exitCode != 0) {
        throw new MavenReportException("Failed to generate Doxygen documentation.");
      }

    } catch (CommandLineException ex) {
      throw new MavenReportException("Error while executing Doxygen.", ex);
    }
  }
  /**
   * Command-line entry point of the PCFG parser tester: resolves the options, instantiates the
   * requested parser reflectively, loads training and test trees for the selected data set, then
   * trains and tests the parser.
   *
   * <p>Options (defaults in parentheses); values given on the command line replace the defaults:
   *
   * <ul>
   *   <li>{@code --path} ({@code ../data/parser/}): base directory of the data.
   *   <li>{@code --data} ({@code masc}): data set to use, either {@code miniTest} or
   *       {@code masc}.
   *   <li>{@code --parser} ({@code nlpclass.assignments.PCFGParserTester$BaselineParser}): class
   *       name of the {@code Parser} implementation; it needs an accessible no-argument
   *       constructor.
   *   <li>{@code --maxLength} ({@code 20}): parsed as an integer and stored in
   *       {@code MAX_LENGTH}.
   * </ul>
   *
   * @param args command line flags and arguments
   * @throws RuntimeException if the parser class cannot be loaded or instantiated, or if the data
   *     set name is not one of the supported values
   * @throws NumberFormatException if {@code --maxLength} is not a valid integer
   */
  public static void main(String[] args) {

    // set up default options ..............................................
    Map<String, String> options = new HashMap<String, String>();
    options.put("--path", "../data/parser/");
    options.put("--data", "masc");
    options.put("--parser", "nlpclass.assignments.PCFGParserTester$BaselineParser");
    options.put("--maxLength", "20");

    // let command-line options supersede defaults .........................
    options.putAll(CommandLineUtils.simpleCommandLineParser(args));
    System.out.println("PCFGParserTester options:");
    for (Map.Entry<String, String> entry : options.entrySet()) {
      System.out.printf("  %-12s: %s%n", entry.getKey(), entry.getValue());
    }
    System.out.println();

    MAX_LENGTH = Integer.parseInt(options.get("--maxLength"));

    Parser parser;
    try {
      Class<?> parserClass = Class.forName(options.get("--parser"));
      // Class.newInstance() is deprecated; going through the no-arg constructor is the
      // documented replacement and wraps constructor failures instead of leaking them.
      parser = (Parser) parserClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    System.out.println("Using parser: " + parser);

    String basePath = options.get("--path");
    String dataSet = options.get("--data");
    if (!basePath.endsWith("/")) {
      basePath += "/";
    }
    System.out.println("Data will be loaded from: " + basePath + "\n");

    List<Tree<String>> trainTrees = new ArrayList<Tree<String>>();
    List<Tree<String>> testTrees = new ArrayList<Tree<String>>();

    // Constant-first comparisons so that a missing (null) value is reported as a bad data set
    // by the final branch instead of failing with a NullPointerException.
    if ("miniTest".equals(dataSet)) {
      // training data: first 3 of 4 datums
      basePath += "parser/" + dataSet;
      System.out.println("Loading training trees...");
      trainTrees = readTrees(basePath, 1, 3);
      System.out.println("done.");

      // test data: last of 4 datums
      System.out.println("Loading test trees...");
      testTrees = readTrees(basePath, 4, 4);
      System.out.println("done.");
    } else if ("masc".equals(dataSet)) {
      basePath += "parser/";
      // training data: MASC train
      System.out.println("Loading MASC training trees... from: " + basePath + "masc/train");
      trainTrees.addAll(readMASCTrees(basePath + "masc/train", 0, 38));
      System.out.println("done.");
      System.out.println("Train trees size: " + trainTrees.size());

      // The first/last printouts are diagnostics only; skip them when nothing was loaded
      // rather than aborting with an IndexOutOfBoundsException.
      if (!trainTrees.isEmpty()) {
        System.out.println(
            "First train tree: " + Trees.PennTreeRenderer.render(trainTrees.get(0)));
        System.out.println(
            "Last train tree: "
                + Trees.PennTreeRenderer.render(trainTrees.get(trainTrees.size() - 1)));
      }

      // test data: MASC devtest
      System.out.println("Loading MASC test trees...");
      testTrees.addAll(readMASCTrees(basePath + "masc/devtest", 0, 11));
      System.out.println("Test trees size: " + testTrees.size());
      System.out.println("done.");

      if (!testTrees.isEmpty()) {
        System.out.println(
            "First test tree: " + Trees.PennTreeRenderer.render(testTrees.get(0)));
        System.out.println(
            "Last test tree: "
                + Trees.PennTreeRenderer.render(testTrees.get(testTrees.size() - 1)));
      }
    } else {
      throw new RuntimeException("Bad data set: " + dataSet + ": use miniTest or masc.");
    }

    System.out.println("\nTraining parser...");
    parser.train(trainTrees);

    System.out.println("\nTesting parser...");
    testParser(parser, testTrees);
  }
示例#4
0
  /**
   * Command-line entry point of the PCFG parser tester: resolves the options, instantiates the
   * requested parser reflectively, loads the trees of the selected data set, then trains and
   * tests the parser.
   *
   * <p>Options (defaults in parentheses); values given on the command line replace the defaults:
   *
   * <ul>
   *   <li>{@code -path} ({@code /afs/ir/class/cs224n/pa2/data/}): base directory of the data; the
   *       data set name is appended to it to form the directory that is read.
   *   <li>{@code -data} ({@code miniTest}): data set to use, either {@code miniTest} or
   *       {@code treebank}.
   *   <li>{@code -parser} ({@code cs224n.assignments.PCFGParserTester$BaselineParser}): class
   *       name of the {@code Parser} implementation; it needs an accessible no-argument
   *       constructor.
   *   <li>{@code -maxLength} ({@code 20}): parsed as an integer and stored in
   *       {@code MAX_LENGTH}.
   * </ul>
   *
   * @param args command line flags and arguments
   * @throws RuntimeException if the parser class cannot be loaded or instantiated, or if the data
   *     set name is not one of the supported values
   * @throws NumberFormatException if {@code -maxLength} is not a valid integer
   */
  public static void main(String[] args) {

    // set up default options ..............................................
    Map<String, String> options = new HashMap<String, String>();
    options.put("-path", "/afs/ir/class/cs224n/pa2/data/");
    options.put("-data", "miniTest");
    options.put("-parser", "cs224n.assignments.PCFGParserTester$BaselineParser");
    options.put("-maxLength", "20");

    // let command-line options supersede defaults .........................
    options.putAll(CommandLineUtils.simpleCommandLineParser(args));
    System.out.println("PCFGParserTester options:");
    for (Map.Entry<String, String> entry : options.entrySet()) {
      System.out.printf("  %-12s: %s%n", entry.getKey(), entry.getValue());
    }
    System.out.println();

    MAX_LENGTH = Integer.parseInt(options.get("-maxLength"));

    Parser parser;
    try {
      Class<?> parserClass = Class.forName(options.get("-parser"));
      // Class.newInstance() is deprecated; going through the no-arg constructor is the
      // documented replacement and wraps constructor failures instead of leaking them.
      parser = (Parser) parserClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    System.out.println("Using parser: " + parser);

    String basePath = options.get("-path");
    String dataSet = options.get("-data");
    // Normalize the trailing separator once; the second, identical check was redundant.
    if (!basePath.endsWith("/")) {
      basePath += "/";
    }
    System.out.println("Data will be loaded from: " + basePath + "\n");

    List<Tree<String>> trainTrees;
    List<Tree<String>> testTrees;

    basePath += dataSet;
    // Constant-first comparisons so that a missing (null) value is reported as a bad data set
    // by the final branch instead of failing with a NullPointerException.
    if ("miniTest".equals(dataSet)) {
      System.out.print("Loading training trees...");
      trainTrees = readTrees(basePath, 1, 3);
      System.out.println("done.");
      System.out.print("Loading test trees...");
      testTrees = readTrees(basePath, 4, 4);
      System.out.println("done.");
    } else if ("treebank".equals(dataSet)) {
      System.out.print("Loading training trees...");
      trainTrees = readTrees(basePath, 200, 2199);
      System.out.println("done.");
      System.out.print("Loading validation trees...");
      // Nothing in this method consumes the validation trees; the call is kept so that the
      // console output and any load-time failure stay the same as before.
      readTrees(basePath, 2200, 2202);
      System.out.println("done.");
      System.out.print("Loading test trees...");
      testTrees = readTrees(basePath, 2300, 2319);
      System.out.println("done.");
    } else {
      throw new RuntimeException("Bad data set mode: " + dataSet + ", use miniTest, or treebank.");
    }
    parser.train(trainTrees);
    testParser(parser, testTrees);
  }
  /**
   * Command-line entry point of the word alignment tester: reads the sentence pairs and reference
   * alignments of the selected data set, builds the requested alignment model, tests it and then
   * runs prediction on the sentence pairs found under {@code <path>/test}.
   *
   * <p>Recognized flags:
   *
   * <ul>
   *   <li>{@code -path <dir>}: base directory of the data (default {@code "."}).
   *   <li>{@code -sentences <n>}: maximum number of additional training sentence pairs read from
   *       {@code <path>/training} (default 0); ignored for the mini data set.
   *   <li>{@code -data <name>}: {@code validate}, or {@code miniTest}/{@code mini} (default
   *       {@code mini}).
   *   <li>{@code -model <name>}: {@code baseline} (default), {@code heuristic}, {@code dice},
   *       {@code ibm1}/{@code ibmModel1} or {@code ibm2}/{@code ibmModel2}, case-insensitive.
   *   <li>{@code -iterations <n>}: iteration count handed to the IBM models (default 20).
   *   <li>{@code -initialize}: when present, {@code true} is handed to the IBM models as their
   *       initialize argument.
   *   <li>{@code -verbose}: when present, {@code true} is handed to {@code test} as its verbose
   *       argument.
   * </ul>
   *
   * @param args command line flags and arguments
   * @throws IOException declared for the data reading and writing helpers called here
   * @throws RuntimeException if the data set or model name is not recognized
   * @throws NumberFormatException if {@code -sentences} or {@code -iterations} is not a valid
   *     integer
   */
  public static void main(String[] args) throws IOException {
    // Parse command line flags and arguments
    Map<String, String> argMap = CommandLineUtils.simpleCommandLineParser(args);

    // Set up default parameters and settings
    String basePath = ".";
    int maxTrainingSentences = 0;
    int maxIterations = 20;
    String dataset = "mini";
    String model = "baseline";

    // Update defaults using command line specifications
    if (argMap.containsKey("-path")) {
      basePath = argMap.get("-path");
      System.out.println("Using base path: "+basePath);
    }
    if (argMap.containsKey("-sentences")) {
      maxTrainingSentences = Integer.parseInt(argMap.get("-sentences"));
      System.out.println("Using an additional "+maxTrainingSentences+" training sentences.");
    }
    if (argMap.containsKey("-data")) {
      dataset = argMap.get("-data");
      System.out.println("Running with data: "+dataset);
    } else {
      System.out.println("No data set specified.  Use -data [miniTest, validate].");
    }
    if (argMap.containsKey("-model")) {
      model = argMap.get("-model");
      System.out.println("Running with model: "+model);
    } else {
      System.out.println("No model specified.  Use -model modelname.");
    }
    // These flags carry no value: their presence alone switches the setting on.
    boolean verbose = argMap.containsKey("-verbose");
    boolean initialize = argMap.containsKey("-initialize");
    if (argMap.containsKey("-iterations")) {
      maxIterations = Integer.parseInt(argMap.get("-iterations"));
    }

    // Read appropriate training and testing sets.
    boolean miniDataset = dataset.equals("miniTest") || dataset.equals("mini");
    List<SentencePair> trainingSentencePairs = new ArrayList<SentencePair>();
    if (!miniDataset && maxTrainingSentences > 0) {
      trainingSentencePairs = readSentencePairs(basePath+"/training", maxTrainingSentences);
    }
    List<SentencePair> testSentencePairs;
    Map<Integer, Alignment> testAlignments;
    if (dataset.equalsIgnoreCase("validate")) {
      testSentencePairs = readSentencePairs(basePath+"/trial", Integer.MAX_VALUE);
      testAlignments = readAlignments(basePath+"/trial/trial.wa");
    } else if (miniDataset) {
      testSentencePairs = readSentencePairs(basePath+"/mini", Integer.MAX_VALUE);
      testAlignments = readAlignments(basePath+"/mini/mini.wa");
    } else {
      throw new RuntimeException("Bad data set mode: "+ dataset+", use validate or miniTest.");
    }
    // The test sentence pairs are also part of what the models are built from.
    trainingSentencePairs.addAll(testSentencePairs);

    // Build model
    WordAligner wordAligner;
    if (model.equalsIgnoreCase("baseline")) {
      wordAligner = new BaselineWordAligner();
    } else if (model.equalsIgnoreCase("heuristic")) {
      wordAligner = new HeuristicWordAligner(trainingSentencePairs);
    } else if (model.equalsIgnoreCase("dice")) {
      wordAligner = new DiceWordAligner(trainingSentencePairs);
    } else if (model.equalsIgnoreCase("ibm1") || model.equalsIgnoreCase("ibmModel1")) {
      wordAligner = new IBMmodel1WordAligner(trainingSentencePairs, maxIterations, initialize);
    } else if (model.equalsIgnoreCase("ibm2") || model.equalsIgnoreCase("ibmModel2")) {
      wordAligner = new IBMmodel2WordAligner(trainingSentencePairs, maxIterations, initialize);
    } else {
      // Previously an unknown model left the aligner null and the run continued with it;
      // fail here with a message that names the problem, like the data set check above.
      throw new RuntimeException(
          "Bad model: " + model + ", use baseline, heuristic, dice, ibm1 or ibm2.");
    }

    // Test model
    test(wordAligner, testSentencePairs, testAlignments, verbose);

    // Generate file for submission //can comment out if not ready for submission
    testSentencePairs = readSentencePairs(basePath+"/test", Integer.MAX_VALUE);
    predict(wordAligner, testSentencePairs, basePath+"/"+model+".out");
  }