/**
 * Command line driver for the POS tagger: loads the training, dev and blind test files found
 * under the base path, trains and validates a {@code POSTagger}, evaluates it on both dev sets,
 * and finally hands the blind test sentences to {@code labelTestSet}.
 *
 * <p>Recognized flags:
 * <ul>
 *   <li>{@code -path <dir>} - directory holding the data files (defaults to {@code "."})</li>
 *   <li>{@code -verbose} - forwarded to the evaluation calls</li>
 * </ul>
 *
 * @param args command line flags and arguments
 * @throws Exception if any of the loading, training or evaluation steps fails
 */
public static void main(String[] args) throws Exception {
    Map<String, String> flags = CommandLineUtils.simpleCommandLineParser(args);

    // The path to the assignment data; the current directory unless -path is given.
    final String dataDir = flags.containsKey("-path") ? flags.get("-path") : ".";
    System.out.println("Using base path: " + dataDir);

    // Whether or not to print the individual errors.
    final boolean verbose = flags.containsKey("-verbose");

    // ---- Read in data ----
    System.out.print("Loading training sentences...");
    List<TaggedSentence> trainSet = readTaggedSentences(dataDir + "/en-wsj-train.pos", true);
    Set<String> knownWords = extractVocabulary(trainSet);
    System.out.println("done.");

    System.out.print("Loading in-domain dev sentences...");
    List<TaggedSentence> inDomainDev = readTaggedSentences(dataDir + "/en-wsj-dev.pos", true);
    System.out.println("done.");

    System.out.print("Loading out-of-domain dev sentences...");
    List<TaggedSentence> outOfDomainDev =
            readTaggedSentences(dataDir + "/en-web-weblogs-dev.pos", true);
    System.out.println("done.");

    System.out.print("Loading out-of-domain blind test sentences...");
    List<TaggedSentence> blindTest = readTaggedSentences(dataDir + "/en-web-test.blind", false);
    System.out.println("done.");

    // ---- Construct tagger components ----
    // TODO : improve on the MostFrequentTagScorer
    LocalTrigramScorer scorer = new MostFrequentTagScorer(false);
    // TODO : improve on the GreedyDecoder
    TrellisDecoder<State> decoder = new GreedyDecoder<State>();

    // ---- Train, then optionally tune hyperparameters on dev data ----
    POSTagger tagger = new POSTagger(scorer, decoder);
    tagger.train(trainSet);
    tagger.validate(inDomainDev);

    // ---- Test tagger ----
    System.out.println("Evaluating on in-domain data:.");
    evaluateTagger(tagger, inDomainDev, knownWords, verbose);

    System.out.println("Evaluating on out-of-domain data:.");
    evaluateTagger(tagger, outOfDomainDev, knownWords, verbose);

    labelTestSet(tagger, blindTest, dataDir + "/en-web-test.tagged");
}
/** * @param unusedLocale the wanted locale (actually unused). * @throws MavenReportException if any */ protected void executeReport(Locale unusedLocale) throws MavenReportException { File config = buildConfigurationFile(); Commandline cli = new Commandline(); cli.setWorkingDirectory(getBasedir().getAbsolutePath()); cli.setExecutable(getExecutablePath()); cli.createArgument().setValue(config.getAbsolutePath()); Writer stringWriter = new StringWriter(); StreamConsumer out = new WriterStreamConsumer(stringWriter); StreamConsumer err = new WriterStreamConsumer(stringWriter); try { int returnCode = CommandLineUtils.executeCommandLine(cli, out, err); if (!isQuiet()) { // Get all output from doxygen and put it to the log out of Maven. String[] lines = stringWriter.toString().split("\n"); for (int i = 0; i < lines.length; i++) { lines[i] = lines[i].replaceAll("\n|\r", ""); getLog().info("doxygen: " + lines[i]); } } if (returnCode != 0) { throw new MavenReportException("Failed to generate Doxygen documentation."); } } catch (CommandLineException ex) { throw new MavenReportException("Error while executing Doxygen.", ex); } }
public static void main(String[] args) { // set up default options .............................................. Map<String, String> options = new HashMap<String, String>(); options.put("--path", "../data/parser/"); options.put("--data", "masc"); options.put("--parser", "nlpclass.assignments.PCFGParserTester$BaselineParser"); options.put("--maxLength", "20"); // let command-line options supersede defaults ......................... options.putAll(CommandLineUtils.simpleCommandLineParser(args)); System.out.println("PCFGParserTester options:"); for (Map.Entry<String, String> entry : options.entrySet()) { System.out.printf(" %-12s: %s%n", entry.getKey(), entry.getValue()); } System.out.println(); MAX_LENGTH = Integer.parseInt(options.get("--maxLength")); Parser parser; try { Class parserClass = Class.forName(options.get("--parser")); parser = (Parser) parserClass.newInstance(); } catch (Exception e) { throw new RuntimeException(e); } System.out.println("Using parser: " + parser); String basePath = options.get("--path"); String preBasePath = basePath; String dataSet = options.get("--data"); if (!basePath.endsWith("/")) { basePath += "/"; } // basePath += dataSet; System.out.println("Data will be loaded from: " + basePath + "\n"); List<Tree<String>> trainTrees = new ArrayList<Tree<String>>(); List<Tree<String>> validationTrees = new ArrayList<Tree<String>>(); List<Tree<String>> testTrees = new ArrayList<Tree<String>>(); if (dataSet.equals("miniTest")) { // training data: first 3 of 4 datums basePath += "parser/" + dataSet; System.out.println("Loading training trees..."); trainTrees = readTrees(basePath, 1, 3); System.out.println("done."); // test data: last of 4 datums System.out.println("Loading test trees..."); testTrees = readTrees(basePath, 4, 4); System.out.println("done."); } else if (dataSet.equals("masc")) { basePath += "parser/"; // training data: MASC train System.out.println("Loading MASC training trees... 
from: " + basePath + "masc/train"); trainTrees.addAll(readMASCTrees(basePath + "masc/train", 0, 38)); System.out.println("done."); System.out.println("Train trees size: " + trainTrees.size()); System.out.println("First train tree: " + Trees.PennTreeRenderer.render(trainTrees.get(0))); System.out.println( "Last train tree: " + Trees.PennTreeRenderer.render(trainTrees.get(trainTrees.size() - 1))); // test data: MASC devtest System.out.println("Loading MASC test trees..."); testTrees.addAll(readMASCTrees(basePath + "masc/devtest", 0, 11)); // testTrees.addAll(readMASCTrees(basePath+"masc/blindtest", 0, 8)); System.out.println("Test trees size: " + testTrees.size()); System.out.println("done."); System.out.println("First test tree: " + Trees.PennTreeRenderer.render(testTrees.get(0))); System.out.println( "Last test tree: " + Trees.PennTreeRenderer.render(testTrees.get(testTrees.size() - 1))); } else if (!dataSet.equals("miniTest") && !dataSet.equals("masc")) { throw new RuntimeException("Bad data set: " + dataSet + ": use miniTest or masc."); } System.out.println("\nTraining parser..."); parser.train(trainTrees); System.out.println("\nTesting parser..."); testParser(parser, testTrees); }
public static void main(String[] args) { // set up default options .............................................. Map<String, String> options = new HashMap<String, String>(); options.put("-path", "/afs/ir/class/cs224n/pa2/data/"); options.put("-data", "miniTest"); options.put("-parser", "cs224n.assignments.PCFGParserTester$BaselineParser"); options.put("-maxLength", "20"); // let command-line options supersede defaults ......................... options.putAll(CommandLineUtils.simpleCommandLineParser(args)); System.out.println("PCFGParserTester options:"); for (Map.Entry<String, String> entry : options.entrySet()) { System.out.printf(" %-12s: %s%n", entry.getKey(), entry.getValue()); } System.out.println(); MAX_LENGTH = Integer.parseInt(options.get("-maxLength")); Parser parser; try { Class parserClass = Class.forName(options.get("-parser")); parser = (Parser) parserClass.newInstance(); } catch (Exception e) { throw new RuntimeException(e); } System.out.println("Using parser: " + parser); String basePath = options.get("-path"); String dataSet = options.get("-data"); if (!basePath.endsWith("/")) basePath += "/"; // basePath += dataSet; System.out.println("Data will be loaded from: " + basePath + "\n"); List<Tree<String>> trainTrees = new ArrayList<Tree<String>>(), validationTrees = new ArrayList<Tree<String>>(), testTrees = new ArrayList<Tree<String>>(); if (!basePath.endsWith("/")) basePath += "/"; basePath += dataSet; if (dataSet.equals("miniTest")) { System.out.print("Loading training trees..."); trainTrees = readTrees(basePath, 1, 3); System.out.println("done."); System.out.print("Loading test trees..."); testTrees = readTrees(basePath, 4, 4); System.out.println("done."); } else if (dataSet.equals("treebank")) { System.out.print("Loading training trees..."); trainTrees = readTrees(basePath, 200, 2199); System.out.println("done."); System.out.print("Loading validation trees..."); validationTrees = readTrees(basePath, 2200, 2202); System.out.println("done."); 
System.out.print("Loading test trees..."); testTrees = readTrees(basePath, 2300, 2319); // 2301); System.out.println("done."); } else { throw new RuntimeException("Bad data set mode: " + dataSet + ", use miniTest, or treebank."); } parser.train(trainTrees); testParser(parser, testTrees); }
public static void main(String[] args) throws IOException { // Parse command line flags and arguments Map<String,String> argMap = CommandLineUtils.simpleCommandLineParser(args); // Set up default parameters and settings String basePath = "."; int maxTrainingSentences = 0; int maxIterations = 20; boolean verbose = false; boolean initialize = false; String dataset = "mini"; String model = "baseline"; // Update defaults using command line specifications if (argMap.containsKey("-path")) { basePath = argMap.get("-path"); System.out.println("Using base path: "+basePath); } if (argMap.containsKey("-sentences")) { maxTrainingSentences = Integer.parseInt(argMap.get("-sentences")); System.out.println("Using an additional "+maxTrainingSentences+" training sentences."); } if (argMap.containsKey("-data")) { dataset = argMap.get("-data"); System.out.println("Running with data: "+dataset); } else { System.out.println("No data set specified. Use -data [miniTest, validate]."); } if (argMap.containsKey("-model")) { model = argMap.get("-model"); System.out.println("Running with model: "+model); } else { System.out.println("No model specified. Use -model modelname."); } if (argMap.containsKey("-verbose")) { verbose = true; } if (argMap.containsKey("-iterations")) { maxIterations = Integer.parseInt(argMap.get("-iterations")); } if (argMap.containsKey("-initialize")) { initialize = true; } // Read appropriate training and testing sets. List<SentencePair> trainingSentencePairs = new ArrayList<SentencePair>(); if (! 
(dataset.equals("miniTest") || dataset.equals("mini")) && maxTrainingSentences > 0) trainingSentencePairs = readSentencePairs(basePath+"/training", maxTrainingSentences); List<SentencePair> testSentencePairs = new ArrayList<SentencePair>(); Map<Integer,Alignment> testAlignments = new HashMap<Integer, Alignment>(); if (dataset.equalsIgnoreCase("validate")) { testSentencePairs = readSentencePairs(basePath+"/trial", Integer.MAX_VALUE); testAlignments = readAlignments(basePath+"/trial/trial.wa"); } else if (dataset.equals("miniTest") || dataset.equals("mini")) { testSentencePairs = readSentencePairs(basePath+"/mini", Integer.MAX_VALUE); testAlignments = readAlignments(basePath+"/mini/mini.wa"); } else { throw new RuntimeException("Bad data set mode: "+ dataset+", use validate or miniTest."); } trainingSentencePairs.addAll(testSentencePairs); // Build model WordAligner wordAligner = null; if (model.equalsIgnoreCase("baseline")) { wordAligner = new BaselineWordAligner(); } // TODO : build other alignment models else if (model.equalsIgnoreCase("heuristic")) { wordAligner = new HeuristicWordAligner(trainingSentencePairs); } else if (model.equalsIgnoreCase("dice")) { wordAligner = new DiceWordAligner(trainingSentencePairs); } else if (model.equalsIgnoreCase("ibm1") || model.equalsIgnoreCase("ibmModel1")) { wordAligner = new IBMmodel1WordAligner(trainingSentencePairs, maxIterations, initialize); } else if (model.equalsIgnoreCase("ibm2") || model.equalsIgnoreCase("ibmModel2")) { wordAligner = new IBMmodel2WordAligner(trainingSentencePairs, maxIterations, initialize); } // Test model test(wordAligner, testSentencePairs, testAlignments, verbose); // Generate file for submission //can comment out if not ready for submission testSentencePairs = readSentencePairs(basePath+"/test", Integer.MAX_VALUE); predict(wordAligner, testSentencePairs, basePath+"/"+model+".out"); }