/**
 * Command-line entry point: loads the tagged corpora, trains a tagger,
 * evaluates it on both dev sets and labels the blind test set.
 *
 * <p>Recognized flags (as returned by
 * {@code CommandLineUtils.simpleCommandLineParser}):
 * <ul>
 *   <li>{@code -path <dir>} &mdash; directory holding the data files;
 *       defaults to {@code "."}.</li>
 *   <li>{@code -verbose} &mdash; when present, {@code true} is forwarded to
 *       {@code evaluateTagger} (per the original comment, this controls
 *       whether individual errors are printed).</li>
 * </ul>
 *
 * @param args raw command-line arguments
 * @throws Exception if any of the loading, training or evaluation steps fails
 */
public static void main(String[] args) throws Exception {
  // Parse command line flags and arguments.
  Map<String, String> options = CommandLineUtils.simpleCommandLineParser(args);

  // The path to the assignment data; falls back to the current directory.
  String dataDir = options.containsKey("-path") ? options.get("-path") : ".";
  System.out.println("Using base path: " + dataDir);

  // Whether or not to print the individual errors.
  boolean printErrors = options.containsKey("-verbose");

  // Read in data. The boolean handed to readTaggedSentences is true for the
  // train/dev files and false for the blind test file
  // (presumably "file contains gold tags" -- confirm against readTaggedSentences).
  System.out.print("Loading training sentences...");
  List<TaggedSentence> trainingSet =
      readTaggedSentences(dataDir + "/en-wsj-train.pos", true);
  Set<String> knownWords = extractVocabulary(trainingSet);
  System.out.println("done.");

  System.out.print("Loading in-domain dev sentences...");
  List<TaggedSentence> inDomainDevSet =
      readTaggedSentences(dataDir + "/en-wsj-dev.pos", true);
  System.out.println("done.");

  System.out.print("Loading out-of-domain dev sentences...");
  List<TaggedSentence> outOfDomainDevSet =
      readTaggedSentences(dataDir + "/en-web-weblogs-dev.pos", true);
  System.out.println("done.");

  System.out.print("Loading out-of-domain blind test sentences...");
  List<TaggedSentence> blindTestSet =
      readTaggedSentences(dataDir + "/en-web-test.blind", false);
  System.out.println("done.");

  // Construct tagger components.
  // TODO : improve on the MostFrequentTagScorer
  LocalTrigramScorer scorer = new MostFrequentTagScorer(false);
  // TODO : improve on the GreedyDecoder
  TrellisDecoder<State> decoder = new GreedyDecoder<State>();

  // Train tagger.
  POSTagger tagger = new POSTagger(scorer, decoder);
  tagger.train(trainingSet);

  // Optionally tune hyperparameters on dev data.
  tagger.validate(inDomainDevSet);

  // Test tagger on both dev sets, using the training vocabulary.
  System.out.println("Evaluating on in-domain data:.");
  evaluateTagger(tagger, inDomainDevSet, knownWords, printErrors);
  System.out.println("Evaluating on out-of-domain data:.");
  evaluateTagger(tagger, outOfDomainDevSet, knownWords, printErrors);

  // Label the blind test sentences; the target path is passed to labelTestSet.
  labelTestSet(tagger, blindTestSet, dataDir + "/en-web-test.tagged");
}