Exemple #1
0
  /**
   * Command-line driver: loads the tagged corpora, trains and validates a
   * {@code POSTagger}, evaluates it on both dev sets and finally labels the
   * blind test set.
   *
   * <p>Recognized flags (looked up in the map returned by
   * {@code CommandLineUtils.simpleCommandLineParser}):
   * <ul>
   *   <li>{@code -path <dir>}: directory holding the data files; defaults to
   *       {@code "."} when the flag is absent.</li>
   *   <li>{@code -verbose}: when present, {@code true} is forwarded as the
   *       last argument of {@code evaluateTagger} (presumably enabling
   *       per-error output; confirm against that method).</li>
   * </ul>
   *
   * @param args raw command-line arguments
   * @throws Exception propagated unchanged from any of the loading, training
   *     or evaluation steps
   */
  public static void main(String[] args) throws Exception {
    // Turn the raw arguments into a flag -> value lookup.
    Map<String, String> flags = CommandLineUtils.simpleCommandLineParser(args);

    // "-path" overrides the default data directory (the current directory).
    String dataDir = flags.containsKey("-path") ? flags.get("-path") : ".";
    System.out.println("Using base path: " + dataDir);

    // Only the presence of "-verbose" matters; its value is never read.
    boolean printErrors = flags.containsKey("-verbose");

    // ---- Load the corpora ----
    // NOTE(review): the boolean passed to readTaggedSentences is true for the
    // train/dev files and false for the blind test file; presumably it says
    // whether gold tags are present. Confirm against readTaggedSentences.
    String trainFile = dataDir + "/en-wsj-train.pos";
    System.out.print("Loading training sentences...");
    List<TaggedSentence> trainingData = readTaggedSentences(trainFile, true);
    // The vocabulary is derived from the training sentences only.
    Set<String> knownWords = extractVocabulary(trainingData);
    System.out.println("done.");

    String inDomainDevFile = dataDir + "/en-wsj-dev.pos";
    System.out.print("Loading in-domain dev sentences...");
    List<TaggedSentence> inDomainDev = readTaggedSentences(inDomainDevFile, true);
    System.out.println("done.");

    String outOfDomainDevFile = dataDir + "/en-web-weblogs-dev.pos";
    System.out.print("Loading out-of-domain dev sentences...");
    List<TaggedSentence> outOfDomainDev = readTaggedSentences(outOfDomainDevFile, true);
    System.out.println("done.");

    String blindTestFile = dataDir + "/en-web-test.blind";
    System.out.print("Loading out-of-domain blind test sentences...");
    List<TaggedSentence> blindTest = readTaggedSentences(blindTestFile, false);
    System.out.println("done.");

    // ---- Assemble the tagger ----
    // TODO : improve on the MostFrequentTagScorer
    LocalTrigramScorer scorer = new MostFrequentTagScorer(false);
    // TODO : improve on the GreedyDecoder
    TrellisDecoder<State> decoder = new GreedyDecoder<State>();
    POSTagger tagger = new POSTagger(scorer, decoder);

    // ---- Train, then validate on the in-domain dev set ----
    tagger.train(trainingData);
    tagger.validate(inDomainDev);

    // ---- Evaluate on both dev sets ----
    System.out.println("Evaluating on in-domain data:.");
    evaluateTagger(tagger, inDomainDev, knownWords, printErrors);
    System.out.println("Evaluating on out-of-domain data:.");
    evaluateTagger(tagger, outOfDomainDev, knownWords, printErrors);

    // ---- Label the blind test set ----
    // The third argument is the destination path handed to labelTestSet.
    String taggedOutputFile = dataDir + "/en-web-test.tagged";
    labelTestSet(tagger, blindTest, taggedOutputFile);
  }