Exemplo n.º 1
0
  /**
   * Runs the greedy parser over the input treebank and collects the evaluation results.
   *
   * <p>The input is loaded with {@code readXConllTreebank} when {@code parameters.xconll} is set
   * and with {@code readTreebank} otherwise. A per-sentence pipeline is then assembled in this
   * order: {@code greedyParse}, a dump to {@code tmp.conll}, {@code removeRegularMWEs} (only when
   * {@code parameters.fixedMweOnly} is set), {@code unlabelMWEArcs}, {@code mergeFixedMWEs},
   * {@code mergeRegularMWEs}, {@code computeSegmentationAccuracy} with {@code false} and then
   * with {@code true}, {@code unmergeFixedMWE}, a dump to {@code parameters.output} (only when it
   * is non-null), and finally {@code computeParsingAccuracy}.
   *
   * @param parameters run configuration; this method reads {@code xconll}, {@code input},
   *     {@code fixedMweOnly} and {@code output}, and forwards the object to {@code getModel}
   * @param model value forwarded unchanged to {@code getModel} (presumably a model location;
   *     confirm against {@code getModel})
   * @return the {@code SimpleEvaluation} instance handed to
   *     {@code TreebankProcesses.processTreebank}
   * @throws IOException propagated from the helpers invoked by this method
   */
  private static Evaluation simpleParse(Parameters parameters, String model) throws IOException {
    // Load the treebank first so that the I/O happens before the model is fetched.
    DepTreebank treebank =
        parameters.xconll
            ? readXConllTreebank(parameters.input, -1)
            : readTreebank(parameters.input);

    ArcStandardTransitionBasedParserModel parserModel = getModel(parameters, model);
    TransitionBasedSystem<DepTree> parsingSystem =
        new PerceptronTransitionBasedSystem<DepTree>(parserModel);
    SimpleEvaluation evaluation = new SimpleEvaluation();
    SentenceProcessComposition pipeline = new SentenceProcessComposition();

    // Parse, then dump the raw parser output before any MWE post-processing.
    pipeline.add(TreebankProcesses.greedyParse(parsingSystem));
    pipeline.add(TreebankIO.saveInXConll("tmp.conll"));

    // The treebank may still carry regular MWEs even in fixed-MWE-only mode.
    if (parameters.fixedMweOnly) {
      pipeline.add(TreebankProcesses.removeRegularMWEs());
    }

    // MWE post-processing ahead of the segmentation measurements.
    pipeline.add(TreebankProcesses.unlabelMWEArcs());
    pipeline.add(TreebankProcesses.mergeFixedMWEs());
    pipeline.add(TreebankProcesses.mergeRegularMWEs());

    // Segmentation accuracy is computed twice, once per flag value.
    pipeline.add(TreebankEvaluations.computeSegmentationAccuracy(false));
    pipeline.add(TreebankEvaluations.computeSegmentationAccuracy(true));

    // Fixed MWEs are un-merged again before the optional output dump and the parsing score.
    pipeline.add(TreebankProcesses.unmergeFixedMWE());

    if (parameters.output != null) {
      pipeline.add(TreebankIO.saveInXConll(parameters.output));
    }

    pipeline.add(TreebankEvaluations.computeParsingAccuracy());

    TreebankProcesses.processTreebank(treebank, pipeline, evaluation);
    return evaluation;
  }
Exemplo n.º 2
0
  /**
   * Builds the training-time sentence pipeline and runs static-oracle training on it.
   *
   * <p>The training treebank is loaded with {@code readXConllTreebank} when
   * {@code parameters.xconll} is set and with {@code readTreebank} otherwise; both receive
   * {@code parameters.train} and {@code parameters.trainSize}. The pipeline always starts with
   * {@code copyGold} and a dump to {@code tmpx.conll}, and always ends with a dump to
   * {@code tmp.conll}. The steps in between depend on the flags:
   *
   * <ul>
   *   <li>{@code fixedMweOnly}: {@code removeRegularMWEs} is added first.
   *   <li>{@code baseline}: no merging is done; {@code unmergeRegularMWE} is added only when
   *       {@code fixedMweOnly} is off and {@code xconll} is on.
   *   <li>not {@code baseline}: {@code mergeFixedMWEs}, then {@code mergeRegularMWEs} (only when
   *       {@code fixedMweOnly} is off), then {@code binarizeMWE(false)}.
   * </ul>
   *
   * @param parameters run configuration; this method reads {@code xconll}, {@code train},
   *     {@code trainSize}, {@code fixedMweOnly} and {@code baseline}, and forwards the object to
   *     {@code TreebankProcesses.staticOracleTrain}
   * @throws IOException propagated from the helpers invoked by this method
   */
  private static void train(Parameters parameters) throws IOException {
    System.err.println("--------- New model  -----");

    // Original note on the XConll input: treebank in conll format; MWEs carry specific arc
    // labels (MWE_LABEL for fixed MWEs; REG_MWE for regular MWEs).
    DepTreebank treebank =
        parameters.xconll
            ? readXConllTreebank(parameters.train, parameters.trainSize)
            : readTreebank(parameters.train, parameters.trainSize);

    SentenceProcessComposition pipeline = new SentenceProcessComposition();
    pipeline.add(TreebankProcesses.copyGold());
    pipeline.add(TreebankIO.saveInXConll("tmpx.conll"));

    // The treebank may still carry regular MWEs even in fixed-MWE-only mode.
    if (parameters.fixedMweOnly) {
      pipeline.add(TreebankProcesses.removeRegularMWEs());
    }

    if (parameters.baseline) {
      // Baseline: no merging; regular MWEs are only un-merged for XConll input in full mode.
      if (!parameters.fixedMweOnly && parameters.xconll) {
        pipeline.add(TreebankProcesses.unmergeRegularMWE());
      }
    } else {
      pipeline.add(TreebankProcesses.mergeFixedMWEs());
      if (!parameters.fixedMweOnly) {
        pipeline.add(TreebankProcesses.mergeRegularMWEs());
      }
      pipeline.add(TreebankProcesses.binarizeMWE(false));
    }

    // Every configuration ends by dumping the prepared treebank.
    pipeline.add(TreebankIO.saveInXConll("tmp.conll"));

    TreebankProcesses.staticOracleTrain(treebank, pipeline, parameters);
  }