Пример #1
0
  private NameFinderME loadCustomModel(String loadModelFile) {
    InputStream is = null;
    NameFinderME finderME = null;
    try {
      is = new FileInputStream(ModelTypes.BIN_FILE_BASE_LOCATION + loadModelFile + ".bin");
      TokenNameFinderModel model = new TokenNameFinderModel(is);
      finderME = new NameFinderME(model);

    } catch (InvalidFormatException ife) {
      ife.printStackTrace();
    } catch (IOException ioe) {
      ioe.printStackTrace();
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    return finderME;
  }
  public void run(String format, String[] args) {
    super.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
    if (mlParams != null && !TrainerFactory.isValid(mlParams.getSettings())) {
      throw new TerminateToolException(
          1, "Training parameters file '" + params.getParams() + "' is invalid!");
    }

    if (mlParams == null) {
      mlParams = ModelUtil.createDefaultTrainingParameters();
      mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(params.getType()).toString());
    }

    File modelOutFile = params.getModel();
    CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);

    Dictionary ngramDict = null;

    Integer ngramCutoff = params.getNgram();

    if (ngramCutoff != null) {
      System.err.print("Building ngram dictionary ... ");
      try {
        ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff);
        sampleStream.reset();
      } catch (IOException e) {
        throw new TerminateToolException(
            -1, "IO error while building NGram Dictionary: " + e.getMessage(), e);
      }
      System.err.println("done");
    }

    POSTaggerFactory postaggerFactory = null;
    try {
      postaggerFactory = POSTaggerFactory.create(params.getFactory(), ngramDict, null);
    } catch (InvalidFormatException e) {
      throw new TerminateToolException(-1, e.getMessage(), e);
    }

    if (params.getDict() != null) {
      try {
        postaggerFactory.setTagDictionary(postaggerFactory.createTagDictionary(params.getDict()));
      } catch (IOException e) {
        throw new TerminateToolException(
            -1, "IO error while loading POS Dictionary: " + e.getMessage(), e);
      }
    }

    if (params.getTagDictCutoff() != null) {
      try {
        TagDictionary dict = postaggerFactory.getTagDictionary();
        if (dict == null) {
          dict = postaggerFactory.createEmptyTagDictionary();
          postaggerFactory.setTagDictionary(dict);
        }
        if (dict instanceof MutableTagDictionary) {
          POSTaggerME.populatePOSDictionary(
              sampleStream, (MutableTagDictionary) dict, params.getTagDictCutoff());
        } else {
          throw new IllegalArgumentException(
              "Can't extend a POSDictionary that does not implement MutableTagDictionary.");
        }
        sampleStream.reset();
      } catch (IOException e) {
        throw new TerminateToolException(
            -1, "IO error while creating/extending POS Dictionary: " + e.getMessage(), e);
      }
    }

    POSModel model;
    try {
      model =
          opennlp.tools.postag.POSTaggerME.train(
              params.getLang(), sampleStream, mlParams, postaggerFactory);
    } catch (IOException e) {
      throw new TerminateToolException(
          -1, "IO error while reading training data or indexing data: " + e.getMessage(), e);
    } finally {
      try {
        sampleStream.close();
      } catch (IOException e) {
        // sorry that this can fail
      }
    }

    CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
  }