/**
 * Reads a {@code TrainingParameters} definition from the named file.
 *
 * <p>When no file name is given, nothing is read and {@code null} is returned. Otherwise the
 * file is first handed to {@code checkInputFile}, then opened and parsed, and finally the parsed
 * settings are checked with {@code TrainerFactory.isValid}.
 *
 * @param paramFile path of the parameter file, or {@code null} to skip loading
 * @param supportSequenceTraining whether sequence training is supported; this method does not
 *     consult the flag
 * @return the parsed parameters, or {@code null} when {@code paramFile} is {@code null}
 * @throws TerminateToolException constructed with {@code -1} when reading the file raises an
 *     {@code IOException}, or with {@code 1} when the parsed settings are not valid
 */
private static TrainingParameters loadTrainingParameters(
    final String paramFile, final boolean supportSequenceTraining) {
  // Nothing to load: the caller receives null and decides on defaults itself.
  if (paramFile == null) {
    return null;
  }

  final File source = new File(paramFile);
  checkInputFile("Training Parameter", source);

  TrainingParameters loaded;
  InputStream stream = null;
  try {
    stream = new FileInputStream(source);
    loaded = new opennlp.tools.util.TrainingParameters(stream);
  } catch (IOException e) {
    throw new TerminateToolException(
        -1, "Error during parameters loading: " + e.getMessage(), e);
  } finally {
    // Closing is best effort: a failure is reported on stderr but does not abort the load.
    if (stream != null) {
      try {
        stream.close();
      } catch (IOException e) {
        System.err.println("Error closing the input stream");
      }
    }
  }

  if (TrainerFactory.isValid(loaded.getSettings())) {
    return loaded;
  }
  throw new TerminateToolException(
      1, "Training parameters file '" + paramFile + "' is invalid!");
}
/**
 * Reports whether the given training settings are valid by delegating to
 * {@link TrainerFactory#isValid(Map)}.
 *
 * @param trainParams the training settings, passed through unchanged
 * @return the result of {@code TrainerFactory.isValid(trainParams)}
 * @deprecated Use {@link TrainerFactory#isValid(Map)} instead.
 */
@Deprecated
public static boolean isValid(Map<String, String> trainParams) {
  return TrainerFactory.isValid(trainParams);
}
public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams != null && !TrainerFactory.isValid(mlParams.getSettings())) { throw new TerminateToolException( 1, "Training parameters file '" + params.getParams() + "' is invalid!"); } if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(params.getType()).toString()); } File modelOutFile = params.getModel(); CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile); Dictionary ngramDict = null; Integer ngramCutoff = params.getNgram(); if (ngramCutoff != null) { System.err.print("Building ngram dictionary ... "); try { ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff); sampleStream.reset(); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while building NGram Dictionary: " + e.getMessage(), e); } System.err.println("done"); } POSTaggerFactory postaggerFactory = null; try { postaggerFactory = POSTaggerFactory.create(params.getFactory(), ngramDict, null); } catch (InvalidFormatException e) { throw new TerminateToolException(-1, e.getMessage(), e); } if (params.getDict() != null) { try { postaggerFactory.setTagDictionary(postaggerFactory.createTagDictionary(params.getDict())); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while loading POS Dictionary: " + e.getMessage(), e); } } if (params.getTagDictCutoff() != null) { try { TagDictionary dict = postaggerFactory.getTagDictionary(); if (dict == null) { dict = postaggerFactory.createEmptyTagDictionary(); postaggerFactory.setTagDictionary(dict); } if (dict instanceof MutableTagDictionary) { POSTaggerME.populatePOSDictionary( sampleStream, (MutableTagDictionary) dict, params.getTagDictCutoff()); } else { throw new IllegalArgumentException( "Can't extend a POSDictionary that does not implement 
MutableTagDictionary."); } sampleStream.reset(); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while creating/extending POS Dictionary: " + e.getMessage(), e); } } POSModel model; try { model = opennlp.tools.postag.POSTaggerME.train( params.getLang(), sampleStream, mlParams, postaggerFactory); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while reading training data or indexing data: " + e.getMessage(), e); } finally { try { sampleStream.close(); } catch (IOException e) { // sorry that this can fail } } CmdLineUtil.writeModel("pos tagger", modelOutFile, model); }