public void run(String format, String[] args) {
  super.run(format, args);

  // Load training parameters from the -params file; fall back to the
  // iterations/cutoff command line arguments if no file was given.
  mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
  if (mlParams == null) {
    mlParams = ModelUtil.createTrainingParameters(params.getIterations(), params.getCutoff());
  }

  File modelOutFile = params.getModel();

  CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

  ChunkerModel model;
  try {
    ChunkerFactory chunkerFactory = ChunkerFactory.create(params.getFactory());
    model = ChunkerME.train(params.getLang(), sampleStream, mlParams, chunkerFactory);
  } catch (IOException e) {
    throw new TerminateToolException(-1,
        "IO error while reading training data or indexing data: " + e.getMessage(), e);
  } finally {
    try {
      sampleStream.close();
    } catch (IOException e) {
      // sorry that this can fail
    }
  }

  CmdLineUtil.writeModel("chunker", modelOutFile, model);
}
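// Usage sketch (not part of the tool above): loading the chunker model that the
// trainer writes and chunking one POS-tagged sentence with ChunkerME. The model
// file name "en-chunker.bin" and the sample tokens/tags are illustrative
// assumptions, not values taken from the source.
import java.io.FileInputStream;
import java.io.IOException;

import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;

public class ChunkerUsageSketch {

  public static void main(String[] args) throws IOException {
    // Hypothetical path to a model produced by ChunkerTrainerTool.
    try (FileInputStream modelIn = new FileInputStream("en-chunker.bin")) {
      ChunkerModel model = new ChunkerModel(modelIn);
      ChunkerME chunker = new ChunkerME(model);

      String[] tokens = {"He", "reckons", "the", "current", "account", "deficit"};
      String[] posTags = {"PRP", "VBZ", "DT", "JJ", "NN", "NN"};

      // chunk() returns one BIO-style chunk tag per token, e.g. B-NP, I-NP, B-VP.
      String[] chunkTags = chunker.chunk(tokens, posTags);
      for (int i = 0; i < tokens.length; i++) {
        System.out.println(tokens[i] + "\t" + posTags[i] + "\t" + chunkTags[i]);
      }
    }
  }
}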
public void run(String format, String[] args) {
  super.run(format, args);

  // Load training parameters from the -params file and validate them;
  // otherwise fall back to defaults plus the requested algorithm.
  mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
  if (mlParams != null && !TrainerFactory.isValid(mlParams.getSettings())) {
    throw new TerminateToolException(1,
        "Training parameters file '" + params.getParams() + "' is invalid!");
  }

  if (mlParams == null) {
    mlParams = ModelUtil.createDefaultTrainingParameters();
    mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(params.getType()).toString());
  }

  File modelOutFile = params.getModel();

  CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);

  // Optionally build an ngram dictionary from the training data.
  Dictionary ngramDict = null;
  Integer ngramCutoff = params.getNgram();
  if (ngramCutoff != null) {
    System.err.print("Building ngram dictionary ... ");
    try {
      ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff);
      sampleStream.reset();
    } catch (IOException e) {
      throw new TerminateToolException(-1,
          "IO error while building NGram Dictionary: " + e.getMessage(), e);
    }
    System.err.println("done");
  }

  POSTaggerFactory postaggerFactory = null;
  try {
    postaggerFactory = POSTaggerFactory.create(params.getFactory(), ngramDict, null);
  } catch (InvalidFormatException e) {
    throw new TerminateToolException(-1, e.getMessage(), e);
  }

  // Load an external tag dictionary if one was passed on the command line.
  if (params.getDict() != null) {
    try {
      postaggerFactory.setTagDictionary(postaggerFactory.createTagDictionary(params.getDict()));
    } catch (IOException e) {
      throw new TerminateToolException(-1,
          "IO error while loading POS Dictionary: " + e.getMessage(), e);
    }
  }

  // Optionally extend the tag dictionary with entries seen in the training data.
  if (params.getTagDictCutoff() != null) {
    try {
      TagDictionary dict = postaggerFactory.getTagDictionary();
      if (dict == null) {
        dict = postaggerFactory.createEmptyTagDictionary();
        postaggerFactory.setTagDictionary(dict);
      }
      if (dict instanceof MutableTagDictionary) {
        POSTaggerME.populatePOSDictionary(sampleStream, (MutableTagDictionary) dict,
            params.getTagDictCutoff());
      } else {
        throw new IllegalArgumentException(
            "Can't extend a POSDictionary that does not implement MutableTagDictionary.");
      }
      sampleStream.reset();
    } catch (IOException e) {
      throw new TerminateToolException(-1,
          "IO error while creating/extending POS Dictionary: " + e.getMessage(), e);
    }
  }

  POSModel model;
  try {
    model = opennlp.tools.postag.POSTaggerME.train(params.getLang(), sampleStream,
        mlParams, postaggerFactory);
  } catch (IOException e) {
    throw new TerminateToolException(-1,
        "IO error while reading training data or indexing data: " + e.getMessage(), e);
  } finally {
    try {
      sampleStream.close();
    } catch (IOException e) {
      // sorry that this can fail
    }
  }

  CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
}
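// Usage sketch (not part of the tool above): loading the POS model that the trainer
// writes and tagging a tokenized sentence with POSTaggerME. The model file name
// "en-pos.bin" and the sample tokens are illustrative assumptions, not values taken
// from the source.
import java.io.FileInputStream;
import java.io.IOException;

import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;

public class POSTaggerUsageSketch {

  public static void main(String[] args) throws IOException {
    // Hypothetical path to a model produced by POSTaggerTrainerTool.
    try (FileInputStream modelIn = new FileInputStream("en-pos.bin")) {
      POSModel model = new POSModel(modelIn);
      POSTaggerME tagger = new POSTaggerME(model);

      String[] tokens = {"The", "quick", "brown", "fox", "jumps"};

      // tag() returns one tag per input token.
      String[] tags = tagger.tag(tokens);
      for (int i = 0; i < tokens.length; i++) {
        System.out.println(tokens[i] + "/" + tags[i]);
      }
    }
  }
}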