/**
 * Loads the training parameters from the given parameter file.
 *
 * <p>When {@code paramFile} is {@code null} nothing is loaded and {@code null} is returned.
 * Otherwise the file is checked with {@code checkInputFile}, parsed into a
 * {@code TrainingParameters} instance and validated with {@code TrainerFactory.isValid}.
 *
 * @param paramFile the path of the parameter file, may be {@code null}
 * @param supportSequenceTraining whether sequence training is supported by the caller; when
 *     {@code false}, parameters that select the event model sequence trainer are rejected
 * @return the loaded parameters, or {@code null} if {@code paramFile} is {@code null}
 * @throws TerminateToolException if the file cannot be read, if the settings are invalid, or if
 *     the settings request sequence training while it is not supported
 */
private static TrainingParameters loadTrainingParameters(
    final String paramFile, final boolean supportSequenceTraining) {
  if (paramFile == null) {
    return null;
  }

  // Build the File once and reuse it for both the sanity check and the stream.
  final File file = new File(paramFile);
  checkInputFile("Training Parameter", file);

  TrainingParameters params;
  InputStream paramsIn = null;
  try {
    paramsIn = new FileInputStream(file);
    params = new opennlp.tools.util.TrainingParameters(paramsIn);
  } catch (IOException e) {
    throw new TerminateToolException(
        -1, "Error during parameters loading: " + e.getMessage(), e);
  } finally {
    // Closing is best effort: a failure here must not mask a successful load.
    try {
      if (paramsIn != null) {
        paramsIn.close();
      }
    } catch (IOException e) {
      System.err.println("Error closing the input stream");
    }
  }

  if (!TrainerFactory.isValid(params.getSettings())) {
    throw new TerminateToolException(
        1, "Training parameters file '" + paramFile + "' is invalid!");
  }

  // Honour the caller's capability flag instead of silently ignoring it.
  if (!supportSequenceTraining
      && TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(
          TrainerFactory.getTrainerType(params.getSettings()))) {
    throw new TerminateToolException(1, "Sequence training is not supported!");
  }

  return params;
}
public static POSModel train( String languageCode, ObjectStream<POSSample> samples, TrainingParameters trainParams, POSTaggerFactory posFactory) throws IOException { String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER); int beamSize = POSTaggerME.DEFAULT_BEAM_SIZE; if (beamSizeString != null) { beamSize = Integer.parseInt(beamSizeString); } POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator(); Map<String, String> manifestInfoEntries = new HashMap<String, String>(); TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings()); MaxentModel posModel = null; SequenceClassificationModel<String> seqPosModel = null; if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) { ObjectStream<Event> es = new POSSampleEventStream(samples, contextGenerator); EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams.getSettings(), manifestInfoEntries); posModel = trainer.train(es); } else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) { POSSampleSequenceStream ss = new POSSampleSequenceStream(samples, contextGenerator); EventModelSequenceTrainer trainer = TrainerFactory.getEventModelSequenceTrainer( trainParams.getSettings(), manifestInfoEntries); posModel = trainer.train(ss); } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(trainParams.getSettings(), manifestInfoEntries); // TODO: This will probably cause issue, since the feature generator uses the outcomes array POSSampleSequenceStream ss = new POSSampleSequenceStream(samples, contextGenerator); seqPosModel = trainer.train(ss); } else { throw new IllegalArgumentException("Trainer type is not supported: " + trainerType); } if (posModel != null) { return new POSModel(languageCode, posModel, beamSize, manifestInfoEntries, posFactory); } else { return new POSModel(languageCode, seqPosModel, manifestInfoEntries, posFactory); } }
/**
 * Trains a {@link POSModel} from explicit model type, cutoff and iteration values.
 *
 * <p>The three values are stored in a fresh {@link TrainingParameters} instance as the
 * algorithm, iterations and cutoff parameters, and training is then delegated to the overload
 * that accepts the parameters together with the tag and n-gram dictionaries.
 *
 * @param languageCode the language code passed on to the delegate
 * @param samples the stream of training samples
 * @param modelType the model type; its {@code toString()} value is used as the algorithm name
 * @param tagDictionary the tag dictionary passed on to the delegate
 * @param ngramDictionary the n-gram dictionary passed on to the delegate
 * @param cutoff the cutoff value
 * @param iterations the number of iterations
 * @return the model produced by the delegate
 * @throws IOException if the delegate fails with an I/O error
 * @deprecated use {@link #train(String, ObjectStream, TrainingParameters, POSTaggerFactory)}
 *     instead and pass in a {@link POSTaggerFactory} and a {@link TrainingParameters}.
 */
@Deprecated
public static POSModel train(
    String languageCode,
    ObjectStream<POSSample> samples,
    ModelType modelType,
    POSDictionary tagDictionary,
    Dictionary ngramDictionary,
    int cutoff,
    int iterations)
    throws IOException {

  TrainingParameters trainingParams = new TrainingParameters();

  String algorithmName = modelType.toString();
  trainingParams.put(TrainingParameters.ALGORITHM_PARAM, algorithmName);
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, String.valueOf(iterations));
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, String.valueOf(cutoff));

  return train(languageCode, samples, trainingParams, tagDictionary, ngramDictionary);
}