public static void main(String[] args) throws Exception { Options options = new Options(); options.parseOptions(args); TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription( "uimaTypes.vpdmf-triage", "edu.isi.bmkeg.skm.cleartk.TypeSystem"); CollectionReader cr = CollectionReaderFactory.createCollectionReader( TriageScoreCollectionReader.class, typeSystem, TriageScoreCollectionReader.TRIAGE_CORPUS_NAME, options.triageCorpus, TriageScoreCollectionReader.TARGET_CORPUS_NAME, options.targetCorpus, TriageScoreCollectionReader.LOGIN, options.login, TriageScoreCollectionReader.PASSWORD, options.password, TriageScoreCollectionReader.DB_URL, options.dbName, TriageScoreCollectionReader.WORKING_DIRECTORY, options.workingDirectory, TriageScoreCollectionReader.SKIP_UNKNOWNS, true); AggregateBuilder builder = new AggregateBuilder(); builder.add(SentenceAnnotator.getDescription()); // Sentence segmentation builder.add(TokenAnnotator.getDescription()); // Tokenization // It would be better to write into the preprocessed instances the whole token // (skip stemming for now) and do Stemming while processing instances if desired. // So the instance processors can have both features, tokens and stems [MT]. 
// // builder.add(DefaultSnowballStemmer.getDescription("English")); // Stemming // The simple document classification annotator String triageCorpusName = options.triageCorpus; String targetCorpusName = options.targetCorpus; triageCorpusName = triageCorpusName.replaceAll("\\s+", "_"); triageCorpusName = triageCorpusName.replaceAll("\\/", "_"); targetCorpusName = targetCorpusName.replaceAll("\\s+", "_"); targetCorpusName = targetCorpusName.replaceAll("\\/", "_"); String dirPath = options.dir.getPath() + "/baseDir/" + targetCorpusName + "/" + triageCorpusName; builder.add( AnalysisEngineFactory.createPrimitiveDescription( SimpleOneLinePerDocWriter.class, SimpleOneLinePerDocWriter.PARAM_DIR_PATH, dirPath)); // /////////////////////////////////////////// // Run pipeline to create training data file // /////////////////////////////////////////// try { SimplePipeline.runPipeline(cr, builder.createAggregateDescription()); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Processes the given CAS by delegating unchanged to the superclass
 * implementation. The method is {@code synchronized}, so at most one thread at
 * a time executes it on a given instance.
 *
 * @param jCas the CAS handed through to {@code super.process}
 * @throws AnalysisEngineProcessException if the superclass implementation throws it
 */
@Override
public synchronized void process(JCas jCas)
        throws AnalysisEngineProcessException {
    super.process(jCas);
}