/**
 * Initializes this component and sets up the {@code Web1TConverter} it delegates to.
 *
 * <p>The converter is constructed from {@code outputPath}, {@code minNgramLength} and
 * {@code maxNgramLength}; afterwards the remaining settings ({@code createIndexes},
 * {@code splitThreshold}, {@code minFreq}, {@code lowercase}, {@code outputEncoding}) are
 * forwarded to it through its setters.
 *
 * @param context the UIMA context, passed on unchanged to the superclass initialization
 * @throws ResourceInitializationException if constructing the converter fails with an
 *         {@link IOException}; the I/O error is kept as the cause
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException
{
    super.initialize(context);

    final Web1TConverter ngramConverter;
    try {
        ngramConverter = new Web1TConverter(outputPath, minNgramLength, maxNgramLength);
    }
    catch (IOException ioFailure) {
        // The component cannot work without its converter, so report this as an init failure.
        throw new ResourceInitializationException(ioFailure);
    }
    this.converter = ngramConverter;

    // Forward the remaining configuration values to the freshly created converter.
    ngramConverter.setWriteIndexes(createIndexes);
    ngramConverter.setSplitThreshold(splitThreshold);
    ngramConverter.setMinFrequency(minFreq);
    ngramConverter.setToLowercase(lowercase);
    ngramConverter.setOutputEncoding(outputEncoding);
}
/**
 * Hands the given CAS to the converter.
 *
 * <p>Besides the CAS itself, the converter receives {@code inputPaths} and the type that
 * the CAS type system resolves for the name stored in {@code contextType}.
 *
 * @param jcas the CAS to add to the converter
 * @throws AnalysisEngineProcessException if the converter reports an {@link IOException};
 *         the I/O error is kept as the cause
 */
@Override
public void process(JCas jcas)
    throws AnalysisEngineProcessException
{
    try {
        converter.add(
                jcas,
                inputPaths,
                // Look up the context type by name in the type system of this CAS.
                jcas.getCas().getTypeSystem().getType(contextType));
    }
    catch (IOException ioFailure) {
        throw new AnalysisEngineProcessException(ioFailure);
    }
}
/**
 * Finishes the collection by asking the converter to create its index.
 *
 * <p>For each n-gram level, the input files are read and split according to the frequency
 * of the words' starting letters in those files; the resulting split files are then sorted
 * individually and consolidated.
 *
 * @throws AnalysisEngineProcessException if index creation fails with an
 *         {@link IOException}; the I/O error is kept as the cause
 */
@Override
public void collectionProcessComplete()
    throws AnalysisEngineProcessException
{
    super.collectionProcessComplete();

    try {
        converter.createIndex();
    }
    catch (IOException ioFailure) {
        throw new AnalysisEngineProcessException(ioFailure);
    }
}