예제 #1
0
  /**
   * Initializes the component and sets up the {@link Web1TConverter} used by the other
   * lifecycle methods.
   *
   * <p>The converter is constructed from {@code outputPath}, {@code minNgramLength} and
   * {@code maxNgramLength}, and is then configured with the {@code createIndexes},
   * {@code splitThreshold}, {@code minFreq}, {@code lowercase} and {@code outputEncoding}
   * settings of this component.
   *
   * @param context the UIMA context, forwarded to the superclass initialization
   * @throws ResourceInitializationException if constructing the converter fails with an
   *         {@link IOException}; the original exception is kept as the cause
   */
  @Override
  public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    final Web1TConverter web1t;
    try {
      web1t = new Web1TConverter(outputPath, minNgramLength, maxNgramLength);
    } catch (IOException ioe) {
      throw new ResourceInitializationException(ioe);
    }

    // Publish the converter to the field first, then apply the configured options.
    this.converter = web1t;
    web1t.setWriteIndexes(createIndexes);
    web1t.setSplitThreshold(splitThreshold);
    web1t.setMinFrequency(minFreq);
    web1t.setToLowercase(lowercase);
    web1t.setOutputEncoding(outputEncoding);
  }
예제 #2
0
  /**
   * Hands the given CAS to the converter.
   *
   * <p>The type named by {@code contextType} is looked up in the type system of the CAS and
   * passed to the converter together with {@code inputPaths}.
   *
   * @param jcas the document to add to the converter
   * @throws AnalysisEngineProcessException if the converter reports an {@link IOException};
   *         the original exception is kept as the cause
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    try {
      converter.add(
          jcas,
          inputPaths,
          jcas.getCas().getTypeSystem().getType(contextType));
    } catch (IOException ioe) {
      throw new AnalysisEngineProcessException(ioe);
    }
  }
예제 #3
0
  /**
   * Finishes the collection run by asking the converter to create its index.
   *
   * <p>The input files for each n-gram level are read and split according to the frequency of
   * the words' starting letters in the files; the split files are then sorted individually and
   * consolidated.
   *
   * @throws AnalysisEngineProcessException if index creation fails with an {@link IOException};
   *         the original exception is kept as the cause
   */
  @Override
  public void collectionProcessComplete() throws AnalysisEngineProcessException {
    super.collectionProcessComplete();

    try {
      converter.createIndex();
    } catch (IOException ioe) {
      throw new AnalysisEngineProcessException(ioe);
    }
  }