コード例 #1
0
  /**
   * Prepares this component for reading: runs the superclass initialization,
   * stores the logger, and resolves the configured input location into
   * {@code file}.
   *
   * <p>The value of the {@code INPUT_FILE_PARAM} configuration parameter is
   * parsed with {@link URI#create(String)} and handed to
   * {@link File#File(URI)}. Any exception raised while doing so is passed to
   * {@code ExceptionHandler.logAndRethrow}; a resolved file that does not
   * exist is reported through {@code ExceptionHandler.logAndThrow}.
   *
   * @throws ResourceInitializationException declared by the overridden method;
   *     presumably also what the {@code ExceptionHandler} helpers raise — confirm
   */
  @Override
  public void initialize() throws ResourceInitializationException {
    super.initialize();
    logger = getLogger();

    // The cast happens outside the try block on purpose: only URI/File
    // construction failures are routed through the exception handler.
    final String inputLocation = (String) getConfigParameterValue(INPUT_FILE_PARAM);

    try {
      final URI inputUri = URI.create(inputLocation);
      file = new File(inputUri);
    } catch (Exception e) {
      ExceptionHandler.logAndRethrow(logger, e);
    }

    // Nothing more to do when the input is present on disk.
    if (file.exists()) {
      return;
    }
    ExceptionHandler.logAndThrow(logger, "Input file does not exists");
  }
コード例 #2
0
  /**
   * Fills the given CAS with the text and language of the input file.
   *
   * <p>A {@code TikaProcessor} is created for {@code file}; its extracted text
   * becomes the CAS document text and its reported language becomes the CAS
   * document language. The document text is set before the language is
   * checked, so the CAS may already carry text when the language check fails.
   *
   * <p>Failures are reported through {@code ExceptionHandler}: a processor that
   * cannot be created, a null or empty document text, and a language that is
   * missing or does not contain {@code "ru"}.
   *
   * @param aCAS the CAS to populate with document text and language
   * @throws IOException declared by the overridden method
   * @throws CollectionException declared by the overridden method; presumably
   *     what the {@code ExceptionHandler} helpers raise — confirm
   */
  @Override
  public void getNext(final CAS aCAS) throws IOException, CollectionException {
    // NOTE(review): this default instance is replaced immediately below and
    // appears to exist only so the variable is definitely assigned — confirm
    // the no-arg constructor has no side effects before removing it.
    TikaProcessor processor = new TikaProcessor();
    try {
      processor = TikaProcessor.newInstance(file);
    } catch (Exception e) {
      ExceptionHandler.logAndRethrow(logger, "TikaProcessor: ", e);
    }

    String documentText = processor.getText();
    if (documentText == null || documentText.isEmpty()) {
      ExceptionHandler.logAndThrow(logger, "Document text is null or empty");
    }
    aCAS.setDocumentText(documentText);

    String textLanguage = processor.getLanguage();
    // Nothing visible here guarantees getLanguage() is non-null; treat a
    // missing language like any other non-Russian result so it is logged and
    // reported instead of surfacing as a NullPointerException from contains().
    if (textLanguage == null || !textLanguage.contains("ru")) {
      ExceptionHandler.logAndThrow(logger, "Document language is not russian");
    }
    aCAS.setDocumentLanguage(textLanguage);
  }