Example #1
0
  /**
   * Extracts summary metadata and text from an Excel workbook into a new parser result item.
   *
   * <p>The workbook is built from a fresh input stream obtained from {@code streamLimiter}. When
   * the extractor exposes summary information, its title, author and subject are added as
   * fields. The extracted text is passed through {@code StringUtils.replaceConsecutiveSpaces}
   * (with {@code " "} as replacement) and stored as the content field, after which language
   * detection is requested on that field.
   *
   * @param streamLimiter provider of the input stream the workbook is read from
   * @param lang not read by this implementation
   * @throws IOException declared by the overridden method; propagated from reading the workbook
   */
  @Override
  protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    HSSFWorkbook hssfWorkbook = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
      extractor = new ExcelExtractor(hssfWorkbook);
      ParserResultItem item = getNewParserResultItem();

      // Document-level metadata is optional; only copy it when present.
      SummaryInformation summary = extractor.getSummaryInformation();
      if (summary != null) {
        item.addField(ParserFieldEnum.title, summary.getTitle());
        item.addField(ParserFieldEnum.author, summary.getAuthor());
        item.addField(ParserFieldEnum.subject, summary.getSubject());
      }

      // Normalise spacing in the extracted text before storing it.
      String normalizedText = StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ");
      item.addField(ParserFieldEnum.content, normalizedText);

      // NOTE(review): 10000 is presumably a cap on the amount of text examined - confirm.
      item.langDetection(10000, ParserFieldEnum.content);
    } finally {
      // extractor is still null if its constructor threw; IOUtils.close is expected to
      // tolerate that - confirm against the helper.
      IOUtils.close(extractor);
    }
  }
  /**
   * Command line extractor.
   *
   * <p>Parses {@code args} into a {@code CommandArgs}. If parsing fails, the error message and
   * the usage text are written to standard error and the JVM exits with status 1. If help was
   * requested, the usage text is written to standard output and the method returns.
   *
   * <p>Otherwise the workbook is read from the configured input file, or from standard input
   * when no input file is set, and the extracted text is printed to standard output. Any
   * exception raised while reading or extracting has its stack trace printed and causes the JVM
   * to exit with status 1.
   *
   * @param args command line arguments
   */
  public static void main(String[] args) {

    CommandArgs options;
    try {
      options = new CommandArgs(args);
    } catch (CommandParseException parseFailure) {
      System.err.println(parseFailure.getMessage());
      printUsageMessage(System.err);
      System.exit(1);
      return; // suppress compiler error
    }

    if (options.isRequestHelp()) {
      printUsageMessage(System.out);
      return;
    }

    try {
      // Fall back to standard input when no input file was given.
      InputStream source =
          options.getInputFile() == null
              ? System.in
              : new FileInputStream(options.getInputFile());

      ExcelExtractor xls = new ExcelExtractor(new HSSFWorkbook(source));

      // Apply the command line switches to the extractor.
      xls.setIncludeSheetNames(options.shouldShowSheetNames());
      // The extractor flag is "formulas, not results", hence the negation.
      xls.setFormulasNotResults(!options.shouldEvaluateFormulas());
      xls.setIncludeCellComments(options.shouldShowCellComments());
      xls.setIncludeBlankCells(options.shouldShowBlankCells());
      xls.setIncludeHeadersFooters(options.shouldIncludeHeadersFooters());

      String extracted = xls.getText();
      System.out.println(extracted);
    } catch (Exception failure) {
      failure.printStackTrace();
      System.exit(1);
    }
  }