/**
 * Parses a binary Excel workbook and records its metadata and text.
 *
 * <p>The workbook is read from a fresh input stream obtained from
 * {@code streamLimiter}. When the document carries summary information, its
 * title, author and subject are added to the result item. The extracted text
 * is passed through {@code StringUtils.replaceConsecutiveSpaces} with a single
 * space as replacement and stored as the content field, on which language
 * detection is then requested.
 *
 * @param streamLimiter source of the input stream holding the workbook
 * @param lang          language hint; not read by this implementation
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang)
        throws IOException {
    HSSFWorkbook workbook = new HSSFWorkbook(streamLimiter.getNewInputStream());
    // Declared outside the try so the finally clause can hand it to
    // IOUtils.close even when the extractor constructor fails (null then).
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(workbook);
        ParserResultItem item = getNewParserResultItem();

        // Document metadata is optional; copy it only when present.
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            result:
            {
                item.addField(ParserFieldEnum.title, summary.getTitle());
                item.addField(ParserFieldEnum.author, summary.getAuthor());
                item.addField(ParserFieldEnum.subject, summary.getSubject());
            }
        }

        String rawText = extractor.getText();
        String normalizedText = StringUtils.replaceConsecutiveSpaces(rawText, " ");
        item.addField(ParserFieldEnum.content, normalizedText);

        // NOTE(review): 10000 is presumably a cap on the amount of text
        // sampled for detection - confirm against langDetection.
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}
/** Command line extractor. */ public static void main(String[] args) { CommandArgs cmdArgs; try { cmdArgs = new CommandArgs(args); } catch (CommandParseException e) { System.err.println(e.getMessage()); printUsageMessage(System.err); System.exit(1); return; // suppress compiler error } if (cmdArgs.isRequestHelp()) { printUsageMessage(System.out); return; } try { InputStream is; if (cmdArgs.getInputFile() == null) { is = System.in; } else { is = new FileInputStream(cmdArgs.getInputFile()); } HSSFWorkbook wb = new HSSFWorkbook(is); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames()); extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas()); extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments()); extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); } catch (Exception e) { e.printStackTrace(); System.exit(1); } }