Пример #1
0
  /**
   * Parses an Excel document read through Apache POI's HSSF API and stores
   * its metadata and text in a freshly created parser result item.
   *
   * <p>When the document exposes summary information, its title, author and
   * subject are recorded. The extracted text is then stored as the content
   * field after being passed through
   * {@code StringUtils.replaceConsecutiveSpaces(text, " ")}, and language
   * detection is requested on that content field.
   *
   * @param streamLimiter supplies the input stream the workbook is read from
   * @param lang          not referenced by this implementation
   * @throws IOException if the workbook cannot be read from the stream
   */
  @Override
  protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {

    // Built before the try block: a failure here leaves nothing for the finally clause to release.
    final HSSFWorkbook hssfWorkbook = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
      extractor = new ExcelExtractor(hssfWorkbook);
      final ParserResultItem item = getNewParserResultItem();

      // Summary information may be absent; metadata fields are only added when it exists.
      final SummaryInformation summary = extractor.getSummaryInformation();
      if (summary != null) {
        item.addField(ParserFieldEnum.title, summary.getTitle());
        item.addField(ParserFieldEnum.author, summary.getAuthor());
        item.addField(ParserFieldEnum.subject, summary.getSubject());
      }

      final String normalizedText = StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ");
      item.addField(ParserFieldEnum.content, normalizedText);

      // NOTE(review): 10000 is presumably a cap on how much content is sampled — confirm against langDetection.
      item.langDetection(10000, ParserFieldEnum.content);
    } finally {
      // Runs on both the normal and the exceptional path; extractor may still be null here.
      IOUtils.close(extractor);
    }
  }