/** * Instantiates a new XlsProcessor. * * @param params the parameters * @param dataSet the data set * @param driver the driver */ public XlsProcessor(ExcelConnectorParams params, DataSet dataSet, Driver driver) { super(); _params = params; _dataSet = dataSet; _driver = driver; _types = new HashMap<Integer, Boolean>(); _lastRowNumber = -1; _dataSetRecord = null; _index = 1; _sheetNames = new ArrayList<String>(); _currentSheetIndex = -1; _hasKey = !Utils.isNothing(dataSet.getKeyFields()); _sheetName = (!Utils.isNothing(dataSet.getOwnerName()) && !Utils.isNothing(dataSet.getObjectName()) || Utils.isNothing(_params.getSheetName())) ? dataSet.getName() : params.getSheetName(); _sheetFound = false; }
/* * (non-Javadoc) * * @see * com.toolsverse.etl.connector.DataSetConnector#persist(com.toolsverse. * etl.connector.DataSetConnectorParams, com.toolsverse.etl.common.DataSet, * com.toolsverse.etl.driver.Driver) */ public ConnectorResult persist(ExcelConnectorParams params, DataSet dataSet, Driver driver) throws Exception { if (dataSet == null || params == null || (driver == null && dataSet.getDriver() == null) || Utils.isNothing(dataSet.getName()) || dataSet.getFieldCount() == 0 || (params.isFileNameRequired() && Utils.isNothing(params.getFileName()))) { ConnectorResult result = new ConnectorResult(); result.setRetCode(ConnectorResult.VALIDATION_FAILED_CODE); if (dataSet == null) result.addResult(ConnectorResource.VALIDATION_ERROR_DATA_SET_NULL.getValue()); if (driver == null && dataSet.getDriver() == null) result.addResult(ConnectorResource.VALIDATION_ERROR_DRIVER_NULL.getValue()); if (params == null) result.addResult(ConnectorResource.VALIDATION_ERROR_PARAMS_NULL.getValue()); if (dataSet != null && dataSet.getFieldCount() == 0) result.addResult(ConnectorResource.VALIDATION_ERROR_DATA_SET_NO_FIELDS.getValue()); if (dataSet != null && Utils.isNothing(dataSet.getName())) result.addResult(ConnectorResource.VALIDATION_ERROR_DATA_SET_NO_NAME.getValue()); if (params.isFileNameRequired() && Utils.isNothing(params.getFileName())) result.addResult(FileConnectorResource.VALIDATION_ERROR_FILE_NAME_NOT_SPECIFIED.getValue()); return result; } if (!params.isSilent()) Logger.log( Logger.INFO, EtlLogger.class, EtlResource.PERSISTING_DATASET_MSG.getValue() + dataSet.getName() + "..."); try { prePersist(params, dataSet, driver); int records = dataSet.getRecordCount(); // data for (int row = 0; row < records; row++) { DataSetRecord record = dataSet.getRecord(row); inlinePersist(params, dataSet, driver, record, row, records); } postPersist(params, dataSet, driver); ConnectorResult connectorResult = new ConnectorResult(); connectorResult.addResult( Utils.format( FileConnectorResource.FILE_PERSISTED.getValue(), new String[] {FilenameUtils.getName(params.getRealFileName())})); return connectorResult; } finally { cleanUp(params, dataSet, driver); } }
/* * (non-Javadoc) * * @see * com.toolsverse.etl.connector.DataSetConnector#prePersist(com.toolsverse * .etl.connector.DataSetConnectorParams, com.toolsverse.etl.common.DataSet, * com.toolsverse.etl.driver.Driver) */ @SuppressWarnings("resource") public void prePersist(ExcelConnectorParams params, DataSet dataSet, Driver driver) throws Exception { String fileName = null; OutputStream out = null; if (params.getOutputStream() == null) { fileName = SystemConfig.instance() .getPathUsingAppFolders( params.getFileName( dataSet.getOwnerName() != null ? dataSet.getOwnerName() : dataSet.getName(), ".xls", true)); params.setRealFileName(fileName); out = new FileOutputStream(fileName); if (params.getTransactionMonitor() != null) params.getTransactionMonitor().addFile(fileName); } else out = params.getOutputStream(); params.setOut(out); Workbook workbook = new HSSFWorkbook(); params.setWorkbook(workbook); Sheet sheet = workbook.createSheet( Utils.isNothing(params.getSheetName()) ? dataSet.getName() : params.getSheetName()); params.setSheet(sheet); Font labelFont = workbook.createFont(); labelFont.setBoldweight(Font.BOLDWEIGHT_BOLD); CellStyle labelCellStyle = workbook.createCellStyle(); labelCellStyle.setFont(labelFont); DataFormat dateTimeFormat = workbook.createDataFormat(); CellStyle dateTimeCellStyle = workbook.createCellStyle(); dateTimeCellStyle.setDataFormat(dateTimeFormat.getFormat(params.getDateTimeFormat())); params.setDateTimeCellStyle(dateTimeCellStyle); DataFormat dateFormat = workbook.createDataFormat(); CellStyle dateCellStyle = workbook.createCellStyle(); dateCellStyle.setDataFormat(dateFormat.getFormat(params.getDateFormat())); params.setDateCellStyle(dateCellStyle); DataFormat timeFormat = workbook.createDataFormat(); CellStyle timeCellStyle = workbook.createCellStyle(); timeCellStyle.setDataFormat(timeFormat.getFormat(params.getTimeFormat())); params.setTimeCellStyle(timeCellStyle); // column names Row excelRow = sheet.createRow(0); // metadata int col = 0; for (FieldDef fieldDef : dataSet.getFields().getList()) { if (!fieldDef.isVisible()) continue; Cell labelCell = excelRow.createCell(col++, Cell.CELL_TYPE_STRING); labelCell.setCellStyle(labelCellStyle); labelCell.setCellValue(fieldDef.getName()); } params.setPrePersistOccured(true); }
/* * (non-Javadoc) * * @see * com.toolsverse.etl.connector.DataSetConnector#populate(com.toolsverse * .etl.connector.DataSetConnectorParams, com.toolsverse.etl.common.DataSet, * com.toolsverse.etl.driver.Driver) */ public ConnectorResult populate(ExcelConnectorParams params, DataSet dataSet, Driver driver) throws Exception { if (dataSet == null || params == null || Utils.isNothing(dataSet.getName()) || (driver == null && dataSet.getDriver() == null)) { ConnectorResult result = new ConnectorResult(); result.setRetCode(ConnectorResult.VALIDATION_FAILED_CODE); if (dataSet == null) result.addResult(ConnectorResource.VALIDATION_ERROR_DATA_SET_NULL.getValue()); if (driver == null && dataSet.getDriver() == null) result.addResult(ConnectorResource.VALIDATION_ERROR_DRIVER_NULL.getValue()); if (params == null) result.addResult(ConnectorResource.VALIDATION_ERROR_PARAMS_NULL.getValue()); if (dataSet != null && Utils.isNothing(dataSet.getName())) result.addResult(ConnectorResource.VALIDATION_ERROR_DATA_SET_NO_NAME.getValue()); return result; } dataSet.clear(); driver = driver != null ? driver : dataSet.getDriver(); if (!params.isSilent()) Logger.log( Logger.INFO, EtlLogger.class, EtlResource.LOADING_DATASET_MSG.getValue() + dataSet.getName() + "..."); FileInputStream fin = null; POIFSFileSystem poifs = null; try { String fileName = null; if (params.getInputStream() == null) { fileName = SystemConfig.instance() .getPathUsingAppFolders( params.getFileName( dataSet.getOwnerName() != null ? dataSet.getOwnerName() : dataSet.getName(), ".xls", true)); fin = new FileInputStream(fileName); poifs = new POIFSFileSystem(fin); } else poifs = new POIFSFileSystem(params.getInputStream()); XlsProcessor xlsProcessor = new XlsProcessor(params, dataSet, driver); MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(xlsProcessor); FormatTrackingHSSFListener formatListener = new FormatTrackingHSSFListener(listener); xlsProcessor.setFormatListener(formatListener); HSSFRequest request = new HSSFRequest(); request.addListenerForAllRecords(formatListener); HSSFEventFactory factory = new HSSFEventFactory(); try { factory.processWorkbookEvents(request, poifs); } catch (Exception ex) { if (!params.isMaxRowsExceededException(ex) && !params.isSheetAlreadyExatractedException(ex)) throw ex; } if (dataSet.getFieldCount() > 0 && dataSet.getRecordCount() == 0 && params.getAddRecordCallback() != null) { params.getAddRecordCallback().onAddRecord(dataSet, driver, null, 0); } ConnectorResult connectorResult = new ConnectorResult(); connectorResult.addResult( Utils.format( FileConnectorResource.FILE_POPULATED.getValue(), new String[] {FilenameUtils.getName(fileName)})); return connectorResult; } finally { if (fin != null) fin.close(); if (params.getInputStream() != null && params.isCloseInput()) params.getInputStream().close(); if (params.getAfterCallback() != null) params.getAfterCallback().onAfter(dataSet, driver); } }
/* * (non-Javadoc) * * @see * com.toolsverse.etl.connector.DataSetConnector#inlinePersist(com.toolsverse * .etl.connector.DataSetConnectorParams, com.toolsverse.etl.common.DataSet, * com.toolsverse.etl.driver.Driver, * com.toolsverse.etl.common.DataSetRecord, int, int) */ public void inlinePersist( ExcelConnectorParams params, DataSet dataSet, Driver driver, DataSetRecord record, int row, int records) throws Exception { if (record == null) return; int currentRow = params.getCurrentRow(); Row excelRow = params.getSheet().createRow(currentRow); params.setCurrentRow(++currentRow); int colCount = dataSet.getFieldCount(); for (int col = 0; col < colCount; col++) { FieldDef fieldDef = dataSet.getFields().get(col); if (!fieldDef.isVisible()) continue; Object fieldValue = record.get(col); int fType = fieldDef.getSqlDataType(); String value = null; Cell dataCell; if (fieldValue != null) { value = dataSet.encode(fieldDef, fieldValue, driver, params.getParams(), false); } if (SqlUtils.isNumber(fType)) { dataCell = excelRow.createCell(col, Cell.CELL_TYPE_NUMERIC); dataCell.setCellValue(value); } else if (SqlUtils.isDateOnly(fType)) { dataCell = excelRow.createCell(col, Cell.CELL_TYPE_NUMERIC); dataCell.setCellStyle(params.getDateCellStyle()); if (fieldValue instanceof java.util.Date) dataCell.setCellValue((java.util.Date) fieldValue); else dataCell.setCellValue(value); } else if (SqlUtils.isTime(fType)) { dataCell = excelRow.createCell(col, Cell.CELL_TYPE_NUMERIC); dataCell.setCellStyle(params.getTimeCellStyle()); if (fieldValue instanceof java.util.Date) dataCell.setCellValue((java.util.Date) fieldValue); else dataCell.setCellValue(value); } else if (SqlUtils.isTimestamp(fType)) { dataCell = excelRow.createCell(col, Cell.CELL_TYPE_NUMERIC); dataCell.setCellStyle(params.getDateTimeCellStyle()); if (fieldValue instanceof java.util.Date) dataCell.setCellValue((java.util.Date) fieldValue); else dataCell.setCellValue(value); } else if (SqlUtils.isBoolean(fType)) { dataCell = excelRow.createCell(col, Cell.CELL_TYPE_BOOLEAN); if (fieldValue instanceof Boolean) dataCell.setCellValue((Boolean) fieldValue); else dataCell.setCellValue(value); } else { dataCell = excelRow.createCell(col, Cell.CELL_TYPE_STRING); dataCell.setCellValue(value); } } if (row >= 0 && records >= 0 && !params.isSilent() && params.getLogStep() > 0 && (row % params.getLogStep()) == 0) Logger.log( Logger.INFO, EtlLogger.class, dataSet.getName() + ": " + EtlResource.PERSITING_RECORD.getValue() + row + " out of " + records); }
/* * (non-Javadoc) * * @see * org.apache.poi.hssf.eventusermodel.HSSFListener#processRecord(org * .apache.poi.hssf.record.Record) */ public void processRecord(Record record) { int row = -1; int column = -1; int fType = Types.VARCHAR; Object cellValue = null; boolean isNewValue = false; TypedKeyValue<Integer, Number> typeAndValue; if (BOFRecord.sid == record.getSid()) { BOFRecord bof = (BOFRecord) record; if (bof.getType() == BOFRecord.TYPE_WORKSHEET) { _currentSheetIndex++; } } boolean isFound = _currentSheetIndex >= 0 && _currentSheetIndex == _sheetNames.indexOf(_sheetName); _sheetFound = _sheetFound || isFound; if (_currentSheetIndex >= 0 && !isFound) { if (!_sheetFound) return; throw new RuntimeException(ExcelConnectorParams.SHEET_ALREADY_EXTRACTED_EXCEPTION); } switch (record.getSid()) { case BoundSheetRecord.sid: BoundSheetRecord bsr = (BoundSheetRecord) record; _sheetNames.add(bsr.getSheetname()); break; case SSTRecord.sid: _sstRecord = (SSTRecord) record; break; case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; row = brec.getRow(); column = brec.getColumn(); cellValue = null; fType = Types.VARCHAR; isNewValue = true; break; case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; row = berec.getRow(); column = berec.getColumn(); cellValue = berec.getBooleanValue(); isNewValue = true; fType = Types.BOOLEAN; break; case FormulaRecord.sid: FormulaRecord frec = (FormulaRecord) record; row = frec.getRow(); column = frec.getColumn(); if (Double.isNaN(frec.getValue())) { // Formula result is a string // This is stored in the next record _outputNextStringRecord = true; _nextRow = frec.getRow(); _nextColumn = frec.getColumn(); } else { cellValue = Utils.str2Number(_formatListener.formatNumberDateCell(frec), null); fType = Types.NUMERIC; isNewValue = true; } break; case StringRecord.sid: if (_outputNextStringRecord) { // String for formula StringRecord srec = (StringRecord) record; cellValue = srec.getString(); row = _nextRow; column = _nextColumn; _outputNextStringRecord = false; fType = Types.VARCHAR; isNewValue = true; } break; case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; row = lrec.getRow(); column = lrec.getColumn(); cellValue = lrec.getValue(); fType = Types.VARCHAR; isNewValue = true; break; case LabelSSTRecord.sid: LabelSSTRecord lsrec = (LabelSSTRecord) record; if (_sstRecord == null) break; row = lsrec.getRow(); column = lsrec.getColumn(); fType = Types.VARCHAR; cellValue = _sstRecord.getString(lsrec.getSSTIndex()).toString(); typeAndValue = SqlUtils.getNumberTypeAndValue((String) cellValue); if (typeAndValue != null) { fType = typeAndValue.getKey(); cellValue = typeAndValue.getValue(); } isNewValue = true; break; case NoteRecord.sid: break; case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; row = numrec.getRow(); column = numrec.getColumn(); int fIndex = numrec.getXFIndex(); String formatString = _formatListener.getFormatString(numrec); if (_params.isDateTimeFormat(formatString)) { cellValue = Utils.str2Date( Utils.date2Str( DateUtil.getJavaDate(numrec.getValue()), _params.getDateTimeFormat()), null, _params.getDateTimeFormat()); fType = Types.TIMESTAMP; } else if (_params.isDateFormat(formatString)) { cellValue = Utils.str2Date( Utils.date2Str( DateUtil.getJavaDate(numrec.getValue()), _params.getDateFormat()), null, _params.getDateFormat()); fType = Types.DATE; } else if (_params.isTimeFormat(formatString)) { cellValue = Utils.str2Date( Utils.date2Str( DateUtil.getJavaDate(numrec.getValue()), _params.getTimeFormat()), null, _params.getTimeFormat()); fType = Types.TIME; } else if (DateUtil.isADateFormat(fIndex, formatString)) { cellValue = DateUtil.getJavaDate(numrec.getValue()); if (cellValue instanceof Date && (Utils.getDate((Date) cellValue, Calendar.YEAR, null) != 1900)) fType = Types.TIMESTAMP; else { typeAndValue = SqlUtils.getNumberTypeAndValue(_formatListener.formatNumberDateCell(numrec)); if (typeAndValue == null) { fType = Types.NUMERIC; cellValue = null; } else { fType = typeAndValue.getKey(); cellValue = typeAndValue.getValue(); } } } else { typeAndValue = SqlUtils.getNumberTypeAndValue(_formatListener.formatNumberDateCell(numrec)); if (typeAndValue == null) { fType = Types.NUMERIC; cellValue = null; } else { fType = typeAndValue.getKey(); cellValue = typeAndValue.getValue(); } } isNewValue = true; break; case RKRecord.sid: break; default: break; } // Handle new row if (row > 0 && row != _lastRowNumber) { try { if (row == 1 && _params.getBeforeCallback() != null) _params.getBeforeCallback().onBefore(_dataSet, _driver); } catch (Exception ex) { new RuntimeException(ex); } _dataSetRecord = new DataSetRecord(); if (!_params.isSilent() && _params.getLogStep() > 0 && (_index % _params.getLogStep()) == 0) Logger.log( Logger.INFO, EtlLogger.class, _dataSet.getName() + ": " + _index + EtlResource.READING_DATASET_MSG.getValue()); _index++; } // Handle missing column if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; row = mc.getRow(); column = mc.getColumn(); cellValue = ""; fType = Types.VARCHAR; isNewValue = true; } // If we got something to add, do so if (isNewValue && row >= 0 && column >= 0) { FieldDef fieldDef = null; // fields defs if (row == 0) { fieldDef = new FieldDef(); fieldDef.setName(cellValue != null ? cellValue.toString() : "field" + column); _dataSet.addField(fieldDef); } else if (_dataSet.getFieldCount() > column) { fieldDef = _dataSet.getFieldDef(column); if (fieldDef != null) { if (!Utils.isEmpty(cellValue)) { int type = fieldDef.getSqlDataType(); fType = SqlUtils.getFieldType(fType, type, _types.containsKey(column)); fieldDef.setSqlDataType(fType); fieldDef.setNativeDataType( _driver.getType(new FieldDef(fType, "VARCHAR"), null, null)); _types.put(column, true); } else cellValue = null; if (_dataSetRecord != null) { try { if (_params.getAddFieldValueCallback() != null) _params .getAddFieldValueCallback() .onAddFieldValue(_dataSet, _driver, _dataSetRecord, fieldDef); } catch (Exception ex) { new RuntimeException(ex); } addValue(cellValue, _dataSetRecord, _dataSet); } } } } // Update column and row count if (row > 0) _lastRowNumber = row; // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { // We're onto a new row if (_dataSetRecord != null) { if (_params.getMaxRows() >= 0 && _dataSet.getRecordCount() >= _params.getMaxRows()) { throw new RuntimeException(DataSetConnectorParams.MAX_ROWS_EXCEEDED_EXCEPTION); } boolean added = _dataSet.addRecord(_dataSetRecord); try { if (added && _params.getAddRecordCallback() != null) _params .getAddRecordCallback() .onAddRecord(_dataSet, _driver, _dataSetRecord, _index - 1); } catch (Exception ex) { new RuntimeException(ex); } } } }