@Override public void processRecord(Record record) { switch (record.getSid()) { case BoundSheetRecord.sid: BoundSheetRecord boundSheetRecord = (BoundSheetRecord) record; parsedDataMap.put(boundSheetRecord.getSheetname(), new ArrayList<List<Object>>()); boundSheetRecords.add(boundSheetRecord); missingRows.put(boundSheetRecord.getSheetname(), new ArrayList<Integer>()); missingCells.put(boundSheetRecord.getSheetname(), new ArrayList<int[]>()); break; case BOFRecord.sid: BOFRecord bofRecord = (BOFRecord) record; bofRecordType = bofRecord.getType(); switch (bofRecordType) { case BOFRecord.TYPE_WORKBOOK: LOG.trace("loading excel data information."); break; case BOFRecord.TYPE_WORKSHEET: sheetIndex += 1; sheetName = boundSheetRecords.get(sheetIndex).getSheetname(); currentSheet = parsedDataMap.get(sheetName); originalRowIndex = 0; maxColumnlength = -1; currentRow = new ArrayList<Object>(); break; } break; case RowRecord.sid: RowRecord rowRecord = (RowRecord) record; int firstColumn = rowRecord.getFirstCol(); int lastColumn = rowRecord.getLastCol(); int length = lastColumn - firstColumn; if (length > maxColumnlength) { maxColumnlength = length; } break; case BlankRecord.sid: BlankRecord blankRecord = (BlankRecord) record; missingCells.get(sheetName).add(new int[] {blankRecord.getColumn(), blankRecord.getRow()}); currentRow.add(null); break; case BoolErrRecord.sid: BoolErrRecord boolErrRecord = (BoolErrRecord) record; if (boolErrRecord.isBoolean()) { currentRow.add(boolErrRecord.getErrorValue() == BOOLEAN_CELL_TRUE_FLAG); } else if (boolErrRecord.isError()) { LOG.warn(boolErrRecord); } break; case FormulaRecord.sid: FormulaRecord formulaRecord = (FormulaRecord) record; LOG.trace( "formulaRecord:[" + formulaRecord.getColumn() + "," + formulaRecord.getRow() + "]"); break; case StringRecord.sid: StringRecord stringRecord = (StringRecord) record; currentRow.add(stringRecord.getString()); break; case LabelRecord.sid: LabelRecord labelRecord = (LabelRecord) record; 
currentRow.add(labelRecord.getValue()); break; case LabelSSTRecord.sid: LabelSSTRecord labelSSTRecord = (LabelSSTRecord) record; currentRow.add( sheetRecordCollectingListener .getSSTRecord() .getString(labelSSTRecord.getSSTIndex()) .getString()); break; case NoteRecord.sid: NoteRecord notegRecord = (NoteRecord) record; LOG.trace("formulaRecord:[" + notegRecord.getColumn() + "," + notegRecord.getRow() + "]"); break; case NumberRecord.sid: NumberRecord numberRecord = (NumberRecord) record; double numberValue = numberRecord.getValue(); int formatIndex = formatTrackingHSSFListener.getFormatIndex(numberRecord); if (HSSFDateUtil.isInternalDateFormat(formatIndex)) { currentRow.add(HSSFDateUtil.getJavaDate(numberValue)); } else { currentRow.add(numberValue); } break; case RKRecord.sid: RKRecord pkRecord = (RKRecord) record; currentRow.add(pkRecord.getRKNumber()); break; case EOFRecord.sid: switch (bofRecordType) { case BOFRecord.TYPE_WORKBOOK: LOG.trace("loading excel data information complete."); break; case BOFRecord.TYPE_VB_MODULE: break; case BOFRecord.TYPE_WORKSHEET: int size = currentSheet.size(); if (size > 0) { if (currentSheet.get(size - 1).isEmpty()) { currentSheet.remove(size - 1); } } break; case BOFRecord.TYPE_CHART: break; case BOFRecord.TYPE_EXCEL_4_MACRO: break; case BOFRecord.TYPE_WORKSPACE_FILE: break; } break; default: if (record instanceof LastCellOfRowDummyRecord) { // remove empty row. 
if (!currentRow.isEmpty()) { int nullSize = 0; for (Object obj : currentRow) { if (obj == null) { nullSize += 1; } } // remove the row of all member is null if (nullSize == currentRow.size()) { currentRow.clear(); missingRows.get(sheetName).add(originalRowIndex); } else { LastCellOfRowDummyRecord lastCellOfRowDummyRecord = (LastCellOfRowDummyRecord) record; if (lastCellOfRowDummyRecord.getLastColumnNumber() > -1) { for (int i = currentRow.size(); i < maxColumnlength; i += 1) { missingCells.get(sheetName).add(new int[] {i, lastCellOfRowDummyRecord.getRow()}); currentRow.add(null); } } currentSheet.add(currentRow); currentRow = new ArrayList<Object>(); } } originalRowIndex += 1; } else if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord missingCellDummyRecord = (MissingCellDummyRecord) record; currentRow.add(null); missingCells .get(sheetName) .add(new int[] {missingCellDummyRecord.getColumn(), missingCellDummyRecord.getRow()}); } else if (record instanceof MissingRowDummyRecord) { MissingRowDummyRecord missingRowDummyRecord = (MissingRowDummyRecord) record; missingRows.get(sheetName).add(missingRowDummyRecord.getRowNumber()); } break; } }
/**
 * Creates a new {@code StringRecord} carrying the same text and the same 16-bit-unicode flag as
 * this one.
 *
 * @return the newly created copy
 */
public Object clone() {
  StringRecord copy = new StringRecord();
  copy._text = _text;
  copy._is16bitUnicode = _is16bitUnicode;
  return copy;
}
/* * (non-Javadoc) * * @see * org.apache.poi.hssf.eventusermodel.HSSFListener#processRecord(org * .apache.poi.hssf.record.Record) */ public void processRecord(Record record) { int row = -1; int column = -1; int fType = Types.VARCHAR; Object cellValue = null; boolean isNewValue = false; TypedKeyValue<Integer, Number> typeAndValue; if (BOFRecord.sid == record.getSid()) { BOFRecord bof = (BOFRecord) record; if (bof.getType() == BOFRecord.TYPE_WORKSHEET) { _currentSheetIndex++; } } boolean isFound = _currentSheetIndex >= 0 && _currentSheetIndex == _sheetNames.indexOf(_sheetName); _sheetFound = _sheetFound || isFound; if (_currentSheetIndex >= 0 && !isFound) { if (!_sheetFound) return; throw new RuntimeException(ExcelConnectorParams.SHEET_ALREADY_EXTRACTED_EXCEPTION); } switch (record.getSid()) { case BoundSheetRecord.sid: BoundSheetRecord bsr = (BoundSheetRecord) record; _sheetNames.add(bsr.getSheetname()); break; case SSTRecord.sid: _sstRecord = (SSTRecord) record; break; case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; row = brec.getRow(); column = brec.getColumn(); cellValue = null; fType = Types.VARCHAR; isNewValue = true; break; case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; row = berec.getRow(); column = berec.getColumn(); cellValue = berec.getBooleanValue(); isNewValue = true; fType = Types.BOOLEAN; break; case FormulaRecord.sid: FormulaRecord frec = (FormulaRecord) record; row = frec.getRow(); column = frec.getColumn(); if (Double.isNaN(frec.getValue())) { // Formula result is a string // This is stored in the next record _outputNextStringRecord = true; _nextRow = frec.getRow(); _nextColumn = frec.getColumn(); } else { cellValue = Utils.str2Number(_formatListener.formatNumberDateCell(frec), null); fType = Types.NUMERIC; isNewValue = true; } break; case StringRecord.sid: if (_outputNextStringRecord) { // String for formula StringRecord srec = (StringRecord) record; cellValue = srec.getString(); row = _nextRow; column = 
_nextColumn; _outputNextStringRecord = false; fType = Types.VARCHAR; isNewValue = true; } break; case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; row = lrec.getRow(); column = lrec.getColumn(); cellValue = lrec.getValue(); fType = Types.VARCHAR; isNewValue = true; break; case LabelSSTRecord.sid: LabelSSTRecord lsrec = (LabelSSTRecord) record; if (_sstRecord == null) break; row = lsrec.getRow(); column = lsrec.getColumn(); fType = Types.VARCHAR; cellValue = _sstRecord.getString(lsrec.getSSTIndex()).toString(); typeAndValue = SqlUtils.getNumberTypeAndValue((String) cellValue); if (typeAndValue != null) { fType = typeAndValue.getKey(); cellValue = typeAndValue.getValue(); } isNewValue = true; break; case NoteRecord.sid: break; case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; row = numrec.getRow(); column = numrec.getColumn(); int fIndex = numrec.getXFIndex(); String formatString = _formatListener.getFormatString(numrec); if (_params.isDateTimeFormat(formatString)) { cellValue = Utils.str2Date( Utils.date2Str( DateUtil.getJavaDate(numrec.getValue()), _params.getDateTimeFormat()), null, _params.getDateTimeFormat()); fType = Types.TIMESTAMP; } else if (_params.isDateFormat(formatString)) { cellValue = Utils.str2Date( Utils.date2Str( DateUtil.getJavaDate(numrec.getValue()), _params.getDateFormat()), null, _params.getDateFormat()); fType = Types.DATE; } else if (_params.isTimeFormat(formatString)) { cellValue = Utils.str2Date( Utils.date2Str( DateUtil.getJavaDate(numrec.getValue()), _params.getTimeFormat()), null, _params.getTimeFormat()); fType = Types.TIME; } else if (DateUtil.isADateFormat(fIndex, formatString)) { cellValue = DateUtil.getJavaDate(numrec.getValue()); if (cellValue instanceof Date && (Utils.getDate((Date) cellValue, Calendar.YEAR, null) != 1900)) fType = Types.TIMESTAMP; else { typeAndValue = SqlUtils.getNumberTypeAndValue(_formatListener.formatNumberDateCell(numrec)); if (typeAndValue == null) { fType = Types.NUMERIC; 
cellValue = null; } else { fType = typeAndValue.getKey(); cellValue = typeAndValue.getValue(); } } } else { typeAndValue = SqlUtils.getNumberTypeAndValue(_formatListener.formatNumberDateCell(numrec)); if (typeAndValue == null) { fType = Types.NUMERIC; cellValue = null; } else { fType = typeAndValue.getKey(); cellValue = typeAndValue.getValue(); } } isNewValue = true; break; case RKRecord.sid: break; default: break; } // Handle new row if (row > 0 && row != _lastRowNumber) { try { if (row == 1 && _params.getBeforeCallback() != null) _params.getBeforeCallback().onBefore(_dataSet, _driver); } catch (Exception ex) { new RuntimeException(ex); } _dataSetRecord = new DataSetRecord(); if (!_params.isSilent() && _params.getLogStep() > 0 && (_index % _params.getLogStep()) == 0) Logger.log( Logger.INFO, EtlLogger.class, _dataSet.getName() + ": " + _index + EtlResource.READING_DATASET_MSG.getValue()); _index++; } // Handle missing column if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; row = mc.getRow(); column = mc.getColumn(); cellValue = ""; fType = Types.VARCHAR; isNewValue = true; } // If we got something to add, do so if (isNewValue && row >= 0 && column >= 0) { FieldDef fieldDef = null; // fields defs if (row == 0) { fieldDef = new FieldDef(); fieldDef.setName(cellValue != null ? 
cellValue.toString() : "field" + column); _dataSet.addField(fieldDef); } else if (_dataSet.getFieldCount() > column) { fieldDef = _dataSet.getFieldDef(column); if (fieldDef != null) { if (!Utils.isEmpty(cellValue)) { int type = fieldDef.getSqlDataType(); fType = SqlUtils.getFieldType(fType, type, _types.containsKey(column)); fieldDef.setSqlDataType(fType); fieldDef.setNativeDataType( _driver.getType(new FieldDef(fType, "VARCHAR"), null, null)); _types.put(column, true); } else cellValue = null; if (_dataSetRecord != null) { try { if (_params.getAddFieldValueCallback() != null) _params .getAddFieldValueCallback() .onAddFieldValue(_dataSet, _driver, _dataSetRecord, fieldDef); } catch (Exception ex) { new RuntimeException(ex); } addValue(cellValue, _dataSetRecord, _dataSet); } } } } // Update column and row count if (row > 0) _lastRowNumber = row; // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { // We're onto a new row if (_dataSetRecord != null) { if (_params.getMaxRows() >= 0 && _dataSet.getRecordCount() >= _params.getMaxRows()) { throw new RuntimeException(DataSetConnectorParams.MAX_ROWS_EXCEEDED_EXCEPTION); } boolean added = _dataSet.addRecord(_dataSetRecord); try { if (added && _params.getAddRecordCallback() != null) _params .getAddRecordCallback() .onAddRecord(_dataSet, _driver, _dataSetRecord, _index - 1); } catch (Exception ex) { new RuntimeException(ex); } } } }
@Override public void processRecord(Record record) { int curCol = -1; double curNum = Double.NaN; ValueString curStr = null; switch (record.getSid()) { case BoundSheetRecord.sid: case BOFRecord.sid: // we just run together multiple sheets break; case SSTRecord.sid: _sstRecord = (SSTRecord) record; break; case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; curCol = brec.getColumn(); curStr = _str.setTo(""); break; case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; curCol = berec.getColumn(); curStr = _str.setTo(""); break; case FormulaRecord.sid: FormulaRecord frec = (FormulaRecord) record; curCol = frec.getColumn(); curNum = frec.getValue(); if (Double.isNaN(curNum)) { // Formula result is a string // This is stored in the next record _outputNextStringRecord = true; _nextCol = frec.getColumn(); } break; case StringRecord.sid: if (_outputNextStringRecord) { // String for formula StringRecord srec = (StringRecord) record; curStr = _str.setTo(srec.getString()); curCol = _nextCol; _outputNextStringRecord = false; } break; case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; curCol = lrec.getColumn(); curStr = _str.setTo(lrec.getValue()); break; case LabelSSTRecord.sid: LabelSSTRecord lsrec = (LabelSSTRecord) record; if (_sstRecord == null) { System.err.println("[ExcelParser] Missing SST record"); } else { curCol = lsrec.getColumn(); curStr = _str.setTo(_sstRecord.getString(lsrec.getSSTIndex()).toString()); } break; case NoteRecord.sid: System.err.println("[ExcelParser] Warning cell notes are unsupported"); break; case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; curCol = numrec.getColumn(); curNum = numrec.getValue(); break; case RKRecord.sid: System.err.println("[ExcelParser] Warning RK records are unsupported"); break; default: break; } // Handle missing column if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; curCol = mc.getColumn(); curNum = 
Double.NaN; } // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { if (_firstRow) { _firstRow = false; String[] arr = new String[_columnNames.size()]; arr = _columnNames.toArray(arr); _callback.setColumnNames(arr); } _callback.newLine(); } if (curCol == -1) return; if (_firstRow) { _columnNames.add(curStr == null ? "" : curStr.toString()); } else { if (curStr == null) if (Double.isNaN(curNum)) _callback.addInvalidCol(curCol); else _callback.addCol(curCol, curNum); else _callback.addStrCol(curCol, curStr); } }