/**
 * Consumes one record of the workbook event stream and accumulates cell values per sheet.
 *
 * <p>A {@code BoundSheetRecord} registers an empty row list in {@code parsedDataMap} and empty
 * entries in {@code missingRows} / {@code missingCells} under the sheet name. A worksheet
 * {@code BOFRecord} advances {@code sheetIndex} and makes the matching sheet current. Cell
 * records append a value to {@code currentRow}; a {@code LastCellOfRowDummyRecord} closes the
 * row and, unless every value in it is {@code null}, appends it to the current sheet.
 *
 * @param record the record to process
 */
@Override
  public void processRecord(Record record) {
    switch (record.getSid()) {
      case BoundSheetRecord.sid:
        // Sheet declaration: prepare the per-sheet containers keyed by sheet name.
        BoundSheetRecord boundSheetRecord = (BoundSheetRecord) record;
        parsedDataMap.put(boundSheetRecord.getSheetname(), new ArrayList<List<Object>>());
        boundSheetRecords.add(boundSheetRecord);

        missingRows.put(boundSheetRecord.getSheetname(), new ArrayList<Integer>());
        missingCells.put(boundSheetRecord.getSheetname(), new ArrayList<int[]>());
        break;
      case BOFRecord.sid:
        BOFRecord bofRecord = (BOFRecord) record;
        // Remembered so the matching EOF record knows which kind of stream is ending.
        bofRecordType = bofRecord.getType();
        switch (bofRecordType) {
          case BOFRecord.TYPE_WORKBOOK:
            LOG.trace("loading excel data information.");
            break;
          case BOFRecord.TYPE_WORKSHEET:
            // Switch to the next sheet and reset the per-sheet state.
            // NOTE(review): assumes worksheets appear in the same order as their
            // BoundSheetRecords were received - confirm.
            sheetIndex += 1;
            sheetName = boundSheetRecords.get(sheetIndex).getSheetname();
            currentSheet = parsedDataMap.get(sheetName);
            originalRowIndex = 0;
            maxColumnlength = -1;

            currentRow = new ArrayList<Object>();
            break;
        }
        break;
      case RowRecord.sid:
        // Track the widest row span (last column - first column) seen in this sheet.
        RowRecord rowRecord = (RowRecord) record;
        int firstColumn = rowRecord.getFirstCol();
        int lastColumn = rowRecord.getLastCol();
        int length = lastColumn - firstColumn;
        if (length > maxColumnlength) {
          maxColumnlength = length;
        }
        break;
      case BlankRecord.sid:
        // Blank cell: record its {column, row} position and keep a null placeholder.
        BlankRecord blankRecord = (BlankRecord) record;
        missingCells.get(sheetName).add(new int[] {blankRecord.getColumn(), blankRecord.getRow()});
        currentRow.add(null);
        break;
      case BoolErrRecord.sid:
        BoolErrRecord boolErrRecord = (BoolErrRecord) record;
        if (boolErrRecord.isBoolean()) {
          currentRow.add(boolErrRecord.getErrorValue() == BOOLEAN_CELL_TRUE_FLAG);
        } else if (boolErrRecord.isError()) {
          // NOTE(review): error cells are only logged; no value is added to the row, so
          // later values in this row end up one position to the left - confirm intended.
          LOG.warn(boolErrRecord);
        }
        break;
      case FormulaRecord.sid:
        // Formula cells are only traced here; a following StringRecord (if any) adds a value.
        FormulaRecord formulaRecord = (FormulaRecord) record;
        LOG.trace(
            "formulaRecord:[" + formulaRecord.getColumn() + "," + formulaRecord.getRow() + "]");
        break;
      case StringRecord.sid:
        StringRecord stringRecord = (StringRecord) record;
        currentRow.add(stringRecord.getString());
        break;
      case LabelRecord.sid:
        LabelRecord labelRecord = (LabelRecord) record;
        currentRow.add(labelRecord.getValue());
        break;
      case LabelSSTRecord.sid:
        // The cell stores only an index; the text is looked up in the collected SST record.
        LabelSSTRecord labelSSTRecord = (LabelSSTRecord) record;
        currentRow.add(
            sheetRecordCollectingListener
                .getSSTRecord()
                .getString(labelSSTRecord.getSSTIndex())
                .getString());
        break;
      case NoteRecord.sid:
        NoteRecord noteRecord = (NoteRecord) record;
        LOG.trace("noteRecord:[" + noteRecord.getColumn() + "," + noteRecord.getRow() + "]");
        break;
      case NumberRecord.sid:
        NumberRecord numberRecord = (NumberRecord) record;
        double numberValue = numberRecord.getValue();
        int formatIndex = formatTrackingHSSFListener.getFormatIndex(numberRecord);
        // Numbers carrying a built-in date format are converted to java.util.Date.
        if (HSSFDateUtil.isInternalDateFormat(formatIndex)) {
          currentRow.add(HSSFDateUtil.getJavaDate(numberValue));
        } else {
          currentRow.add(numberValue);
        }
        break;
      case RKRecord.sid:
        RKRecord rkRecord = (RKRecord) record;
        currentRow.add(rkRecord.getRKNumber());
        break;
      case EOFRecord.sid:
        switch (bofRecordType) {
          case BOFRecord.TYPE_WORKBOOK:
            LOG.trace("loading excel data information complete.");
            break;
          case BOFRecord.TYPE_WORKSHEET:
            // Drop a trailing empty row from the sheet that just ended.
            int size = currentSheet.size();
            if (size > 0 && currentSheet.get(size - 1).isEmpty()) {
              currentSheet.remove(size - 1);
            }
            break;
          default:
            // VB module, chart, Excel 4 macro and workspace streams need no end handling.
            break;
        }
        break;
      default:
        if (record instanceof LastCellOfRowDummyRecord) {
          // End of a row: decide whether the collected values are kept.
          if (!currentRow.isEmpty()) {
            int nullSize = 0;
            for (Object obj : currentRow) {
              if (obj == null) {
                nullSize += 1;
              }
            }
            if (nullSize == currentRow.size()) {
              // Every value is null: discard the row and remember its index as missing.
              currentRow.clear();
              missingRows.get(sheetName).add(originalRowIndex);
            } else {
              LastCellOfRowDummyRecord lastCellOfRowDummyRecord = (LastCellOfRowDummyRecord) record;
              if (lastCellOfRowDummyRecord.getLastColumnNumber() > -1) {
                // Pad short rows with nulls up to the widest row span seen so far.
                for (int i = currentRow.size(); i < maxColumnlength; i += 1) {
                  missingCells.get(sheetName).add(new int[] {i, lastCellOfRowDummyRecord.getRow()});
                  currentRow.add(null);
                }
              }

              currentSheet.add(currentRow);
              currentRow = new ArrayList<Object>();
            }
          }
          originalRowIndex += 1;
        } else if (record instanceof MissingCellDummyRecord) {
          // Gap inside a row: keep a null placeholder and record the {column, row} position.
          MissingCellDummyRecord missingCellDummyRecord = (MissingCellDummyRecord) record;
          currentRow.add(null);
          missingCells
              .get(sheetName)
              .add(new int[] {missingCellDummyRecord.getColumn(), missingCellDummyRecord.getRow()});
        } else if (record instanceof MissingRowDummyRecord) {
          MissingRowDummyRecord missingRowDummyRecord = (MissingRowDummyRecord) record;
          missingRows.get(sheetName).add(missingRowDummyRecord.getRowNumber());
        }
        break;
    }
  }
Exemple #2
0
 /**
  * Creates a new {@code StringRecord} whose {@code _text} and {@code _is16bitUnicode} fields
  * are copied from this record.
  *
  * @return the newly created copy
  */
 public Object clone() {
   StringRecord copy = new StringRecord();
   copy._text = _text;
   copy._is16bitUnicode = _is16bitUnicode;
   return copy;
 }
    /**
     * Processes one record of the HSSF event stream and feeds the cells of the requested sheet
     * ({@code _sheetName}) into {@code _dataSet}.
     *
     * <p>Cells in row 0 define the fields of the data set; cells in later rows are added as
     * values to the current {@code DataSetRecord}, which is appended to the data set when a
     * {@code LastCellOfRowDummyRecord} arrives.
     *
     * @param record the record to process
     * @throws RuntimeException if a sheet other than the requested one starts after the
     *     requested sheet was read, if the configured maximum number of rows is exceeded, or if
     *     one of the configured callbacks fails (the failure is wrapped as the cause)
     * @see org.apache.poi.hssf.eventusermodel.HSSFListener#processRecord(org.apache.poi.hssf.record.Record)
     */
    public void processRecord(Record record) {
      int row = -1;
      int column = -1;
      int fType = Types.VARCHAR;
      Object cellValue = null;
      boolean isNewValue = false;
      TypedKeyValue<Integer, Number> typeAndValue;

      // Every worksheet BOF record starts the next sheet.
      if (BOFRecord.sid == record.getSid()) {
        BOFRecord bof = (BOFRecord) record;

        if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
          _currentSheetIndex++;
        }
      }

      boolean isFound =
          _currentSheetIndex >= 0 && _currentSheetIndex == _sheetNames.indexOf(_sheetName);

      _sheetFound = _sheetFound || isFound;

      if (_currentSheetIndex >= 0 && !isFound) {
        // Requested sheet not reached yet: ignore the records of this sheet.
        if (!_sheetFound) {
          return;
        }

        // The requested sheet was already seen and a different sheet is current now.
        throw new RuntimeException(ExcelConnectorParams.SHEET_ALREADY_EXTRACTED_EXCEPTION);
      }

      switch (record.getSid()) {
        case BoundSheetRecord.sid:
          BoundSheetRecord bsr = (BoundSheetRecord) record;

          _sheetNames.add(bsr.getSheetname());

          break;
        case SSTRecord.sid:
          // Kept so LabelSST cells can resolve their string index later.
          _sstRecord = (SSTRecord) record;
          break;
        case BlankRecord.sid:
          BlankRecord brec = (BlankRecord) record;

          row = brec.getRow();
          column = brec.getColumn();
          cellValue = null;
          fType = Types.VARCHAR;
          isNewValue = true;

          break;
        case BoolErrRecord.sid:
          BoolErrRecord berec = (BoolErrRecord) record;

          row = berec.getRow();
          column = berec.getColumn();
          cellValue = berec.getBooleanValue();
          isNewValue = true;

          fType = Types.BOOLEAN;

          break;
        case FormulaRecord.sid:
          FormulaRecord frec = (FormulaRecord) record;

          row = frec.getRow();
          column = frec.getColumn();

          if (Double.isNaN(frec.getValue())) {
            // Formula result is a string
            // This is stored in the next record
            _outputNextStringRecord = true;
            _nextRow = frec.getRow();
            _nextColumn = frec.getColumn();
          } else {
            cellValue = Utils.str2Number(_formatListener.formatNumberDateCell(frec), null);

            fType = Types.NUMERIC;
            isNewValue = true;
          }
          break;
        case StringRecord.sid:
          if (_outputNextStringRecord) {
            // String for formula
            StringRecord srec = (StringRecord) record;
            cellValue = srec.getString();
            row = _nextRow;
            column = _nextColumn;
            _outputNextStringRecord = false;
            fType = Types.VARCHAR;
            isNewValue = true;
          }
          break;
        case LabelRecord.sid:
          LabelRecord lrec = (LabelRecord) record;

          row = lrec.getRow();
          column = lrec.getColumn();
          cellValue = lrec.getValue();
          fType = Types.VARCHAR;
          isNewValue = true;

          break;
        case LabelSSTRecord.sid:
          LabelSSTRecord lsrec = (LabelSSTRecord) record;

          // Without the shared string table the text cannot be resolved; skip the cell.
          if (_sstRecord == null) {
            break;
          }

          row = lsrec.getRow();
          column = lsrec.getColumn();

          fType = Types.VARCHAR;

          cellValue = _sstRecord.getString(lsrec.getSSTIndex()).toString();

          // A string that parses as a number is stored with the numeric type and value.
          typeAndValue = SqlUtils.getNumberTypeAndValue((String) cellValue);

          if (typeAndValue != null) {
            fType = typeAndValue.getKey();
            cellValue = typeAndValue.getValue();
          }

          isNewValue = true;

          break;
        case NoteRecord.sid:
          break;
        case NumberRecord.sid:
          NumberRecord numrec = (NumberRecord) record;

          row = numrec.getRow();
          column = numrec.getColumn();

          // DateUtil.isADateFormat expects the data-format index, not the cell's XF index.
          int fIndex = _formatListener.getFormatIndex(numrec);

          String formatString = _formatListener.getFormatString(numrec);

          if (_params.isDateTimeFormat(formatString)) {
            // Round-trip through the configured pattern so the value is truncated to it.
            cellValue =
                Utils.str2Date(
                    Utils.date2Str(
                        DateUtil.getJavaDate(numrec.getValue()), _params.getDateTimeFormat()),
                    null,
                    _params.getDateTimeFormat());

            fType = Types.TIMESTAMP;
          } else if (_params.isDateFormat(formatString)) {
            cellValue =
                Utils.str2Date(
                    Utils.date2Str(
                        DateUtil.getJavaDate(numrec.getValue()), _params.getDateFormat()),
                    null,
                    _params.getDateFormat());

            fType = Types.DATE;
          } else if (_params.isTimeFormat(formatString)) {
            cellValue =
                Utils.str2Date(
                    Utils.date2Str(
                        DateUtil.getJavaDate(numrec.getValue()), _params.getTimeFormat()),
                    null,
                    _params.getTimeFormat());

            fType = Types.TIME;
          } else {
            // A date-formatted number counts as a timestamp unless it falls in the year 1900.
            // NOTE(review): presumably 1900 marks small serial values that are not real
            // dates - confirm.
            boolean isRealDate = false;

            if (DateUtil.isADateFormat(fIndex, formatString)) {
              cellValue = DateUtil.getJavaDate(numrec.getValue());
              isRealDate =
                  cellValue instanceof Date
                      && (Utils.getDate((Date) cellValue, Calendar.YEAR, null) != 1900);
            }

            if (isRealDate) {
              fType = Types.TIMESTAMP;
            } else {
              // Plain number: derive type and value from the formatted cell text.
              typeAndValue =
                  SqlUtils.getNumberTypeAndValue(_formatListener.formatNumberDateCell(numrec));

              if (typeAndValue == null) {
                fType = Types.NUMERIC;
                cellValue = null;
              } else {
                fType = typeAndValue.getKey();
                cellValue = typeAndValue.getValue();
              }
            }
          }

          isNewValue = true;

          break;
        case RKRecord.sid:
          break;
        default:
          break;
      }

      // Handle new row
      if (row > 0 && row != _lastRowNumber) {
        try {
          // The "before" callback runs once, when the first data row (row 1) starts.
          if (row == 1 && _params.getBeforeCallback() != null) {
            _params.getBeforeCallback().onBefore(_dataSet, _driver);
          }
        } catch (Exception ex) {
          // Propagate callback failures instead of silently discarding them.
          throw new RuntimeException(ex);
        }

        _dataSetRecord = new DataSetRecord();

        if (!_params.isSilent() && _params.getLogStep() > 0 && (_index % _params.getLogStep()) == 0) {
          Logger.log(
              Logger.INFO,
              EtlLogger.class,
              _dataSet.getName() + ": " + _index + EtlResource.READING_DATASET_MSG.getValue());
        }
        _index++;
      }

      // Handle missing column
      if (record instanceof MissingCellDummyRecord) {
        MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
        row = mc.getRow();
        column = mc.getColumn();
        cellValue = "";
        fType = Types.VARCHAR;
        isNewValue = true;
      }

      // If we got something to add, do so
      if (isNewValue && row >= 0 && column >= 0) {
        FieldDef fieldDef = null;

        if (row == 0) {
          // Row 0 defines the fields; a cell without a value gets a generated name.
          fieldDef = new FieldDef();
          fieldDef.setName(cellValue != null ? cellValue.toString() : "field" + column);

          _dataSet.addField(fieldDef);
        } else if (_dataSet.getFieldCount() > column) {
          fieldDef = _dataSet.getFieldDef(column);

          if (fieldDef != null) {
            if (!Utils.isEmpty(cellValue)) {
              // Reconcile the field's current SQL type with the type of this value.
              int type = fieldDef.getSqlDataType();

              fType = SqlUtils.getFieldType(fType, type, _types.containsKey(column));

              fieldDef.setSqlDataType(fType);
              fieldDef.setNativeDataType(
                  _driver.getType(new FieldDef(fType, "VARCHAR"), null, null));

              _types.put(column, true);
            } else {
              cellValue = null;
            }

            if (_dataSetRecord != null) {
              try {
                if (_params.getAddFieldValueCallback() != null) {
                  _params
                      .getAddFieldValueCallback()
                      .onAddFieldValue(_dataSet, _driver, _dataSetRecord, fieldDef);
                }
              } catch (Exception ex) {
                // Propagate callback failures instead of silently discarding them.
                throw new RuntimeException(ex);
              }

              addValue(cellValue, _dataSetRecord, _dataSet);
            }
          }
        }
      }

      // Update column and row count
      if (row > 0) {
        _lastRowNumber = row;
      }

      // Handle end of row
      if (record instanceof LastCellOfRowDummyRecord) {
        // We're onto a new row
        if (_dataSetRecord != null) {
          if (_params.getMaxRows() >= 0 && _dataSet.getRecordCount() >= _params.getMaxRows()) {
            throw new RuntimeException(DataSetConnectorParams.MAX_ROWS_EXCEEDED_EXCEPTION);
          }

          boolean added = _dataSet.addRecord(_dataSetRecord);

          try {
            if (added && _params.getAddRecordCallback() != null) {
              _params
                  .getAddRecordCallback()
                  .onAddRecord(_dataSet, _driver, _dataSetRecord, _index - 1);
            }
          } catch (Exception ex) {
            // Propagate callback failures instead of silently discarding them.
            throw new RuntimeException(ex);
          }
        }
      }
    }
Exemple #4
0
  /**
   * Receives one record of the spreadsheet event stream and forwards its cell content to
   * {@code _callback}.
   *
   * <p>Until the first end-of-row marker arrives, cell texts are collected as column names;
   * at that marker they are handed to the callback. Afterwards every cell is reported as a
   * string column, a numeric column, or an invalid column. Each end-of-row marker also
   * triggers {@code _callback.newLine()}.
   *
   * @param record the record to process
   */
  @Override
  public void processRecord(Record record) {
    int column = -1; // -1 means "this record carries no cell"
    double number = Double.NaN;
    ValueString text = null;

    switch (record.getSid()) {
      case SSTRecord.sid:
        // Kept so LabelSST cells can resolve their string index later.
        _sstRecord = (SSTRecord) record;
        break;
      case BlankRecord.sid:
        column = ((BlankRecord) record).getColumn();
        text = _str.setTo("");
        break;
      case BoolErrRecord.sid:
        // Boolean/error cells are reported as empty strings.
        column = ((BoolErrRecord) record).getColumn();
        text = _str.setTo("");
        break;
      case FormulaRecord.sid:
        FormulaRecord formula = (FormulaRecord) record;
        column = formula.getColumn();
        number = formula.getValue();
        if (Double.isNaN(number)) {
          // The formula evaluates to a string, which arrives in the next StringRecord.
          // NOTE(review): the column is still reported once here (as invalid, or as an empty
          // column name in the first row) before the string arrives - confirm intended.
          _outputNextStringRecord = true;
          _nextCol = formula.getColumn();
        }
        break;
      case StringRecord.sid:
        if (_outputNextStringRecord) {
          // Result text of the preceding string formula.
          text = _str.setTo(((StringRecord) record).getString());
          column = _nextCol;
          _outputNextStringRecord = false;
        }
        break;
      case LabelRecord.sid:
        LabelRecord label = (LabelRecord) record;
        column = label.getColumn();
        text = _str.setTo(label.getValue());
        break;
      case LabelSSTRecord.sid:
        if (_sstRecord != null) {
          LabelSSTRecord sharedLabel = (LabelSSTRecord) record;
          column = sharedLabel.getColumn();
          text = _str.setTo(_sstRecord.getString(sharedLabel.getSSTIndex()).toString());
        } else {
          System.err.println("[ExcelParser] Missing SST record");
        }
        break;
      case NoteRecord.sid:
        System.err.println("[ExcelParser] Warning cell notes are unsupported");
        break;
      case NumberRecord.sid:
        NumberRecord numeric = (NumberRecord) record;
        column = numeric.getColumn();
        number = numeric.getValue();
        break;
      case RKRecord.sid:
        System.err.println("[ExcelParser] Warning RK records are unsupported");
        break;
      case BoundSheetRecord.sid:
      case BOFRecord.sid:
        // we just run together multiple sheets
      default:
        break;
    }

    // A gap in the row is reported as a cell with neither text nor number.
    if (record instanceof MissingCellDummyRecord) {
      column = ((MissingCellDummyRecord) record).getColumn();
      number = Double.NaN;
    }

    // End of row: publish the header once, then start a new line.
    if (record instanceof LastCellOfRowDummyRecord) {
      if (_firstRow) {
        _firstRow = false;
        _callback.setColumnNames(_columnNames.toArray(new String[_columnNames.size()]));
      }
      _callback.newLine();
    }

    if (column == -1) {
      return;
    }

    if (_firstRow) {
      // Still in the header row: the cell text becomes a column name.
      _columnNames.add(text == null ? "" : text.toString());
      return;
    }

    if (text != null) {
      _callback.addStrCol(column, text);
    } else if (Double.isNaN(number)) {
      _callback.addInvalidCol(column);
    } else {
      _callback.addCol(column, number);
    }
  }