/** Adds invalid value to the column. */ public void addInvalidCol(int colIdx) { ++_colIdx; if (colIdx >= _ncolumns) return; ++_invalidValues[colIdx]; if (_phase == Pass.ONE) return; switch (_colTypes[colIdx]) { case BYTE: case DBYTE: _ab.put1(-1); break; case SHORT: case DSHORT: _ab.put2((short) -1); break; case INT: _ab.put4(Integer.MIN_VALUE); break; case LONG: _ab.put8(Long.MIN_VALUE); break; case FLOAT: _ab.put4f(Float.NaN); break; case DOUBLE: _ab.put8d(Double.NaN); break; case STRINGCOL: // TODO, replace with empty space! _ab.put1(-1); break; default: assert false : "illegal case: " + _colTypes[colIdx]; } }
/** Adds string (enum) value to the column. */ public void addStrCol(int colIdx, ValueString str) { if (colIdx >= _ncolumns) return; switch (_phase) { case ONE: ++_colIdx; // If this is a yet unspecified but non-numeric column, attempt a time-parse if (_colTypes[colIdx] == UCOL) { long time = attemptTimeParse(str); if (time != Long.MIN_VALUE) _colTypes[colIdx] = TCOL; } else if (_colTypes[colIdx] == TCOL) { return; } // Now attempt to make this an Enum col Enum e = _enums[colIdx]; if (e == null || e.isKilled()) return; if (_colTypes[colIdx] == UCOL) _colTypes[colIdx] = ECOL; e.addKey(str); ++_invalidValues[ colIdx]; // invalid count in phase0 is in fact number of non-numbers (it is used for // mean computation, is recomputed in 2nd pass) break; case TWO: if (_enums[colIdx] != null) { ++_colIdx; int id = _enums[colIdx].getTokenId(str); // we do not expect any misses here assert 0 <= id && id < _enums[colIdx].size(); switch (_colTypes[colIdx]) { case BYTE: _ab.put1(id); break; case SHORT: _ab.put2((char) id); break; case INT: _ab.put4(id); break; default: assert false : "illegal case: " + _colTypes[colIdx]; } } else if (_colTypes[colIdx] == LONG) { ++_colIdx; // Times are strings with a numeric column type of LONG _ab.put8(attemptTimeParse(str)); } else { addInvalidCol(colIdx); } break; default: assert (false); } }
@SuppressWarnings("fallthrough") public void addNumCol(int colIdx, long number, int exp) { ++_colIdx; if (colIdx >= _ncolumns) return; switch (_phase) { case ONE: double d = number * pow10(exp); if (d < _min[colIdx]) _min[colIdx] = d; if (d > _max[colIdx]) _max[colIdx] = d; _mean[colIdx] += d; if (exp != 0) { if (exp < _scale[colIdx]) _scale[colIdx] = exp; if (_colTypes[colIdx] != DCOL) { if (Math.abs(number) > MAX_FLOAT_MANTISSA || exp < -35 || exp > 35) _colTypes[colIdx] = DCOL; else _colTypes[colIdx] = FCOL; } } else if (_colTypes[colIdx] < ICOL) { _colTypes[colIdx] = ICOL; } break; case TWO: switch (_colTypes[colIdx]) { case BYTE: _ab.put1((byte) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case SHORT: _ab.put2((short) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case INT: _ab.put4((int) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case LONG: _ab.put8(number * pow10i(exp - _scale[colIdx])); break; case FLOAT: _ab.put4f((float) (number * pow10(exp))); break; case DOUBLE: _ab.put8d(number * pow10(exp)); break; case DBYTE: _ab.put1((short) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case DSHORT: // scale is computed as negative in the first pass, // therefore to compute the positive exponent after scale, we add scale and the original // exponent _ab.put2((short) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case STRINGCOL: break; } // update sigma if (!Double.isNaN(_mean[colIdx])) { d = number * pow10(exp) - _mean[colIdx]; _sigma[colIdx] += d * d; } break; default: assert (false); } }