// Read up to 'v._max' bytes of Value. Value should already be persisted to
// disk. A racing delete can trigger a failure where we get a null return,
// but no crash (although one could argue that a racing load&delete is a bug
// no matter what).
@Override public byte[] load(Value v) {
  long skip = 0;
  Key k = v._key;
  // Convert an arraylet chunk into a long-offset from the base file.
  if (k._kb[0] == Key.ARRAYLET_CHUNK) {
    skip = ValueArray.getChunkOffset(k); // The offset
    k = ValueArray.getArrayKey(k);       // From the base file key
  }
  if (k._kb[0] == Key.DVEC) {
    skip = water.fvec.NFSFileVec.chunkOffset(k); // The offset
  }
  try {
    FileInputStream s = null;
    try {
      s = new FileInputStream(getFileForKey(k));
      FileChannel fc = s.getChannel();
      fc.position(skip);
      AutoBuffer ab = new AutoBuffer(fc, true, Value.NFS);
      byte[] b = ab.getA1(v._max);
      ab.close();
      assert v.isPersisted();
      return b;
    } finally {
      if (s != null) s.close();
    }
  } catch (IOException e) {
    // Broken disk / short-file???
    H2O.ignore(e);
    return null;
  }
}
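// The load path above boils down to positioning a FileChannel at the chunk's byte offset
// and reading up to v._max bytes. Below is a minimal JDK-only sketch of that pattern;
// the class and method names are hypothetical and this is not the H2O code.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

class OffsetReadExample {
  // Open the backing file, seek to 'offset', and read at most 'len' bytes.
  static byte[] readAt(File f, long offset, int len) throws IOException {
    try (FileInputStream s = new FileInputStream(f)) {
      FileChannel fc = s.getChannel();
      fc.position(offset);                            // seek to the chunk start
      ByteBuffer bb = ByteBuffer.allocate(len);
      while (bb.hasRemaining() && fc.read(bb) >= 0) ;  // read until full or EOF
      byte[] out = new byte[bb.position()];           // bytes actually read
      bb.flip();
      bb.get(out);
      return out;
    }
  }
}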
/**
 * Stores the stream to its chunk using the atomic union. After the data from the stream is
 * stored, its memory is freed up.
 */
public void store() {
  assert _ab.eof();
  Key k = ValueArray.getChunkKey(_chunkIndex, _resultKey);
  AtomicUnion u = new AtomicUnion(_ab.bufClose(), _chunkOffset);
  alsoBlockFor(u.fork(k));
  _ab = null; // free mem
}
public static Compression guessCompressionMethod(byte[] bits) {
  AutoBuffer ab = new AutoBuffer(bits);
  // Look for ZIP magic
  if (bits.length > ZipFile.LOCHDR && ab.get4(0) == ZipFile.LOCSIG)
    return Compression.ZIP;
  if (bits.length > 2 && ab.get2(0) == GZIPInputStream.GZIP_MAGIC)
    return Compression.GZIP;
  return Compression.NONE;
}
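// A hypothetical caller might use the detected Compression to pick a decompressing wrapper
// before handing the bytes to the parser. This sketch is not from the H2O sources; it only
// assumes a Compression enum with the NONE/ZIP/GZIP values used above.
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

class CompressionWrapExample {
  enum Compression { NONE, ZIP, GZIP } // mirrors the enum assumed by guessCompressionMethod

  static InputStream wrap(InputStream raw, Compression method) throws IOException {
    switch (method) {
      case ZIP:
        ZipInputStream zis = new ZipInputStream(raw);
        zis.getNextEntry();             // position at the first entry's data
        return zis;
      case GZIP:
        return new GZIPInputStream(raw);
      default:
        return raw;                     // NONE: parse the bytes as-is
    }
  }
}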
/** Adds string (enum) value to the column. */
public void addStrCol(int colIdx, ValueString str) {
  if (colIdx >= _ncolumns) return;
  switch (_phase) {
    case ONE:
      ++_colIdx;
      // If this is a yet unspecified but non-numeric column, attempt a time-parse
      if (_colTypes[colIdx] == UCOL) {
        long time = attemptTimeParse(str);
        if (time != Long.MIN_VALUE) _colTypes[colIdx] = TCOL;
      } else if (_colTypes[colIdx] == TCOL) {
        return;
      }
      // Now attempt to make this an Enum col
      Enum e = _enums[colIdx];
      if (e == null || e.isKilled()) return;
      if (_colTypes[colIdx] == UCOL) _colTypes[colIdx] = ECOL;
      e.addKey(str);
      // Invalid count in phase0 is in fact number of non-numbers (it is used for
      // mean computation, is recomputed in 2nd pass)
      ++_invalidValues[colIdx];
      break;
    case TWO:
      if (_enums[colIdx] != null) {
        ++_colIdx;
        int id = _enums[colIdx].getTokenId(str);
        // We do not expect any misses here
        assert 0 <= id && id < _enums[colIdx].size();
        switch (_colTypes[colIdx]) {
          case BYTE:  _ab.put1(id);         break;
          case SHORT: _ab.put2((char) id);  break;
          case INT:   _ab.put4(id);         break;
          default: assert false : "illegal case: " + _colTypes[colIdx];
        }
      } else if (_colTypes[colIdx] == LONG) {
        ++_colIdx;
        // Times are strings with a numeric column type of LONG
        _ab.put8(attemptTimeParse(str));
      } else {
        addInvalidCol(colIdx);
      }
      break;
    default:
      assert (false);
  }
}
/** Adds invalid value to the column. */
public void addInvalidCol(int colIdx) {
  ++_colIdx;
  if (colIdx >= _ncolumns) return;
  ++_invalidValues[colIdx];
  if (_phase == Pass.ONE) return;
  switch (_colTypes[colIdx]) {
    case BYTE:
    case DBYTE:  _ab.put1(-1);                break;
    case SHORT:
    case DSHORT: _ab.put2((short) -1);        break;
    case INT:    _ab.put4(Integer.MIN_VALUE); break;
    case LONG:   _ab.put8(Long.MIN_VALUE);    break;
    case FLOAT:  _ab.put4f(Float.NaN);        break;
    case DOUBLE: _ab.put8d(Double.NaN);       break;
    case STRINGCOL:
      // TODO, replace with empty space!
      _ab.put1(-1);
      break;
    default: assert false : "illegal case: " + _colTypes[colIdx];
  }
}
@Override public C2SChunk read(AutoBuffer bb) {
  _mem   = bb.bufClose();
  _start = -1;
  _len   = (_mem.length - OFF) >> 1;
  _scale = UDP.get8d(_mem, 0);
  _bias  = UDP.get4(_mem, 8);
  return this;
}
/** Advances to a new line. In phase two it also must make sure that the remaining columns
 *  of the current row are padded with invalid values, and that output switches to the next
 *  stream once the current one is full. */
public void newLine() {
  ++_myrows;
  if (_phase == Pass.TWO) {
    while (_colIdx < _ncolumns) addInvalidCol(_colIdx);
    _colIdx = 0;
    // If we are at the end of the current stream, move to the next one
    if (_ab.eof()) {
      _outputStreams2[_outputIdx].store();
      ++_outputIdx;
      if (_outputIdx < _outputStreams2.length) {
        _ab = _outputStreams2[_outputIdx].initialize();
      } else {
        _ab = null; // just to be sure we throw an NPE if there is a problem
      }
    }
  }
}
@Override public AutoBuffer write(AutoBuffer bb) {
  bb.put4(size());
  for (Map.Entry<K, V> e : entrySet())
    bb.put(e.getKey()).put(e.getValue());
  return bb;
}
@Override public IcedArrayList<T> read(AutoBuffer bb) {
  int n = bb.get4();
  for (int i = 0; i < n; ++i)
    add(bb.<T>get());
  return this;
}
@Override public AutoBuffer write(AutoBuffer bb) {
  bb.put4(size());
  for (T t : this)
    bb.put(t);
  return bb;
}
@Override protected Keyed readAll_impl(AutoBuffer ab, Futures fs) {
  ab.getKey(_output._u_key, fs);
  ab.getKey(_output._v_key, fs);
  return super.readAll_impl(ab, fs);
}
@Override protected Keyed readAll_impl(AutoBuffer ab, Futures fs) {
  ab.getKey(_output._init_key, fs);
  ab.getKey(_output._representation_key, fs);
  return super.readAll_impl(ab, fs);
}
/** Write out K/V pairs */
@Override protected AutoBuffer writeAll_impl(AutoBuffer ab) {
  ab.putKey(_output._init_key);
  ab.putKey(_output._representation_key);
  return super.writeAll_impl(ab);
}
@Override public IcedHashMap<K, V> read(AutoBuffer bb) {
  int n = bb.get4();
  for (int i = 0; i < n; ++i)
    put(bb.<K>get(), bb.<V>get());
  return this;
}
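// The IcedHashMap/IcedArrayList read/write pairs above share one layout: a 4-byte element
// count followed by the elements, written and read back in the same order. The JDK-only
// sketch below illustrates that length-prefixed idiom; it is hypothetical and does not
// reproduce AutoBuffer's actual wire format.
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

class LengthPrefixedExample {
  static void writeStrings(DataOutputStream out, List<String> xs) throws IOException {
    out.writeInt(xs.size());              // element count first
    for (String s : xs) out.writeUTF(s);  // then each element, in order
  }

  static List<String> readStrings(DataInputStream in) throws IOException {
    int n = in.readInt();                 // read the count back first
    List<String> xs = new ArrayList<>(n);
    for (int i = 0; i < n; ++i) xs.add(in.readUTF());
    return xs;
  }
}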
@SuppressWarnings("fallthrough") public void addNumCol(int colIdx, long number, int exp) { ++_colIdx; if (colIdx >= _ncolumns) return; switch (_phase) { case ONE: double d = number * pow10(exp); if (d < _min[colIdx]) _min[colIdx] = d; if (d > _max[colIdx]) _max[colIdx] = d; _mean[colIdx] += d; if (exp != 0) { if (exp < _scale[colIdx]) _scale[colIdx] = exp; if (_colTypes[colIdx] != DCOL) { if (Math.abs(number) > MAX_FLOAT_MANTISSA || exp < -35 || exp > 35) _colTypes[colIdx] = DCOL; else _colTypes[colIdx] = FCOL; } } else if (_colTypes[colIdx] < ICOL) { _colTypes[colIdx] = ICOL; } break; case TWO: switch (_colTypes[colIdx]) { case BYTE: _ab.put1((byte) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case SHORT: _ab.put2((short) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case INT: _ab.put4((int) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case LONG: _ab.put8(number * pow10i(exp - _scale[colIdx])); break; case FLOAT: _ab.put4f((float) (number * pow10(exp))); break; case DOUBLE: _ab.put8d(number * pow10(exp)); break; case DBYTE: _ab.put1((short) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case DSHORT: // scale is computed as negative in the first pass, // therefore to compute the positive exponent after scale, we add scale and the original // exponent _ab.put2((short) (number * pow10i(exp - _scale[colIdx]) - _bases[colIdx])); break; case STRINGCOL: break; } // update sigma if (!Double.isNaN(_mean[colIdx])) { d = number * pow10(exp) - _mean[colIdx]; _sigma[colIdx] += d * d; } break; default: assert (false); } }
@Override public AutoBuffer write(AutoBuffer bb) {
  return bb.putA1(_mem, _mem.length);
}
// TODO: Drop this writeJSON_impl and use the default one.
// TODO: Pull out the help text & metadata into the ParameterSchema for the front-end to display.
public final AutoBuffer writeJSON_impl(AutoBuffer ab) {
  ab.putJSON("job", job);                           ab.put1(',');
  ab.putJSONStr("algo", algo);                      ab.put1(',');
  ab.putJSONStr("algo_full_name", algo_full_name);  ab.put1(',');
  ab.putJSONAEnum("can_build", can_build);          ab.put1(',');
  ab.putJSONEnum("visibility", visibility);         ab.put1(',');
  ab.putJSONA("messages", messages);                ab.put1(',');
  ab.putJSON4("error_count", error_count);          ab.put1(',');
  // Builds ModelParameterSchemaV2 objects for each field, and then calls writeJSON on the array
  ModelParametersSchemaV3.writeParametersJSON(
      ab, parameters,
      createParametersSchema().fillFromImpl((Model.Parameters) parameters.createImpl()));
  return ab;
}
// Deep clone all weights/biases
DeepLearningModelInfo deep_clone() {
  AutoBuffer ab = new AutoBuffer();
  this.write(ab);
  ab.flipForReading();
  return (DeepLearningModelInfo) new DeepLearningModelInfo().read(ab);
}
public DataInfo deep_clone() {
  AutoBuffer ab = new AutoBuffer();
  this.write(ab);
  ab.flipForReading();
  return new DataInfo().read(ab);
}
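// Both deep_clone() methods above use the same serialize-then-deserialize idiom: write into
// a fresh AutoBuffer, flip it for reading, and read into a new instance. The generic helper
// below is only a sketch of that idiom; it assumes a base type (here called Iced) exposing
// the write/read methods used above, and it is not part of the H2O sources.
@SuppressWarnings("unchecked")
static <T extends Iced> T cloneViaAutoBuffer(T src, T emptyDst) {
  AutoBuffer ab = new AutoBuffer(); // in-memory buffer
  src.write(ab);                    // serialize every field of src
  ab.flipForReading();              // switch the buffer from write mode to read mode
  return (T) emptyDst.read(ab);     // rebuild the copy from the serialized bytes
}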
// Insert just the predictions: a single byte/short if we are predicting a
// single class, or else the full distribution.
@Override protected AutoBuffer compress(AutoBuffer ab) {
  assert !Double.isNaN(_pred);
  return ab.put4f((float) _pred);
}
/** Write out K/V pairs */
@Override protected AutoBuffer writeAll_impl(AutoBuffer ab) {
  ab.putKey(_output._u_key);
  ab.putKey(_output._v_key);
  return super.writeAll_impl(ab);
}