Ejemplo n.º 1
0
 /**
  * Computes a compressed double buffer: one 8-byte double per row.
  *
  * <p>Rows that have no stored entry are written as 0. While the buffer is filled, the distinct
  * bit patterns seen so far are collected in a map. If the values fit within {@code
  * CUDChunk.MAX_UNIQUES} distinct entries and {@code CUDChunk.computeByteSize} reports less than
  * 80% of the raw buffer length, a {@code CUDChunk} is returned; otherwise a {@code C8DChunk}.
  *
  * @return the chunk wrapping the freshly filled buffer
  */
 private Chunk chunkD() {
   final HashMap<Long, Byte> uniques = new HashMap<>(CUDChunk.MAX_UNIQUES);
   final Byte placeholder = 0; // the map is only used for its key set
   final byte[] raw = MemoryManager.malloc1(_len * 8, true);
   boolean uniquesFit = true;
   int stored = 0; // cursor into the stored-entry arrays
   for (int row = 0; row < _len; ++row) {
     double value = 0;
     if (_id == null || _id.length == 0 || (stored < _id.length && _id[stored] == row)) {
       if (_ds != null) {
         value = _ds[stored];
       } else if (isNA2(stored) || isCategorical(stored)) {
         value = Double.NaN;
       } else {
         value = _ls[stored] * PrettyPrint.pow10(_xs[stored]);
       }
       ++stored;
     }
     if (uniquesFit) {
       if (uniques.size() < CUDChunk.MAX_UNIQUES) {
         // Still got space. Doubles are stored as longs to avoid NaN comparison issues during
         // extraction.
         uniques.put(Double.doubleToLongBits(value), placeholder);
       } else {
         // Full, but might not need more space because of repeats.
         uniquesFit =
             (uniques.size() == CUDChunk.MAX_UNIQUES)
                 && uniques.containsKey(Double.doubleToLongBits(value));
       }
     }
     UnsafeUtils.set8d(raw, 8 * row, value);
   }
   assert stored == sparseLen() : "j = " + stored + ", _len = " + sparseLen();
   if (uniquesFit && CUDChunk.computeByteSize(uniques.size(), len()) < 0.8 * raw.length) {
     return new CUDChunk(raw, uniques, len());
   }
   return new C8DChunk(raw);
 }
Ejemplo n.º 2
0
 /**
  * Computes a sparse floating-point buffer: one (row id, value) element per stored entry.
  *
  * <p>The buffer starts with {@code CXDChunk._OFF} bytes that this method does not write, followed
  * by the elements. Row ids take 2 bytes unless {@code _len >= 65535}, in which case they take 4.
  *
  * @param valsz bytes per value: 4 stores a float, 8 stores a double; any other size fails
  * @return the filled buffer
  */
 private byte[] bufD(final int valsz) {
   int shift = 0;
   while ((1 << shift) < valsz) {
     ++shift;
   }
   assert (1 << shift) == valsz; // the value size must be a power of two
   final int idBytes = _len >= 65535 ? 4 : 2;
   final int stride = idBytes + valsz;
   final byte[] out = MemoryManager.malloc1(CXDChunk._OFF + sparseLen() * stride, true);
   int pos = CXDChunk._OFF;
   for (int k = 0; k < sparseLen(); k++) {
     if (idBytes == 2) {
       UnsafeUtils.set2(out, pos, (short) _id[k]);
     } else {
       UnsafeUtils.set4(out, pos, _id[k]);
     }
     final double value;
     if (_ds != null) {
       value = _ds[k];
     } else if (isNA2(k)) {
       value = Double.NaN;
     } else {
       value = _ls[k] * PrettyPrint.pow10(_xs[k]);
     }
     if (valsz == 4) {
       UnsafeUtils.set4f(out, pos + idBytes, (float) value);
     } else if (valsz == 8) {
       UnsafeUtils.set8d(out, pos + idBytes, value);
     } else {
       throw H2O.fail();
     }
     pos += stride;
   }
   assert pos == out.length;
   return out;
 }
Ejemplo n.º 3
0
 /**
  * Creates a column whose raw buffer is allocated with {@code MemoryManager.malloc1(rows)}.
  *
  * <p>The column starts out flagged as a byte column (not float) with a bin limit of 0.
  *
  * @param s column name
  * @param rows size passed to the raw-buffer allocation
  * @param isClass value stored in {@code _isClass}
  */
 Col(String s, int rows, boolean isClass) {
   // Identity of the column.
   _name = s;
   _isClass = isClass;

   // Initial representation flags.
   _isByte = true;
   _isFloat = false;
   _colBinLimit = 0;

   // Backing storage.
   _rawB = MemoryManager.malloc1(rows);
 }
Ejemplo n.º 4
0
 /**
  * Overlays the bytes stored under {@code _key} onto a copy of {@code bits1} at offset {@code
  * _dst_off}.
  *
  * <p>The result is long enough to hold both the old contents and the overlaid bytes. The old
  * contents are copied first, so the overlaid bytes win where the two ranges overlap.
  *
  * @param bits1 current bytes, or {@code null} when there are none
  * @return a newly allocated array holding the merged bytes
  */
 @Override
 public byte[] atomic(byte[] bits1) {
   final byte[] src = DKV.get(_key).get();
   final int oldLen = (bits1 == null) ? 0 : bits1.length;
   final int newLen = Math.max(_dst_off + src.length, oldLen);
   final byte[] merged = MemoryManager.malloc1(newLen);
   if (bits1 != null) {
     System.arraycopy(bits1, 0, merged, 0, oldLen);
   }
   System.arraycopy(src, 0, merged, _dst_off, src.length);
   return merged;
 }
Ejemplo n.º 5
0
 /**
  * Recursively walks folder {@code p} and puts an entry into the DKV for every file found.
  *
  * <p>Per file, by name and size:
  *
  * <ul>
  *   <li>names ending in {@code Extensions.JSON} are parsed as JSON, must carry {@code
  *       Constants.VERSION} and {@code Constants.TYPE}, and the class named by the type is
  *       instantiated and populated via {@code fromJson}; the value put into the DKV stays
  *       {@code null} in this case;
  *   <li>names ending in {@code Extensions.HEX} have up to their first megabyte read and
  *       deserialized into a {@code ValueArray};
  *   <li>files of at least {@code 2 * ValueArray.CHUNK_SZ} bytes are wrapped in a {@code
  *       ValueArray};
  *   <li>everything else becomes a plain {@code Value} on which {@code setdsk()} is called.
  * </ul>
  *
  * <p>Any exception is logged and recorded in {@code failed} against the folder {@code p} (not the
  * individual file), and ends the walk of that folder. Streams opened on the file system are
  * always closed before moving on.
  *
  * @param fs file system to list; when {@code null} the method does nothing
  * @param p folder to walk
  * @param succeeded receives one object (key, file, size) per file that was put
  * @param failed receives one object (file, error message) per folder whose walk threw
  */
 private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
   try {
     if (fs == null) return;
     for (FileStatus file : fs.listStatus(p)) {
       Path pfs = file.getPath();
       if (file.isDir()) {
         addFolder(fs, pfs, succeeded, failed);
       } else {
         Key k = Key.make(pfs.toString());
         long size = file.getLen();
         Value val = null;
         if (pfs.getName().endsWith(Extensions.JSON)) {
           JsonObject json;
           InputStreamReader reader = new InputStreamReader(fs.open(pfs));
           try {
             json = new JsonParser().parse(reader).getAsJsonObject();
           } finally {
             closeQuietly(reader); // also closes the underlying file-system stream
           }
           JsonElement v = json.get(Constants.VERSION);
           if (v == null) throw new RuntimeException("Missing version");
           JsonElement type = json.get(Constants.TYPE);
           if (type == null) throw new RuntimeException("Missing type");
           Class<?> c = Class.forName(type.getAsString());
           OldModel model = (OldModel) c.newInstance();
           model.fromJson(json);
         } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
           int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
           byte[] mem = MemoryManager.malloc1(sz);
           FSDataInputStream s = fs.open(pfs);
           try {
             s.readFully(mem);
           } finally {
             closeQuietly(s);
           }
           // Convert to a ValueArray (hope it fits in 1Meg!)
           ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
           val = new Value(k, ary, Value.HDFS);
         } else if (size >= 2 * ValueArray.CHUNK_SZ) {
           // ValueArray byte wrapper over a large file
           val = new Value(k, new ValueArray(k, size), Value.HDFS);
         } else {
           val = new Value(k, (int) size, Value.HDFS); // Plain Value
           val.setdsk();
         }
         DKV.put(k, val);
         Log.info("PersistHdfs: DKV.put(" + k + ")");
         JsonObject o = new JsonObject();
         o.addProperty(Constants.KEY, k.toString());
         o.addProperty(Constants.FILE, pfs.toString());
         o.addProperty(Constants.VALUE_SIZE, file.getLen());
         succeeded.add(o);
       }
     }
   } catch (Exception e) {
     Log.err(e);
     JsonObject o = new JsonObject();
     o.addProperty(Constants.FILE, p.toString());
     o.addProperty(Constants.ERROR, e.getMessage());
     failed.add(o);
   }
 }

 /**
  * Closes {@code c}, logging instead of propagating a failure so that a close error can neither
  * mask an exception already in flight nor turn a successful read into a failure.
  */
 private static void closeQuietly(java.io.Closeable c) {
   if (c == null) return;
   try {
     c.close();
   } catch (java.io.IOException e) {
     Log.err(e);
   }
 }
Ejemplo n.º 6
0
 /**
  * Appends {@code str} to the {@code _ss} byte buffer as UTF-8 bytes followed by a single 0
  * terminator, growing the buffer as needed.
  *
  * <p>The buffer is sized from the encoded byte count, not {@code str.length()}: a string with
  * multi-byte characters encodes to more bytes than it has chars, and sizing by char count could
  * write past the end of the buffer. The charset is fixed to UTF-8 so the stored bytes do not
  * depend on the platform default encoding.
  *
  * @param str the string to append; must not be {@code null}
  */
 private void append_ss(String str) {
   final byte[] bytes = str.getBytes(java.nio.charset.Charset.forName("UTF-8"));
   if (_ss == null) {
     _ss = MemoryManager.malloc1((bytes.length + 1) * 4);
   }
   // Double the buffer until the encoded bytes plus the terminator fit.
   while (_ss.length < (_sslen + bytes.length + 1)) {
     _ss = MemoryManager.arrayCopyOf(_ss, _ss.length << 1);
   }
   for (byte b : bytes) _ss[_sslen++] = b;
   _ss[_sslen++] = (byte) 0; // for trailing 0;
 }
Ejemplo n.º 7
0
 /**
  * Computes a double buffer with one 8-byte double per row and wraps it in a {@code C8DChunk}.
  *
  * <p>Each value comes from {@code _ds} when that array is present. Otherwise NA and enum rows
  * become {@code NaN} and the remaining rows are rebuilt as mantissa times a power of ten.
  *
  * @return the chunk wrapping the filled buffer
  */
 private Chunk chunkD() {
   assert _len2 == _len;
   final byte[] raw = MemoryManager.malloc1(_len * 8);
   for (int row = 0; row < _len; ++row) {
     final double value;
     if (_ds != null) {
       value = _ds[row];
     } else if (isNA(row) || isEnum(row)) {
       value = Double.NaN;
     } else {
       value = _ls[row] * DParseTask.pow10(_xs[row]);
     }
     UDP.set8d(raw, 8 * row, value);
   }
   return new C8DChunk(raw);
 }
Ejemplo n.º 8
0
  /**
   * Appends the UTF-8 encoding of {@code str} to the {@code _ss} byte buffer, followed by a single
   * 0 terminator.
   *
   * <p>The buffer is allocated on first use at four times the needed size and doubled until the
   * new bytes fit.
   *
   * @param str the string to append
   */
  private void append_ss(String str) {
    final byte[] encoded = str.getBytes(Charsets.UTF_8);
    final int needed = encoded.length + 1; // payload plus the trailing 0

    // Make sure the buffer exists and is large enough.
    if (_ss == null) {
      _ss = MemoryManager.malloc1(needed * 4);
    }
    while (_sslen + needed > _ss.length) {
      _ss = MemoryManager.arrayCopyOf(_ss, _ss.length << 1);
    }

    // Copy the payload in one go, then terminate it.
    System.arraycopy(encoded, 0, _ss, _sslen, encoded.length);
    _sslen += encoded.length;
    _ss[_sslen++] = (byte) 0;
  }
Ejemplo n.º 9
0
  /**
   * Appends the bytes of {@code str} (the {@code length()} bytes of its buffer starting at its
   * offset) to the {@code _ss} byte buffer, followed by a single 0 terminator.
   *
   * <p>The buffer is allocated on first use at four times the needed size and doubled until the
   * new bytes fit.
   *
   * @param str the buffered string to append
   */
  private void append_ss(BufferedString str) {
    final int count = str.length();
    final int start = str.getOffset();
    final byte[] src = str.getBuffer();
    final int needed = count + 1; // payload plus the trailing 0

    if (_ss == null) {
      _ss = MemoryManager.malloc1(needed * 4);
    }
    while (_sslen + needed > _ss.length) {
      _ss = MemoryManager.arrayCopyOf(_ss, _ss.length << 1);
    }

    final int end = start + count;
    int pos = start;
    while (pos < end) {
      _ss[_sslen++] = src[pos++];
    }
    _ss[_sslen++] = (byte) 0; // for trailing 0;
  }
Ejemplo n.º 10
0
 /**
  * Loads the {@code v._max} bytes backing {@code v} from the file system.
  *
  * <p>With an ice root configured, the bytes are read from the start of the ice file named by
  * {@code getIceName(v)}. Otherwise the file is the one named by the key. For an arraylet chunk
  * the read starts at the chunk's offset within the base file, shifted further by the length of
  * the stored {@code ValueArray} header when the base file name ends in {@code Extensions.HEX}.
  *
  * @param v the value whose bytes are wanted
  * @return a newly allocated array of {@code v._max} bytes filled from the file
  */
 @Override
 public byte[] load(final Value v) {
   final byte[] bytes = MemoryManager.malloc1(v._max);
   Key key = v._key;
   long offset = 0;
   final Path path;
   if (_iceRoot == null) {
     // Convert an arraylet chunk into a long-offset from the base file.
     if (key._kb[0] == Key.ARRAYLET_CHUNK) {
       offset = ValueArray.getChunkOffset(key); // The offset
       key = ValueArray.getArrayKey(key); // From the base file key
       if (key.toString().endsWith(Extensions.HEX)) { // Hex file?
         // Skip over the ValueArray header as well.
         offset += DKV.get(key).memOrLoad().length;
       }
     }
     path = new Path(key.toString());
   } else {
     path = new Path(_iceRoot, getIceName(v));
   }
   final long bytesToSkip = offset;
   final Callable reader =
       new Callable() {
         @Override
         public Object call() throws Exception {
           FileSystem fs = FileSystem.get(path.toUri(), CONF);
           FSDataInputStream in = null;
           try {
             in = fs.open(path);
             // NOTE:
             // Positioned reads, i.e. in.readFully(bytesToSkip, bytes, 0, bytes.length), degrade
             // the performance of HDFS loads from the S3 API. Google API's simple seek has better
             // performance: load of a 300MB file via Google API ~ 14sec, via the positioned
             // readFully ~ 5min (under the same condition).
             ByteStreams.skipFully(in, bytesToSkip);
             ByteStreams.readFully(in, bytes);
             assert v.isPersisted();
           } finally {
             Utils.close(in);
           }
           return null;
         }
       };
   run(reader, true, v._max);
   return bytes;
 }
Ejemplo n.º 11
0
 /**
  * Computes a compressed UUID buffer: 16 bytes per row, low word first, then high word.
  *
  * <p>For a stored entry the low word comes from {@code _ls} and the high word from the raw bits
  * of {@code _ds}. An entry whose {@code _xs} slot is {@code Integer.MAX_VALUE} is written as the
  * canonical NA (low = {@code Long.MIN_VALUE}, high = 0). Rows with no stored entry are written
  * as zeros.
  *
  * @return a {@code C16Chunk} wrapping the filled buffer
  */
 private Chunk chunkUUID() {
   final byte[] bs = MemoryManager.malloc1(_len * 16, true);
   int j = 0;
   for (int i = 0; i < _len; ++i) {
     long lo = 0, hi = 0;
     if (_id == null || _id.length == 0 || (j < _id.length && _id[j] == i)) {
       lo = _ls[j];
       hi = Double.doubleToRawLongBits(_ds[j]);
       // The NA flag must be read at the same index as the value itself; advance j only after
       // every per-entry read, otherwise the check looks at the next entry's exponent.
       if (_xs != null && _xs[j] == Integer.MAX_VALUE) { // NA?
         lo = Long.MIN_VALUE;
         hi = 0; // Canonical NA value
       }
       ++j;
     }
     UnsafeUtils.set8(bs, 16 * i, lo);
     UnsafeUtils.set8(bs, 16 * i + 8, hi);
   }
   assert j == sparseLen() : "j = " + j + ", sparseLen = " + sparseLen();
   return new C16Chunk(bs);
 }
Ejemplo n.º 12
0
 /**
  * Computes a sparse integer buffer: one (row id, value) element per stored entry.
  *
  * <p>The buffer starts with {@code CXIChunk._OFF} bytes that this method does not write, followed
  * by the elements. Row ids take 2 bytes unless {@code _len >= 65535}, in which case they take 4.
  * With {@code valsz == 0} only row ids are written, and every stored entry is asserted to be the
  * value 1.
  *
  * @param valsz bytes per value: 0, 1, 2, 4 or 8; any other size fails
  * @return the filled buffer
  */
 private byte[] bufS(final int valsz) {
   int shift = 0;
   while ((1 << shift) < valsz) {
     ++shift;
   }
   assert valsz == 0 || (1 << shift) == valsz;
   final int idBytes = _len >= 65535 ? 4 : 2;
   final int stride = idBytes + valsz;
   final byte[] out = MemoryManager.malloc1(CXIChunk._OFF + sparseLen() * stride, true);
   int pos = CXIChunk._OFF;
   for (int k = 0; k < sparseLen(); k++, pos += stride) {
     if (idBytes == 2) {
       UnsafeUtils.set2(out, pos, (short) _id[k]);
     } else {
       UnsafeUtils.set4(out, pos, _id[k]);
     }
     if (valsz == 0) {
       // Id-only layout: nothing more to write for this element.
       assert _xs[k] == 0 && _ls[k] == 1;
     } else {
       assert _xs[k] == Integer.MIN_VALUE || _xs[k] >= 0
           : "unexpected exponent " + _xs[k]; // assert we have int or NA
       final long value =
           _xs[k] == Integer.MIN_VALUE ? NAS[shift] : _ls[k] * PrettyPrint.pow10i(_xs[k]);
       final int valuePos = pos + idBytes;
       if (valsz == 1) {
         out[valuePos] = (byte) value;
       } else if (valsz == 2) {
         UnsafeUtils.set2(out, valuePos, (short) value);
       } else if (valsz == 4) {
         UnsafeUtils.set4(out, valuePos, (int) value);
       } else if (valsz == 8) {
         UnsafeUtils.set8(out, valuePos, value);
       } else {
         throw H2O.fail();
       }
     }
   }
   assert pos == out.length;
   return out;
 }
Ejemplo n.º 13
0
 /**
  * Loads the {@code v._max} bytes backing {@code v} from the file system.
  *
  * <p>The read starts at the chunk offset for arraylet-chunk and DVEC keys, and at 0 otherwise.
  * For an arraylet chunk the base array key replaces the chunk key. The file is resolved via
  * {@code getPathForKey} when no ice root is set, and via {@code getIceName(v)} under the ice root
  * otherwise.
  *
  * @param v the value whose bytes are wanted
  * @return a newly allocated array of {@code v._max} bytes filled from the file
  */
 @Override
 public byte[] load(final Value v) {
   final byte[] bytes = MemoryManager.malloc1(v._max);
   Key key = v._key;
   long offset = 0;
   if (key._kb[0] == Key.ARRAYLET_CHUNK) {
     offset = ValueArray.getChunkOffset(key); // The offset
     key = ValueArray.getArrayKey(key); // From the base file key
   } else if (key._kb[0] == Key.DVEC) {
     offset = water.fvec.NFSFileVec.chunkOffset(key); // The offset
   }
   final Path path;
   if (_iceRoot == null) {
     path = new Path(getPathForKey(key));
   } else {
     path = new Path(_iceRoot, getIceName(v));
   }
   final long bytesToSkip = offset;
   final Callable reader =
       new Callable() {
         @Override
         public Object call() throws Exception {
           FileSystem fs = FileSystem.get(path.toUri(), CONF);
           FSDataInputStream in = null;
           try {
             in = fs.open(path);
             // NOTE:
             // Positioned reads, i.e. in.readFully(bytesToSkip, bytes, 0, bytes.length), degrade
             // the performance of HDFS loads from the S3 API. Google API's simple seek has better
             // performance: load of a 300MB file via Google API ~ 14sec, via the positioned
             // readFully ~ 5min (under the same condition).
             ByteStreams.skipFully(in, bytesToSkip);
             ByteStreams.readFully(in, bytes);
             assert v.isPersisted();
           } finally {
             Utils.close(in);
           }
           return null;
         }
       };
   run(reader, true, v._max);
   return bytes;
 }