Beispiel #1
0
 /**
  * Builds the {@link Value} placeholder for an array chunk whose bytes are already stored on the
  * file system, sizing it from the length of the backing file without reading any data.
  *
  * <p>When an ice root is configured the chunk is looked up as its own ice file and the whole file
  * length is considered; otherwise the base array's file is used and the chunk offset is
  * subtracted from its length.
  *
  * @param key key of the chunk to describe
  * @return a {@code Value} for {@code key}, marked as already being on disk
  */
 @Override
 public Value lazyArrayChunk(final Key key) {
   final Key baseKey = ValueArray.getArrayKey(key); // key of the file backing the whole array
   final long start; // subtracted from the file length inside the callable
   final Path file;
   if (_iceRoot != null) {
     start = 0;
     file = new Path(_iceRoot, getIceName(key, (byte) 'V'));
   } else {
     start = ValueArray.getChunkOffset(key);
     file = new Path(baseKey.toString());
   }
   final Size chunkSize = new Size(); // mutable holder so the callable can hand back its result
   run(
       new Callable() {
         @Override
         public Object call() throws Exception {
           final FileSystem fs = FileSystem.get(file.toUri(), CONF);
           final long remaining = fs.getFileStatus(file).getLen() - start;
           // More than two chunks' worth left: a regular chunk. Otherwise this chunk takes
           // everything that remains.
           if (remaining > ValueArray.CHUNK_SZ * 2) {
             chunkSize._value = (int) ValueArray.CHUNK_SZ;
           } else {
             chunkSize._value = (int) remaining;
           }
           return null;
         }
       },
       true,
       0);
   final Value placeholder = new Value(key, chunkSize._value, Value.HDFS);
   placeholder.setdsk(); // the bytes are already on disk
   return placeholder;
 }
Beispiel #2
0
 /**
  * Convenience overload that fills in the remaining redirect arguments from the random-forest
  * model stored under {@code rfModelKey}.
  *
  * <p>The model's data key and total tree count are passed through, together with the column count
  * of the model's data set minus one.
  *
  * @param fromPageResponse response of the page the redirect originates from
  * @param rfModelKey key under which the {@code RFModel} is stored in the DKV
  * @return the response produced by the full {@code redirect} overload
  */
 public static Response redirect(JsonObject fromPageResponse, Key rfModelKey) {
   final RFModel model = DKV.get(rfModelKey).get();
   final ValueArray trainingData = DKV.get(model._dataKey).get();
   return redirect(
       fromPageResponse,
       null,
       rfModelKey,
       model._dataKey,
       model._totalTrees,
       trainingData.numCols() - 1,
       null,
       true,
       false);
 }
Beispiel #3
0
 /**
  * Reads the bytes backing a persisted value into a freshly allocated array of {@code v._max}
  * bytes.
  *
  * <p>With an ice root configured, the value is read from the start of its own ice file.
  * Otherwise the path is the key's string form; for an arraylet chunk the base array's file is
  * opened instead and reading starts at the chunk offset, pushed further by the length of the
  * stored ValueArray header when the file name ends with the HEX extension.
  *
  * @param v value whose bytes are loaded
  * @return the filled buffer
  */
 @Override
 public byte[] load(final Value v) {
   final byte[] buf = MemoryManager.malloc1(v._max);
   final Path file;
   final long startAt;
   if (_iceRoot != null) {
     file = new Path(_iceRoot, getIceName(v));
     startAt = 0;
   } else {
     Key source = v._key;
     long offset = 0;
     if (source._kb[0] == Key.ARRAYLET_CHUNK) {
       // An arraylet chunk is addressed as an offset into the base array's file.
       offset = ValueArray.getChunkOffset(source);
       source = ValueArray.getArrayKey(source);
       if (source.toString().endsWith(Extensions.HEX)) {
         // Hex files start with the ValueArray header; skip over it as well.
         offset += DKV.get(source).memOrLoad().length;
       }
     }
     file = new Path(source.toString());
     startAt = offset;
   }
   run(
       new Callable() {
         @Override
         public Object call() throws Exception {
           final FileSystem fs = FileSystem.get(file.toUri(), CONF);
           FSDataInputStream in = null;
           try {
             in = fs.open(file);
             // Deliberately skip-then-read rather than the positioned
             // in.readFully(startAt, buf, 0, buf.length): the positioned read was observed to be
             // far slower when HDFS is served through the S3 API (about 5 minutes versus about
             // 14 seconds for a 300MB file under the same conditions).
             ByteStreams.skipFully(in, startAt);
             ByteStreams.readFully(in, buf);
             assert v.isPersisted();
           } finally {
             Utils.close(in);
           }
           return null;
         }
       },
       true,
       v._max);
   return buf;
 }
Beispiel #4
0
 /**
  * Configures a {@link KMeansScore} task from the given model and invokes it on the array.
  *
  * @param model model supplying the clusters, the normalization flag and the column mapping
  * @param ary data set to score; its column names are mapped through the model
  * @return the task after {@code invoke} has returned
  */
 public static KMeansScore score(KMeansModel model, ValueArray ary) {
   final KMeansScore task = new KMeansScore();
   task._clusters = model._clusters;
   task._normalized = model._normalized;
   task._cols = model.columnMapping(ary.colNames());
   task._arykey = ary._key;
   task.invoke(ary._key);
   return task;
 }
Beispiel #5
0
 /**
  * Scores one chunk: for every row in the chunk, finds the closest cluster and accumulates the
  * row count and the distance for that cluster into {@code _rows} and {@code _dist}.
  *
  * @param key key of the chunk to process; asserted to be homed on this node
  */
 @Override
 public void map(Key key) {
   _rows = new long[_clusters.length];
   _dist = new double[_clusters.length];
   assert key.home();
   final ValueArray source = DKV.get(_arykey).get();
   final AutoBuffer chunkBits = source.getChunk(key);
   final int rowCount = source.rpc(ValueArray.getChunkIndex(key));
   final double[] point = new double[_cols.length - 1]; // reused for every row
   final ClusterDist nearest = new ClusterDist(); // reused for every row
   for (int r = 0; r < rowCount; r++) {
     KMeans.datad(source, chunkBits, r, _cols, _normalized, point);
     KMeans.closest(_clusters, point, nearest);
     final int winner = nearest._cluster;
     _rows[winner]++;
     _dist[winner] += nearest._dist;
   }
   // Inputs are dropped once the chunk is done; only _rows and _dist remain set.
   // NOTE(review): presumably so they are not carried along with the results - confirm.
   _arykey = null;
   _cols = null;
   _clusters = null;
 }
Beispiel #6
0
 /**
  * Reads the bytes backing a persisted value into a freshly allocated array of {@code v._max}
  * bytes.
  *
  * <p>The read starts at the chunk offset for arraylet chunks (which are also re-keyed to their
  * base array) and for DVEC chunks, and at the beginning of the file for every other key. The
  * file is the value's ice file when an ice root is configured, otherwise the path derived from
  * the key.
  *
  * @param v value whose bytes are loaded
  * @return the filled buffer
  */
 @Override
 public byte[] load(final Value v) {
   final byte[] buf = MemoryManager.malloc1(v._max);
   Key source = v._key;
   final long startAt;
   if (source._kb[0] == Key.ARRAYLET_CHUNK) {
     startAt = ValueArray.getChunkOffset(source);
     source = ValueArray.getArrayKey(source); // read from the base array's file
   } else if (source._kb[0] == Key.DVEC) {
     startAt = water.fvec.NFSFileVec.chunkOffset(source);
   } else {
     startAt = 0;
   }
   final Path file;
   if (_iceRoot == null) {
     file = new Path(getPathForKey(source));
   } else {
     file = new Path(_iceRoot, getIceName(v));
   }
   run(
       new Callable() {
         @Override
         public Object call() throws Exception {
           final FileSystem fs = FileSystem.get(file.toUri(), CONF);
           FSDataInputStream in = null;
           try {
             in = fs.open(file);
             // Deliberately skip-then-read rather than the positioned
             // in.readFully(startAt, buf, 0, buf.length): the positioned read was observed to be
             // far slower when HDFS is served through the S3 API (about 5 minutes versus about
             // 14 seconds for a 300MB file under the same conditions).
             ByteStreams.skipFully(in, startAt);
             ByteStreams.readFully(in, buf);
             assert v.isPersisted();
           } finally {
             Utils.close(in);
           }
           return null;
         }
       },
       true,
       v._max);
   return buf;
 }
Beispiel #7
0
 /**
  * Writes a run of cluster assignments into one chunk of the job's destination array, merging
  * them with whatever that chunk already holds through an {@code Atomic} update.
  *
  * @param clusters buffer whose first {@code count} entries are the assignments to write
  * @param count number of valid entries in {@code clusters}
  * @param chunk number of the destination chunk to update
  * @param numrows total number of rows, passed to {@code ValueArray.rpc} to size the chunk
  * @param rpc rows per destination chunk, used to turn {@code updatedRow} into an in-chunk index
  * @param updatedRow global row index of the first assignment in {@code clusters}
  */
 private void updateClusters(
     int[] clusters, int count, long chunk, long numrows, int rpc, long updatedRow) {
   // Row index of the first assignment relative to the start of the destination chunk.
   final int offset = (int) (updatedRow - (rpc * chunk));
   final Key chunkKey = ValueArray.getChunkKey(chunk, _job.dest());
   // Only the first `count` entries are valid; trim unless the buffer is exactly full.
   final int[] message;
   if (count == clusters.length) {
     message = clusters;
   } else {
     message = new int[count];
     System.arraycopy(clusters, 0, message, 0, message.length);
   }
   final int rows = ValueArray.rpc(chunk, rpc, numrows);
   new Atomic() {
     @Override
     public Value atomic(Value val) {
       assert val == null || val._key.equals(chunkKey);
       // Always build a full-size chunk, seeded with the existing bytes when there are any.
       AutoBuffer b = new AutoBuffer(rows * ROW_SIZE);
       if (val != null) {
         b._bb.put(val.memOrLoad());
       }
       // Slot position uses ROW_SIZE, the same stride that sizes the buffer above, instead of
       // a separately hard-coded 4.
       for (int i = 0; i < message.length; i++) {
         b.put4((offset + i) * ROW_SIZE, message[i]);
       }
       // NOTE(review): presumably moves the position to the end so buf() covers the whole
       // chunk - confirm against AutoBuffer.
       b.position(b.limit());
       return new Value(chunkKey, b.buf());
     }
   }.invoke(chunkKey);
 }
Beispiel #8
0
    /**
     * Starts a job that invokes a {@code KMeansApply} task over {@code ary} and then publishes a
     * single-column ValueArray under {@code dest}.
     *
     * <p>The destination is deleted and locked by the job before the task is submitted; the task
     * unlocks it and removes the job once the result header has been built.
     *
     * @param dest key of the resulting array
     * @param model model supplying clusters, normalization flag and column mapping
     * @param ary data set to process
     * @return the started job
     */
    public static Job run(final Key dest, final KMeansModel model, final ValueArray ary) {
      final ChunkProgressJob job = new ChunkProgressJob(ary.chunks(), dest);
      new ValueArray(dest, 0).delete_and_lock(job.self());
      final H2OCountedCompleter fjtask =
          new H2OCountedCompleter() {
            @Override
            public void compute2() {
              final KMeansApply apply = new KMeansApply();
              apply._job = job;
              apply._arykey = ary._key;
              apply._clusters = model._clusters;
              apply._normalized = model._normalized;
              apply._cols = model.columnMapping(ary.colNames());
              apply.invoke(ary._key);

              // Describe the single response column of the result.
              final Column response = new Column();
              response._name = Constants.RESPONSE;
              response._domain = null;
              response._size = ROW_SIZE;
              response._scale = 1;
              response._n = ary.numRows();
              response._min = 0;
              response._max = model._clusters.length;
              response._mean = Double.NaN;
              response._sigma = Double.NaN;

              final ValueArray result =
                  new ValueArray(dest, ary.numRows(), response._size, new Column[] {response});
              result.unlock(job.self());
              job.remove();
              tryComplete();
            }

            @Override
            public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
              // Report the failure to the job before the default handling runs.
              job.onException(ex);
              return super.onExceptionalCompletion(ex, caller);
            }
          };
      job.start(fjtask);
      H2O.submitTask(fjtask);
      return job;
    }
Beispiel #9
0
 /**
  * Assigns every row of one source chunk to its closest cluster and writes the cluster ids into
  * the destination ValueArray.
  *
  * <p>The destination's chunks are not aligned with the source's, so the assignments are batched
  * per destination chunk and each batch is sent to that chunk's owner through an Atomic (see
  * {@code updateClusters}). Nothing is computed when the job is no longer running.
  *
  * @param key key of the source chunk to process; asserted to be homed on this node
  */
 @Override
 public void map(Key key) {
   assert key.home();
   if (Job.isRunning(_job.self())) {
     final ValueArray source = DKV.get(_arykey).get();
     final AutoBuffer chunkBits = source.getChunk(key);
     final long firstRow = source.startRow(ValueArray.getChunkIndex(key));
     final int rowCount = source.rpc(ValueArray.getChunkIndex(key));
     final int destRowsPerChunk = (int) (ValueArray.CHUNK_SZ / ROW_SIZE);
     long destChunk = ValueArray.chknum(firstRow, source.numRows(), ROW_SIZE);
     long pendingChunk = destChunk; // destination chunk the current batch belongs to
     long pendingFirstRow = firstRow; // global row of the first entry in the current batch
     final double[] point = new double[_cols.length - 1];
     final ClusterDist nearest = new ClusterDist();
     final int[] pending = new int[rowCount];
     int pendingCount = 0;
     for (int r = 0; r < rowCount; r++) {
       final long globalRow = firstRow + r;
       KMeans.datad(source, chunkBits, r, _cols, _normalized, point);
       KMeans.closest(_clusters, point, nearest);
       destChunk = ValueArray.chknum(globalRow, source.numRows(), ROW_SIZE);
       if (destChunk != pendingChunk) {
         // Crossed into another destination chunk: flush the batch and start a new one here.
         updateClusters(
             pending, pendingCount, pendingChunk, source.numRows(), destRowsPerChunk,
             pendingFirstRow);
         pendingChunk = destChunk;
         pendingFirstRow = globalRow;
         pendingCount = 0;
       }
       pending[pendingCount++] = nearest._cluster;
     }
     if (pendingCount > 0) {
       // Flush whatever is left for the last destination chunk touched.
       updateClusters(
           pending, pendingCount, destChunk, source.numRows(), destRowsPerChunk, pendingFirstRow);
     }
     _job.updateProgress(1);
   }
   // Inputs are dropped once the chunk is done.
   // NOTE(review): presumably so they are not carried along with the task's results - confirm.
   _job = null;
   _arykey = null;
   _cols = null;
   _clusters = null;
 }