Esempio n. 1
0
 @Override
 public void map(Key key) {
   _rows = new long[_clusters.length];
   _dist = new double[_clusters.length];
   assert key.home();
   ValueArray va = DKV.get(_arykey).get();
   AutoBuffer bits = va.getChunk(key);
   int rows = va.rpc(ValueArray.getChunkIndex(key));
   double[] values = new double[_cols.length - 1];
   ClusterDist cd = new ClusterDist();
   for (int row = 0; row < rows; row++) {
     KMeans.datad(va, bits, row, _cols, _normalized, values);
     KMeans.closest(_clusters, values, cd);
     _rows[cd._cluster]++;
     _dist[cd._cluster] += cd._dist;
   }
   _arykey = null;
   _cols = null;
   _clusters = null;
 }
Esempio n. 2
0
 private void updateClusters(
     int[] clusters, int count, long chunk, long numrows, int rpc, long updatedRow) {
   final int offset = (int) (updatedRow - (rpc * chunk));
   final Key chunkKey = ValueArray.getChunkKey(chunk, _job.dest());
   final int[] message;
   if (count == clusters.length) message = clusters;
   else {
     message = new int[count];
     System.arraycopy(clusters, 0, message, 0, message.length);
   }
   final int rows = ValueArray.rpc(chunk, rpc, numrows);
   new Atomic() {
     @Override
     public Value atomic(Value val) {
       assert val == null || val._key.equals(chunkKey);
       AutoBuffer b = new AutoBuffer(rows * ROW_SIZE);
       if (val != null) b._bb.put(val.memOrLoad());
       for (int i = 0; i < message.length; i++) b.put4((offset + i) * 4, message[i]);
       b.position(b.limit());
       return new Value(chunkKey, b.buf());
     }
   }.invoke(chunkKey);
 }
Esempio n. 3
0
 /**
  * Creates a new ValueArray with classes. New ValueArray is not aligned with source one
  * unfortunately so have to send results to each chunk owner using Atomic.
  */
 @Override
 public void map(Key key) {
   assert key.home();
   if (Job.isRunning(_job.self())) {
     ValueArray va = DKV.get(_arykey).get();
     AutoBuffer bits = va.getChunk(key);
     long startRow = va.startRow(ValueArray.getChunkIndex(key));
     int rows = va.rpc(ValueArray.getChunkIndex(key));
     int rpc = (int) (ValueArray.CHUNK_SZ / ROW_SIZE);
     long chunk = ValueArray.chknum(startRow, va.numRows(), ROW_SIZE);
     long updatedChk = chunk;
     long updatedRow = startRow;
     double[] values = new double[_cols.length - 1];
     ClusterDist cd = new ClusterDist();
     int[] clusters = new int[rows];
     int count = 0;
     for (int row = 0; row < rows; row++) {
       KMeans.datad(va, bits, row, _cols, _normalized, values);
       KMeans.closest(_clusters, values, cd);
       chunk = ValueArray.chknum(startRow + row, va.numRows(), ROW_SIZE);
       if (chunk != updatedChk) {
         updateClusters(clusters, count, updatedChk, va.numRows(), rpc, updatedRow);
         updatedChk = chunk;
         updatedRow = startRow + row;
         count = 0;
       }
       clusters[count++] = cd._cluster;
     }
     if (count > 0) updateClusters(clusters, count, chunk, va.numRows(), rpc, updatedRow);
     _job.updateProgress(1);
   }
   _job = null;
   _arykey = null;
   _cols = null;
   _clusters = null;
 }