/**
 * Processes one chunk of the source array: for every row in the chunk, reads the row's
 * values with {@code KMeans.datad}, asks {@code KMeans.closest} to fill in a
 * {@code ClusterDist}, then bumps that cluster's entry in {@code _rows} and adds the
 * reported distance to that cluster's entry in {@code _dist}.
 *
 * <p>On exit {@code _arykey}, {@code _cols} and {@code _clusters} are set to null
 * (NOTE(review): presumably so these inputs are not carried along with the results --
 * confirm against the task framework).
 *
 * @param key key of the chunk to process; asserted to be homed on this node
 */
@Override public void map(Key key) {
  // Per-cluster accumulators, one slot for each entry of _clusters.
  final int clusterCount = _clusters.length;
  _rows = new long[clusterCount];
  _dist = new double[clusterCount];
  assert key.home();

  ValueArray ary = DKV.get(_arykey).get();
  AutoBuffer chunkBits = ary.getChunk(key);
  int rowCount = ary.rpc(ValueArray.getChunkIndex(key));

  // Scratch buffers reused across rows.
  double[] point = new double[_cols.length - 1];
  ClusterDist nearest = new ClusterDist();

  int r = 0;
  while (r < rowCount) {
    KMeans.datad(ary, chunkBits, r, _cols, _normalized, point);
    KMeans.closest(_clusters, point, nearest);
    int c = nearest._cluster;
    _rows[c]++;
    _dist[c] += nearest._dist;
    r++;
  }

  // Drop references to the inputs once the accumulators are filled.
  _arykey = null;
  _cols = null;
  _clusters = null;
}
/**
 * Creates a new ValueArray with classes. New ValueArray is not aligned with source one
 * unfortunately so have to send results to each chunk owner using Atomic.
 *
 * <p>Work is only done while {@code Job.isRunning(_job.self())} is true. For each row of
 * the source chunk the cluster reported by {@code KMeans.closest} is appended to a local
 * batch. Whenever {@code ValueArray.chknum} returns a different chunk number than the one
 * the batch was started for, the batch is handed to {@code updateClusters} and a new one
 * is started at the current row. Any remaining batch is flushed after the loop, and
 * {@code _job.updateProgress(1)} is called once.
 *
 * <p>On exit {@code _job}, {@code _arykey}, {@code _cols} and {@code _clusters} are set to
 * null regardless of whether the job was running.
 *
 * @param key key of the source chunk to process; asserted to be homed on this node
 */
@Override public void map(Key key) {
  assert key.home();
  if (Job.isRunning(_job.self())) {
    ValueArray source = DKV.get(_arykey).get();
    AutoBuffer chunkBits = source.getChunk(key);
    long firstRow = source.startRow(ValueArray.getChunkIndex(key));
    int rowCount = source.rpc(ValueArray.getChunkIndex(key));
    // NOTE(review): looks like the number of ROW_SIZE-byte rows per output chunk -- confirm.
    int outRowsPerChunk = (int) (ValueArray.CHUNK_SZ / ROW_SIZE);

    // Chunk number (per chknum) of the current row, and of the batch being collected.
    long currentChunk = ValueArray.chknum(firstRow, source.numRows(), ROW_SIZE);
    long batchChunk = currentChunk;
    long batchFirstRow = firstRow;

    // Scratch buffers reused across rows.
    double[] point = new double[_cols.length - 1];
    ClusterDist nearest = new ClusterDist();
    int[] batch = new int[rowCount];
    int batchSize = 0;

    int r = 0;
    while (r < rowCount) {
      KMeans.datad(source, chunkBits, r, _cols, _normalized, point);
      KMeans.closest(_clusters, point, nearest);

      long absoluteRow = firstRow + r;
      currentChunk = ValueArray.chknum(absoluteRow, source.numRows(), ROW_SIZE);
      if (currentChunk != batchChunk) {
        // Crossed into another chunk: flush what was collected for the previous one.
        updateClusters(batch, batchSize, batchChunk, source.numRows(), outRowsPerChunk, batchFirstRow);
        batchChunk = currentChunk;
        batchFirstRow = absoluteRow;
        batchSize = 0;
      }
      batch[batchSize] = nearest._cluster;
      batchSize++;
      r++;
    }

    // Flush the trailing batch, if any rows were collected since the last flush.
    if (batchSize > 0) {
      updateClusters(batch, batchSize, currentChunk, source.numRows(), outRowsPerChunk, batchFirstRow);
    }
    _job.updateProgress(1);
  }

  // Drop references to the inputs before returning.
  _job = null;
  _arykey = null;
  _cols = null;
  _clusters = null;
}