protected void testKeyValues( Key k, double n1, double n2, double n3, double nx3, double nx2, double nx1) { ValueArray v = ValueArray.value(k); assertEquals(v.datad(0, 0), n1, 0.0); assertEquals(v.datad(1, 0), n2, 0.0); assertEquals(v.datad(2, 0), n3, 0.0); assertEquals(v.datad(v.numRows() - 3, 0), nx3, 0.0); assertEquals(v.datad(v.numRows() - 2, 0), nx2, 0.0); assertEquals(v.datad(v.numRows() - 1, 0), nx1, 0.0); }
protected void testScalarExpression(String expr, double result) { Key key = executeExpression(expr); ValueArray va = ValueArray.value(key); assertEquals(va.numRows(), 1); assertEquals(va.numCols(), 1); assertEquals(result, va.datad(0, 0), 0.0); UKV.remove(key); }
// Test kaggle/creditsample-test data @org.junit.Test public void kaggle_credit() { Key okey = loadAndParseFile("credit.hex", "smalldata/kaggle/creditsample-training.csv.gz"); UKV.remove(Key.make("smalldata/kaggle/creditsample-training.csv.gz_UNZIPPED")); UKV.remove(Key.make("smalldata\\kaggle\\creditsample-training.csv.gz_UNZIPPED")); ValueArray val = DKV.get(okey).get(); // Check parsed dataset final int n = new int[] {4, 2, 1}[ValueArray.LOG_CHK - 20]; assertEquals("Number of chunks", n, val.chunks()); assertEquals("Number of rows", 150000, val.numRows()); assertEquals("Number of cols", 12, val.numCols()); // setup default values for DRF int ntrees = 3; int depth = 30; int gini = StatType.GINI.ordinal(); int seed = 42; StatType statType = StatType.values()[gini]; final int cols[] = new int[] {0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 1}; // ignore column 6, classify column 1 // Start the distributed Random Forest final Key modelKey = Key.make("model"); DRFJob result = hex.rf.DRF.execute( modelKey, cols, val, ntrees, depth, 1024, statType, seed, true, null, -1, Sampling.Strategy.RANDOM, 1.0f, null, 0, 0, false); // Wait for completion on all nodes RFModel model = result.get(); assertEquals("Number of classes", 2, model.classes()); assertEquals("Number of trees", ntrees, model.size()); model.deleteKeys(); UKV.remove(modelKey); UKV.remove(okey); }
/** * Creates a new ValueArray with classes. New ValueArray is not aligned with source one * unfortunately so have to send results to each chunk owner using Atomic. */ @Override public void map(Key key) { assert key.home(); if (Job.isRunning(_job.self())) { ValueArray va = DKV.get(_arykey).get(); AutoBuffer bits = va.getChunk(key); long startRow = va.startRow(ValueArray.getChunkIndex(key)); int rows = va.rpc(ValueArray.getChunkIndex(key)); int rpc = (int) (ValueArray.CHUNK_SZ / ROW_SIZE); long chunk = ValueArray.chknum(startRow, va.numRows(), ROW_SIZE); long updatedChk = chunk; long updatedRow = startRow; double[] values = new double[_cols.length - 1]; ClusterDist cd = new ClusterDist(); int[] clusters = new int[rows]; int count = 0; for (int row = 0; row < rows; row++) { KMeans.datad(va, bits, row, _cols, _normalized, values); KMeans.closest(_clusters, values, cd); chunk = ValueArray.chknum(startRow + row, va.numRows(), ROW_SIZE); if (chunk != updatedChk) { updateClusters(clusters, count, updatedChk, va.numRows(), rpc, updatedRow); updatedChk = chunk; updatedRow = startRow + row; count = 0; } clusters[count++] = cd._cluster; } if (count > 0) updateClusters(clusters, count, chunk, va.numRows(), rpc, updatedRow); _job.updateProgress(1); } _job = null; _arykey = null; _cols = null; _clusters = null; }
public void testDataFrameStructure(Key k, int rows, int cols) { ValueArray v = ValueArray.value(k); assertEquals(v.numRows(), rows); assertEquals(v.numCols(), cols); }