Esempio n. 1
0
 /**
  * Asserts the first three and the last three values of column 0 of the array stored under
  * {@code k}. All comparisons are exact (delta 0.0).
  *
  * @param k key handed to {@code ValueArray.value}
  * @param n1 expected value at row 0
  * @param n2 expected value at row 1
  * @param n3 expected value at row 2
  * @param nx3 expected value at the third-to-last row
  * @param nx2 expected value at the second-to-last row
  * @param nx1 expected value at the last row
  */
 protected void testKeyValues(
     Key k, double n1, double n2, double n3, double nx3, double nx2, double nx1) {
   ValueArray v = ValueArray.value(k);
   // assertEquals takes (expected, actual, delta); keeping that order makes a failure message
   // report the expected and the observed value the right way round.
   assertEquals(n1, v.datad(0, 0), 0.0);
   assertEquals(n2, v.datad(1, 0), 0.0);
   assertEquals(n3, v.datad(2, 0), 0.0);
   assertEquals(nx3, v.datad(v.numRows() - 3, 0), 0.0);
   assertEquals(nx2, v.datad(v.numRows() - 2, 0), 0.0);
   assertEquals(nx1, v.datad(v.numRows() - 1, 0), 0.0);
 }
Esempio n. 2
0
 /**
  * Executes {@code expr} and asserts that the result is a 1x1 array holding exactly
  * {@code result}. The result key is removed afterwards, whether or not the assertions pass.
  *
  * @param expr expression handed to {@code executeExpression}
  * @param result expected value of the single cell (compared with delta 0.0)
  */
 protected void testScalarExpression(String expr, double result) {
   Key key = executeExpression(expr);
   try {
     ValueArray va = ValueArray.value(key);
     // assertEquals takes (expected, actual), so the constant goes first.
     assertEquals(1, va.numRows());
     assertEquals(1, va.numCols());
     assertEquals(result, va.datad(0, 0), 0.0);
   } finally {
     // Remove the result key even when an assertion fails so it does not outlive the test.
     UKV.remove(key);
   }
 }
Esempio n. 3
0
  /**
   * Parses the kaggle creditsample-training data set, checks the parsed shape, then builds a
   * small distributed random forest on it and checks the class and tree counts of the model.
   * Every key the test creates is removed in a {@code finally} block so that a failing
   * assertion does not leave them in the store.
   */
  @org.junit.Test
  public void kaggle_credit() {
    Key okey = loadAndParseFile("credit.hex", "smalldata/kaggle/creditsample-training.csv.gz");
    final Key modelKey = Key.make("model");
    RFModel model = null;
    try {
      // Remove the unzipped intermediate under both path-separator spellings.
      UKV.remove(Key.make("smalldata/kaggle/creditsample-training.csv.gz_UNZIPPED"));
      UKV.remove(Key.make("smalldata\\kaggle\\creditsample-training.csv.gz_UNZIPPED"));
      ValueArray val = DKV.get(okey).get();

      // Check parsed dataset.
      // The expected chunk count is looked up by LOG_CHK - 20, so only LOG_CHK values
      // 20, 21 and 22 are covered by this table.
      final int n = new int[] {4, 2, 1}[ValueArray.LOG_CHK - 20];
      assertEquals("Number of chunks", n, val.chunks());
      assertEquals("Number of rows", 150000, val.numRows());
      assertEquals("Number of cols", 12, val.numCols());

      // Setup default values for DRF.
      int ntrees = 3;
      int depth = 30;
      int seed = 42;
      // Use the enum constant directly instead of round-tripping through ordinal().
      StatType statType = StatType.GINI;
      final int cols[] =
          new int[] {0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 1}; // ignore column 6, classify column 1

      // Start the distributed Random Forest
      DRFJob result =
          hex.rf.DRF.execute(
              modelKey,
              cols,
              val,
              ntrees,
              depth,
              1024,
              statType,
              seed,
              true,
              null,
              -1,
              Sampling.Strategy.RANDOM,
              1.0f,
              null,
              0,
              0,
              false);
      // Wait for completion on all nodes
      model = result.get();

      assertEquals("Number of classes", 2, model.classes());
      assertEquals("Number of trees", ntrees, model.size());
    } finally {
      // The model is only available once the job has completed.
      if (model != null) {
        model.deleteKeys();
      }
      UKV.remove(modelKey);
      UKV.remove(okey);
    }
  }
Esempio n. 4
0
 /**
  * Finds the closest cluster for every row of the chunk identified by {@code key} and passes
  * the resulting cluster indices to {@code updateClusters}.
  *
  * <p>The output array (rows of {@code ROW_SIZE} bytes) is not chunk-aligned with the source
  * array, so the rows of one source chunk can fall into more than one output chunk. Indices are
  * therefore buffered and flushed each time the output chunk number changes, and once more at
  * the end. NOTE(review): the original comment states results are sent to each chunk owner
  * using Atomic; that happens outside this method, presumably in {@code updateClusters}.
  *
  * <p>The task's references are cleared before returning, whether or not the job was running.
  */
 @Override
 public void map(Key key) {
   assert key.home();
   if (Job.isRunning(_job.self())) {
     final ValueArray source = DKV.get(_arykey).get();
     final AutoBuffer chunkBits = source.getChunk(key);
     final long firstRow = source.startRow(ValueArray.getChunkIndex(key));
     final int rowCount = source.rpc(ValueArray.getChunkIndex(key));
     // Number of output rows that fit into one chunk.
     final int outRowsPerChunk = (int) (ValueArray.CHUNK_SZ / ROW_SIZE);

     // Output chunk of the row being processed, and of the batch not yet flushed.
     long currentChunk = ValueArray.chknum(firstRow, source.numRows(), ROW_SIZE);
     long pendingChunk = currentChunk;
     long pendingFirstRow = firstRow;

     final double[] point = new double[_cols.length - 1];
     final ClusterDist nearest = new ClusterDist();
     final int[] pending = new int[rowCount];
     int pendingCount = 0;

     for (int r = 0; r < rowCount; r++) {
       final long absoluteRow = firstRow + r;
       KMeans.datad(source, chunkBits, r, _cols, _normalized, point);
       KMeans.closest(_clusters, point, nearest);
       currentChunk = ValueArray.chknum(absoluteRow, source.numRows(), ROW_SIZE);
       if (currentChunk != pendingChunk) {
         // Crossed into a new output chunk: flush what was buffered for the previous one.
         updateClusters(
             pending,
             pendingCount,
             pendingChunk,
             source.numRows(),
             outRowsPerChunk,
             pendingFirstRow);
         pendingChunk = currentChunk;
         pendingFirstRow = absoluteRow;
         pendingCount = 0;
       }
       pending[pendingCount++] = nearest._cluster;
     }

     // Flush the remainder that belongs to the last output chunk touched.
     if (pendingCount > 0) {
       updateClusters(
           pending,
           pendingCount,
           currentChunk,
           source.numRows(),
           outRowsPerChunk,
           pendingFirstRow);
     }
     _job.updateProgress(1);
   }
   // Clear the task's references now that the work is done.
   _job = null;
   _arykey = null;
   _cols = null;
   _clusters = null;
 }
Esempio n. 5
0
 /**
  * Asserts that the array stored under {@code k} has the given dimensions.
  *
  * @param k key handed to {@code ValueArray.value}
  * @param rows expected number of rows
  * @param cols expected number of columns
  */
 public void testDataFrameStructure(Key k, int rows, int cols) {
   ValueArray v = ValueArray.value(k);
   // assertEquals takes (expected, actual); the expected dimensions go first.
   assertEquals(rows, v.numRows());
   assertEquals(cols, v.numCols());
 }