/**
 * Deletes v1 and v2 after operations that may modify them during processing: enum conversion
 * and/or train/test adaptation.
 */
private void simpleCMTest(
    Frame v1,
    Frame v2,
    String[] actualDomain,
    String[] predictedDomain,
    String[] expectedDomain,
    double[][] expectedCM,
    boolean debug,
    boolean toEnum) {
  Scope.enter();
  try {
    ConfusionMatrix cm = buildCM(v1.vecs()[0].toEnum(), v2.vecs()[0].toEnum());
    // -- DEBUG --
    if (debug) {
      System.err.println("actual            : " + Arrays.toString(actualDomain));
      System.err.println("predicted         : " + Arrays.toString(predictedDomain));
      System.err.println("CM domain         : " + Arrays.toString(cm._domain));
      System.err.println("expected CM domain: " + Arrays.toString(expectedDomain) + "\n");
      for (int i = 0; i < cm._cm.length; i++) System.err.println(Arrays.toString(cm._cm[i]));
      System.err.println("");
      System.err.println(cm.toASCII());
    }
    // -- -- --
    assertCMEqual(expectedDomain, expectedCM, cm);
  } finally {
    if (v1 != null) v1.delete();
    if (v2 != null) v2.delete();
    Scope.exit();
  }
}
private void simpleCMTest(
    String f1,
    String f2,
    String[] expectedActualDomain,
    String[] expectedPredictDomain,
    String[] expectedDomain,
    double[][] expectedCM,
    boolean debug,
    boolean toEnum) {
  try {
    Frame v1 = parseFrame(Key.make("v1.hex"), find_test_file(f1));
    Frame v2 = parseFrame(Key.make("v2.hex"), find_test_file(f2));
    v2 = v1.makeCompatible(v2);
    simpleCMTest(
        v1, v2, expectedActualDomain, expectedPredictDomain, expectedDomain, expectedCM, debug,
        toEnum);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
@Override
public void map(Chunk[] ix, NewChunk[] ncs) {
  final Vec[] vecs = new Vec[_cols.length];
  final Vec anyv = _base.anyVec();
  final long nrow = anyv.length();
  long r = ix[0].at80(0);
  int last_ci = anyv.elem2ChunkIdx(r < nrow ? r : 0); // memoize the last chunk index
  long last_c0 = anyv._espc[last_ci];                 // ... last chunk start
  long last_c1 = anyv._espc[last_ci + 1];             // ... last chunk end
  Chunk[] last_cs = new Chunk[vecs.length];           // ... last chunks
  for (int c = 0; c < _cols.length; c++) {
    vecs[c] = _base.vecs()[_cols[c]];
    last_cs[c] = vecs[c].elem2BV(last_ci);
  }
  for (int i = 0; i < ix[0]._len; i++) { // select one row
    r = ix[0].at80(i) - 1;               // next row to select
    if (r < 0) continue;
    if (r >= nrow) {
      for (int c = 0; c < vecs.length; c++) ncs[c].addNum(Double.NaN);
    } else {
      if (r < last_c0 || r >= last_c1) {
        last_ci = anyv.elem2ChunkIdx(r);
        last_c0 = anyv._espc[last_ci];
        last_c1 = anyv._espc[last_ci + 1];
        for (int c = 0; c < vecs.length; c++) last_cs[c] = vecs[c].elem2BV(last_ci);
      }
      for (int c = 0; c < vecs.length; c++) ncs[c].addNum(last_cs[c].at(r));
    }
  }
}
@Override
Val apply(Env env, Env.StackHelp stk, AST asts[]) {
  Val v = stk.track(asts[1].exec(env));
  if (v instanceof ValRow) {
    ValRow vv = (ValRow) v;
    return vv.slice(asts[2].columns(vv._names));
  }
  Frame fr = v.getFrame();
  int[] cols = asts[2].columns(fr.names());

  Frame fr2 = new Frame();
  if (cols.length == 0) {    // Empty inclusion list?
  } else if (cols[0] >= 0) { // Positive (inclusion) list
    if (cols[cols.length - 1] >= fr.numCols())
      throw new IllegalArgumentException(
          "Column must be an integer from 0 to " + (fr.numCols() - 1));
    for (int col : cols) fr2.add(fr.names()[col], fr.vecs()[col]);
  } else {                   // Negative (exclusion) list
    fr2 = new Frame(fr);     // All of them at first
    Arrays.sort(cols);       // This loop depends on the values being in sorted order
    for (int col : cols)
      if (0 <= -col - 1 && -col - 1 < fr.numCols())
        fr2.remove(-col - 1); // Remove named column
  }
  return new ValFrame(fr2);
}
@Test
public void testAggregatorBinary() {
  CreateFrame cf = new CreateFrame();
  cf.rows = 1000;
  cf.cols = 10;
  cf.categorical_fraction = 0.6;
  cf.integer_fraction = 0.0;
  cf.binary_fraction = 0.0;
  cf.real_range = 100;
  cf.integer_range = 100;
  cf.missing_fraction = 0.1;
  cf.factors = 5;
  cf.seed = 1234;
  Frame frame = cf.execImpl().get();

  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._radius_scale = 1.0;
  parms._transform = DataInfo.TransformType.NORMALIZE;
  parms._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Binary;

  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get(); // 0.905
  System.out.println(
      "AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
  agg.checkConsistency();

  Frame output = agg._output._output_frame.get();
  System.out.println(output.toTwoDimTable(0, 10));
  Log.info("Number of exemplars: " + agg._exemplars.length);
  // Assert.assertTrue(agg._exemplars.length==649);
  output.remove();
  frame.remove();
  agg.remove();
}
private static void assertRowFrameEquals(double[] expected, Frame actual) {
  assertEquals(1, actual.numRows());
  assertEquals(expected.length, actual.numCols());
  for (int i = 0; i < expected.length; i++) {
    assertEquals("Wrong sum in column " + actual.name(i), expected[i], actual.vec(i).at(0), 1e-8);
  }
}
@Override
public Response serve() {
  Frame fr = DKV.get(data_key.value()).get();
  if (fr == null) return RequestServer._http404.serve();
  // Build a frame with the selected Vecs
  Frame fr2 = new Frame(new String[0], new Vec[0]);
  int[] idxs = vecs.value();
  for (int idx : idxs) // The selected frame columns
    fr2.add(fr._names[idx], fr._vecs[idx]);
  // Add the class-vec last
  Vec cvec = class_vec.value();
  fr2.add(fr._names[class_vec._colIdx.get()], cvec);
  domain = cvec.domain(); // Class/enum/factor names
  mtrys = features.value() == null
      ? (int) (Math.sqrt(idxs.length) + 0.5)
      : features.value();
  DRF drf =
      DRF.start(
          DRF.makeKey(),
          fr2,
          depth.value(),
          ntrees.value(),
          mtrys,
          sample_rate.value(),
          seed.value());
  drf.get();     // Block for result
  cm = drf.cm(); // Get CM result
  return new Response(Response.Status.done, this, -1, -1, null);
}
@Test
public void testBasicDdply() {
  Frame fr = null;
  // Group-By on col 1 (not 0), mean of col 2
  String tree = "(ddply hex [1] { x . (mean (cols x 2) TRUE)})";
  try {
    fr = chkTree(tree, "smalldata/iris/iris_wheader.csv");
    chkDim(fr, 2, 23);
    chkFr(fr, 0, 0, 2.0); // Group 2.0, mean is 3.5
    chkFr(fr, 1, 0, 3.5);
    chkFr(fr, 0, 1, 2.2); // Group 2.2, mean is 4.5
    chkFr(fr, 1, 1, 4.5);
    chkFr(fr, 0, 7, 2.8); // Group 2.8, mean is 5.043, largest group
    chkFr(fr, 1, 7, 5.042857142857143);
    chkFr(fr, 0, 22, 4.4); // Group 4.4, mean is 1.5, last group
    chkFr(fr, 1, 22, 1.5);
    fr.delete();

    fr = chkTree(
        "(ddply hex [1] { x . (sum (* (cols x 2) (cols x 3)))})",
        "smalldata/iris/iris_wheader.csv");
    chkDim(fr, 2, 23);
  } finally {
    if (fr != null) fr.delete();
    Keyed.remove(Key.make("hex"));
  }
}
@Test
public void testCatGroup() {
  Frame fr = null;
  // Group-By on col 4, no order-by; nrow and mean of col 2
  String tree = "(GB hex [4] nrow 0 \"all\" mean 2 \"all\")";
  try {
    fr = chkTree(tree, "smalldata/iris/iris_wheader.csv");
    chkDim(fr, 3, 3);
    chkFr(fr, 0, 0, "Iris-setosa");
    chkFr(fr, 1, 0, 50);
    chkFr(fr, 2, 0, 1.464);
    chkFr(fr, 0, 1, "Iris-versicolor");
    chkFr(fr, 1, 1, 50);
    chkFr(fr, 2, 1, 4.26);
    chkFr(fr, 0, 2, "Iris-virginica");
    chkFr(fr, 1, 2, 50);
    chkFr(fr, 2, 2, 5.552);
    fr.delete();

    fr = chkTree("(GB hex [1] mode 4 \"all\" )", "smalldata/iris/iris_wheader.csv");
    chkDim(fr, 2, 23);
  } finally {
    if (fr != null) fr.delete();
    Keyed.remove(Key.make("hex"));
  }
}
@Test
public void testAllAggs() {
  Frame fr = null;
  try {
    String tree =
        "(GB hex [4] nrow 0 \"rm\" mean 1 \"rm\" sum 1 \"rm\" min 1 \"rm\" max 1 \"rm\" )";
    fr = chkTree(tree, "smalldata/iris/iris_wheader.csv");
    chkDim(fr, 6, 3);
    chkFr(fr, 0, 0, "Iris-setosa");
    chkFr(fr, 1, 0, 50);    // nrow
    chkFr(fr, 2, 0, 3.418); // mean
    chkFr(fr, 3, 0, 170.9); // sum
    chkFr(fr, 4, 0, 2.3);   // min
    chkFr(fr, 5, 0, 4.4);   // max
    chkFr(fr, 0, 1, "Iris-versicolor");
    chkFr(fr, 1, 1, 50);    // nrow
    chkFr(fr, 2, 1, 2.770); // mean
    chkFr(fr, 3, 1, 138.5); // sum
    chkFr(fr, 4, 1, 2.0);   // min
    chkFr(fr, 5, 1, 3.4);   // max
    chkFr(fr, 0, 2, "Iris-virginica");
    chkFr(fr, 1, 2, 50);    // nrow
    chkFr(fr, 2, 2, 2.974); // mean
    chkFr(fr, 3, 2, 148.7); // sum
    chkFr(fr, 4, 2, 2.2);   // min
    chkFr(fr, 5, 2, 3.8);   // max
  } finally {
    if (fr != null) fr.delete();
    Keyed.remove(Key.make("hex"));
  }
}
private static void assertColFrameEquals(double[] expected, Frame actual) {
  assertEquals(1, actual.numCols());
  assertEquals(expected.length, actual.numRows());
  for (int i = 0; i < expected.length; i++) {
    assertEquals("Wrong sum in row " + i, expected[i], actual.vec(0).at(i), 1e-8);
  }
}
// Adapt a trained model to a test dataset with different enums
/*@Test*/
public void testModelAdapt() {
  File file1 = TestUtil.find_test_file("./smalldata/kaggle/KDDTrain.arff.gz");
  Key fkey1 = NFSFileVec.make(file1);
  Key dest1 = Key.make("KDDTrain.hex");
  File file2 = TestUtil.find_test_file("./smalldata/kaggle/KDDTest.arff.gz");
  Key fkey2 = NFSFileVec.make(file2);
  Key dest2 = Key.make("KDDTest.hex");
  GBM gbm = null;
  Frame fr = null;
  try {
    gbm = new GBM();
    gbm.source = ParseDataset2.parse(dest1, new Key[] {fkey1});
    UKV.remove(fkey1);
    gbm.response = gbm.source.remove(41); // Response is col 41
    gbm.ntrees = 2;
    gbm.max_depth = 8;
    gbm.learn_rate = 0.2f;
    gbm.min_rows = 10;
    gbm.nbins = 50;
    gbm.invoke();

    // The test data set has a few more enums than the train
    Frame ftest = ParseDataset2.parse(dest2, new Key[] {fkey2});
    Frame preds = gbm.score(ftest);
  } finally {
    UKV.remove(dest1); // Remove original hex frame key
    if (gbm != null) {
      UKV.remove(gbm.dest());        // Remove the model
      UKV.remove(gbm.response._key);
      gbm.remove();                  // Remove GBM Job
      if (fr != null) fr.remove();
    }
  }
}
static Frame exec_str(String str, String id) {
  Val val = Exec.exec(str);
  switch (val.type()) {
    case Val.FRM:
      Frame fr = val.getFrame();
      Key k = Key.make(id);
      // Smart delete any prior top-level result
      Iced i = DKV.getGet(k);
      if (i instanceof Lockable) ((Lockable) i).delete();
      else if (i instanceof Keyed) ((Keyed) i).remove();
      else if (i != null)
        throw new IllegalArgumentException("Attempting to overwrite an unexpected key");
      DKV.put(fr = new Frame(k, fr._names, fr.vecs()));
      System.out.println(fr);
      checkSaneFrame();
      return fr;
    case Val.NUM:
      System.out.println("num= " + val.getNum());
      assert id == null;
      checkSaneFrame();
      return null;
    case Val.STR:
      System.out.println("str= " + val.getStr());
      assert id == null;
      checkSaneFrame();
      return null;
    default:
      throw water.H2O.fail();
  }
}
@Test
public void testExpandCatsIris() throws InterruptedException, ExecutionException {
  double[][] iris =
      ard(
          ard(6.3, 2.5, 4.9, 1.5, 1),
          ard(5.7, 2.8, 4.5, 1.3, 1),
          ard(5.6, 2.8, 4.9, 2.0, 2),
          ard(5.0, 3.4, 1.6, 0.4, 0),
          ard(6.0, 2.2, 5.0, 1.5, 2));
  double[][] iris_expandR =
      ard(
          ard(0, 1, 0, 6.3, 2.5, 4.9, 1.5),
          ard(0, 1, 0, 5.7, 2.8, 4.5, 1.3),
          ard(0, 0, 1, 5.6, 2.8, 4.9, 2.0),
          ard(1, 0, 0, 5.0, 3.4, 1.6, 0.4),
          ard(0, 0, 1, 6.0, 2.2, 5.0, 1.5));
  String[] iris_cols = new String[] {"sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"};
  String[][] iris_domains =
      new String[][] {null, null, null, null, new String[] {"setosa", "versicolor", "virginica"}};

  Frame fr = null;
  try {
    fr = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
    DataInfo dinfo =
        new DataInfo(
            Key.make(),
            fr,
            null,
            0,
            true,
            DataInfo.TransformType.NONE,
            DataInfo.TransformType.NONE,
            false,
            false,
            false,
            /* weights */ false,
            /* offset */ false,
            /* fold */ false);

    Log.info("Original matrix:\n" + colFormat(iris_cols, "%8.7s") + ArrayUtils.pprint(iris));
    double[][] iris_perm = ArrayUtils.permuteCols(iris, dinfo._permutation);
    Log.info(
        "Permuted matrix:\n"
            + colFormat(iris_cols, "%8.7s", dinfo._permutation)
            + ArrayUtils.pprint(iris_perm));

    double[][] iris_exp = GLRM.expandCats(iris_perm, dinfo);
    Log.info(
        "Expanded matrix:\n"
            + colExpFormat(iris_cols, iris_domains, "%8.7s", dinfo._permutation)
            + ArrayUtils.pprint(iris_exp));
    Assert.assertArrayEquals(iris_expandR, iris_exp);
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (fr != null) fr.delete();
  }
}
@Override
protected void init() {
  if (validation != null && n_folds != 0)
    throw new UnsupportedOperationException(
        "Cannot specify a validation dataset and non-zero number of cross-validation folds.");
  if (n_folds < 0)
    throw new UnsupportedOperationException(
        "The number of cross-validation folds must be >= 0.");
  super.init();

  xval_models = new Key[n_folds];
  for (int i = 0; i < xval_models.length; ++i)
    xval_models[i] = Key.make(dest().toString() + "_xval" + i);

  int rIndex = 0;
  for (int i = 0; i < source.vecs().length; i++)
    if (source.vecs()[i] == response) {
      rIndex = i;
      break;
    }
  _responseName = source._names != null && rIndex >= 0 ? source._names[rIndex] : "response";

  _train = selectVecs(source);
  _names = new String[cols.length];
  for (int i = 0; i < cols.length; i++) _names[i] = source._names[cols[i]];

  // Compute source response domain
  if (classification) _sourceResponseDomain = getVectorDomain(response);
  // Is validation specified?
  if (validation != null) {
    // Extract a validation response
    int idx = validation.find(source.names()[rIndex]);
    if (idx == -1)
      throw new IllegalArgumentException(
          "Validation set does not have a response column called " + _responseName);
    _validResponse = validation.vecs()[idx];
    // Compute output confusion matrix domain for classification:
    // - if a validation dataset is specified, the CM domain is the union of the train and
    //   validation response domains; otherwise it is only the domain of the response column.
    if (classification) {
      _validResponseDomain = getVectorDomain(_validResponse);
      if (_validResponseDomain != null) {
        _cmDomain = Utils.domainUnion(_sourceResponseDomain, _validResponseDomain);
        if (!Arrays.deepEquals(_sourceResponseDomain, _validResponseDomain)) {
          _fromModel2CM =
              Model.getDomainMapping(
                  _cmDomain,
                  _sourceResponseDomain,
                  false); // transformation from model-produced response ~> cmDomain
          _fromValid2CM =
              Model.getDomainMapping(
                  _cmDomain,
                  _validResponseDomain,
                  false); // transformation from validation response domain ~> cmDomain
        }
      } else _cmDomain = _sourceResponseDomain;
    } /* end of if classification */
  } else if (classification) _cmDomain = _sourceResponseDomain;
}
/**
 * Annotate the number of columns and rows of the validation data set in the job parameter JSON.
 *
 * @return JsonObject annotated with num_cols and num_rows of the validation data set
 */
@Override
protected JsonObject toJSON() {
  JsonObject jo = super.toJSON();
  if (validation != null) {
    jo.getAsJsonObject("validation").addProperty("num_cols", validation.numCols());
    jo.getAsJsonObject("validation").addProperty("num_rows", validation.numRows());
  }
  return jo;
}
public static NewChunk createNC(String fname, String[] data, int cidx, int len) {
  NewChunk[] nchunks = Frame.createNewChunks(fname, Vec.T_STR, cidx);
  for (int i = 0; i < len; i++) {
    nchunks[0].addStr(data[i]);
  }
  Frame.closeNewChunks(nchunks);
  return nchunks[0];
}
/**
 * Annotate the number of columns and rows of the training data set in the job parameter JSON.
 *
 * @return JsonObject annotated with num_cols and num_rows of the training data set
 */
@Override
protected JsonObject toJSON() {
  JsonObject jo = super.toJSON();
  if (source != null) {
    jo.getAsJsonObject("source").addProperty("num_cols", source.numCols());
    jo.getAsJsonObject("source").addProperty("num_rows", source.numRows());
  }
  return jo;
}
public final Row extractDenseRow(double[] vals, Row row) {
  row.bad = false;
  row.rid = 0;
  row.cid = 0;
  if (row.weight == 0) return row;

  if (_skipMissing)
    for (double d : vals)
      if (Double.isNaN(d)) {
        row.bad = true;
        return row;
      }

  int nbins = 0;
  for (int i = 0; i < _cats; ++i) {
    int c = getCategoricalId(i, Double.isNaN(vals[i]) ? _catModes[i] : (int) vals[i]);
    if (c >= 0) row.binIds[nbins++] = c;
  }
  row.nBins = nbins;

  final int n = _nums;
  int numValsIdx = 0;
  for (int i = 0; i < n; ++i) {
    if (isInteractionVec(i)) {
      int offset;
      InteractionWrappedVec iwv = ((InteractionWrappedVec) _adaptedFrame.vec(_cats + i));
      int v1 = _adaptedFrame.find(iwv.v1());
      int v2 = _adaptedFrame.find(iwv.v2());
      if (v1 < _cats)
        offset = getCategoricalId(v1, Double.isNaN(vals[v1]) ? _catModes[v1] : (int) vals[v1]);
      else if (v2 < _cats)
        offset = getCategoricalId(v2, Double.isNaN(vals[v2]) ? _catModes[v2] : (int) vals[v2]);
      else offset = 0;
      row.numVals[numValsIdx + offset] = vals[_cats + i]; // essentially: vals[v1] * vals[v2]
      numValsIdx += nextNumericIdx(i);
    } else {
      double d = vals[_cats + i]; // can be NA if skipMissing() == false
      if (Double.isNaN(d)) d = _numMeans[numValsIdx];
      if (_normMul != null && _normSub != null)
        d = (d - _normSub[numValsIdx]) * _normMul[numValsIdx];
      row.numVals[numValsIdx++] = d;
    }
  }

  int off = responseChunkId(0);
  for (int i = off; i < Math.min(vals.length, off + _responses); ++i) {
    try {
      row.response[i] = vals[responseChunkId(i)];
    } catch (Throwable t) {
      throw new RuntimeException(t);
    }
    if (_normRespMul != null)
      row.response[i] = (row.response[i] - _normRespSub[i]) * _normRespMul[i];
    if (Double.isNaN(row.response[i])) {
      row.bad = true;
      return row;
    }
  }
  return row;
}
@Test
public void testColumnwisesumOnEmptyFrame() {
  Frame fr = register(new Frame(Key.<Frame>make()));
  Val val = Rapids.exec("(sumaxis " + fr._key + " 0 0)");
  assertTrue(val instanceof ValFrame);
  Frame res = register(val.getFrame());
  assertEquals(res.numCols(), 0);
  assertEquals(res.numRows(), 0);
}
public static NewChunk createNC(String fname, int cidx, int len) {
  NewChunk[] nchunks = Frame.createNewChunks(fname, Vec.T_NUM, cidx);
  int startVal = cidx * 1000;
  for (int i = 0; i < len; i++) {
    nchunks[0].addNum(startVal + i);
  }
  Frame.closeNewChunks(nchunks);
  return nchunks[0];
}
protected final Frame selectFrame(Frame frame) {
  Vec[] vecs = new Vec[cols.length];
  String[] names = new String[cols.length];
  for (int i = 0; i < cols.length; i++) {
    vecs[i] = frame.vecs()[cols[i]];
    names[i] = frame.names()[cols[i]];
  }
  return new Frame(names, vecs);
}
@Test
public void testCategoricalProstate() throws InterruptedException, ExecutionException {
  GLRM job = null;
  GLRMModel model = null;
  Frame train = null;
  final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

  try {
    Scope.enter();
    train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
    for (int i = 0; i < cats.length; i++)
      Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
    train.remove("ID").remove();
    DKV.put(train._key, train);

    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 8;
    parms._gamma_x = parms._gamma_y = 0.1;
    parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
    parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
    parms._init = GLRM.Initialization.PlusPlus;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._recover_svd = false;
    parms._max_iterations = 200;

    try {
      job = new GLRM(parms);
      model = job.trainModel().get();
      Log.info(
          "Iteration " + model._output._iterations
              + ": Objective value = " + model._output._objective);
      model.score(train).delete();
      ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
      Log.info(
          "Numeric Sum of Squared Error = " + mm._numerr
              + "\tCategorical Misclassification Error = " + mm._caterr);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (job != null) job.remove();
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
    Scope.exit();
  }
}
// Private constructor called by filterExpandedColumns
private DataInfo(
    DataInfo dinfo,
    Frame fr,
    double[] normMul,
    double[] normSub,
    int[][] catLevels,
    int[] catModes) {
  _fullCatOffsets = dinfo._catOffsets;
  if (!dinfo._useAllFactorLevels) {
    _fullCatOffsets = dinfo._catOffsets.clone();
    for (int i = 0; i < _fullCatOffsets.length; ++i)
      _fullCatOffsets[i] += i; // add for the skipped zeros
  }
  _offset = dinfo._offset;
  _weights = dinfo._weights;
  _fold = dinfo._fold;
  _valid = false;
  _interactions = dinfo._interactions;
  _interactionVecs = dinfo._interactionVecs;
  assert dinfo._predictor_transform != null;
  assert dinfo._response_transform != null;
  _predictor_transform = dinfo._predictor_transform;
  _response_transform = dinfo._response_transform;
  _skipMissing = dinfo._skipMissing;
  _imputeMissing = dinfo._imputeMissing;
  _adaptedFrame = fr;
  _catOffsets = MemoryManager.malloc4(catLevels.length + 1);
  _catMissing = new boolean[catLevels.length];
  Arrays.fill(_catMissing, !(dinfo._imputeMissing || dinfo._skipMissing));
  int s = 0;
  for (int i = 0; i < catLevels.length; ++i) {
    _catOffsets[i] = s;
    s += catLevels[i].length;
  }
  _catLvls = catLevels;
  _catOffsets[_catOffsets.length - 1] = s;
  _responses = dinfo._responses;
  _cats = catLevels.length;
  _nums =
      fr.numCols() - _cats - dinfo._responses - (_offset ? 1 : 0) - (_weights ? 1 : 0)
          - (_fold ? 1 : 0);
  _numOffsets = _nums == 0 ? new int[0] : dinfo._numOffsets.clone();
  int diff = _numOffsets.length > 0 ? _numOffsets[0] - s : 0;
  for (int i = 0; i < _numOffsets.length; i++) // need to shift everyone down by the offset!
    _numOffsets[i] -= diff;
  _useAllFactorLevels = true; // dinfo._useAllFactorLevels;
  _numMeans = new double[_nums];
  _normMul = normMul;
  _normSub = normSub;
  _catModes = catModes;
  for (int i = 0; i < _nums; i++) _numMeans[i] = _adaptedFrame.vec(_cats + i).mean();
}
@Override
public void compute2() {
  // Lock all possible data
  dataset.read_lock(jobKey);
  // Create a template vector for each segment
  final Vec[][] templates = makeTemplates(dataset, ratios);
  final int nsplits = templates.length;
  assert nsplits == ratios.length + 1 : "Unexpected number of split templates!";
  // Launch a distributed FJ task for each split part
  final Vec[] datasetVecs = dataset.vecs();
  splits = new Frame[nsplits];
  for (int s = 0; s < nsplits; s++) {
    Frame split = new Frame(destKeys[s], dataset.names(), templates[s]);
    split.delete_and_lock(jobKey);
    splits[s] = split;
  }
  setPendingCount(1);
  H2O.submitTask(
      new H2OCountedCompleter(FrameSplitter.this) {
        @Override
        public void compute2() {
          setPendingCount(nsplits);
          for (int s = 0; s < nsplits; s++) {
            new FrameSplitTask(
                    new H2OCountedCompleter(this) { // Completer for this task
                      @Override
                      public void compute2() {}

                      @Override
                      public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
                        // Synchronized on the outer FrameSplitter since this can be reached
                        // from different workers
                        synchronized (FrameSplitter.this) {
                          workersExceptions =
                              workersExceptions != null
                                  ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1)
                                  : new Throwable[1];
                          workersExceptions[workersExceptions.length - 1] = ex;
                        }
                        tryComplete(); // we handle the exception here, so proceed with normal completion
                        return false;
                      }
                    },
                    datasetVecs,
                    ratios,
                    s)
                .asyncExec(splits[s]);
          }
          tryComplete(); // complete the computation of the nsplits tasks
        }
      });
  tryComplete(); // complete the computation of the spawned tasks
}
// --------------------------------------------------------------------------
// Build an entire layer of all K trees
protected DHistogram[][][] buildLayer(
    final Frame fr,
    final int nbins,
    int nbins_cats,
    final DTree ktrees[],
    final int leafs[],
    final DHistogram hcs[][][],
    boolean subset,
    boolean build_tree_one_node) {
  // Build K trees, one per class.

  // Build up the next-generation tree splits from the current histograms.
  // Nearly all leaves will split one more level. This loop nest is
  //   O( #active_splits * #bins * #ncols )
  // but is NOT over all the data.
  ScoreBuildOneTree sb1ts[] = new ScoreBuildOneTree[_nclass];
  Vec vecs[] = fr.vecs();
  for (int k = 0; k < _nclass; k++) {
    final DTree tree = ktrees[k]; // Tree for class K
    if (tree == null) continue;
    // Build a frame with just a single tree (& work & nid) columns, so the
    // nested MRTask ScoreBuildHistogram in ScoreBuildOneTree does not try
    // to close other trees' Vecs when run in parallel.
    Frame fr2 = new Frame(Arrays.copyOf(fr._names, _ncols + 1), Arrays.copyOf(vecs, _ncols + 1));
    fr2.add(fr._names[idx_tree(k)], vecs[idx_tree(k)]);
    fr2.add(fr._names[idx_work(k)], vecs[idx_work(k)]);
    fr2.add(fr._names[idx_nids(k)], vecs[idx_nids(k)]);
    if (idx_weight() >= 0) fr2.add(fr._names[idx_weight()], vecs[idx_weight()]);
    // Start building one of the K trees in parallel
    H2O.submitTask(
        sb1ts[k] =
            new ScoreBuildOneTree(
                this,
                k,
                nbins,
                nbins_cats,
                tree,
                leafs,
                hcs,
                fr2,
                subset,
                build_tree_one_node,
                _improvPerVar,
                _model._parms._distribution));
  }
  // Block for all K trees to complete.
  boolean did_split = false;
  for (int k = 0; k < _nclass; k++) {
    final DTree tree = ktrees[k]; // Tree for class K
    if (tree == null) continue;
    sb1ts[k].join();
    if (sb1ts[k]._did_split) did_split = true;
  }
  // The layer is done.
  return did_split ? hcs : null;
}
@Test
public void testRowwisesumOnFrameWithNonnumericColumnsOnly() {
  Frame fr = register(new Frame(Key.<Frame>make(), ar("c1", "s1"), aro(vc2, vs1)));
  Val val = Rapids.exec("(sumaxis " + fr._key + " 1 1)");
  assertTrue(val instanceof ValFrame);
  Frame res = register(val.getFrame());
  assertEquals("Unexpected column name", "sum", res.name(0));
  assertEquals("Unexpected column type", Vec.T_NUM, res.types()[0]);
  assertColFrameEquals(ard(Double.NaN, Double.NaN, Double.NaN, Double.NaN, Double.NaN), res);
}
@Test
public void testRowwisesumOnFrameWithTimeColumnsOnly() {
  Frame fr = register(new Frame(Key.<Frame>make(), ar("t1", "s", "t2"), aro(vt1, vs1, vt2)));
  Val val = Rapids.exec("(sumaxis " + fr._key + " 1 1)");
  assertTrue(val instanceof ValFrame);
  Frame res = register(val.getFrame());
  assertEquals("Unexpected column name", "sum", res.name(0));
  assertEquals("Unexpected column type", Vec.T_TIME, res.types()[0]);
  assertColFrameEquals(ard(30000000, 30000040, 30000060, 30000080, 30000120), res);
}
@Test
public void testRowwisesumWithoutNaRm() {
  Frame fr =
      register(new Frame(Key.<Frame>make(), ar("i1", "d1", "d2", "d3"), aro(vi1, vd1, vd2, vd3)));
  Val val = Rapids.exec("(sumaxis " + fr._key + " 0 1)");
  assertTrue(val instanceof ValFrame);
  Frame res = register(val.getFrame());
  assertColFrameEquals(ard(1.7, 2.9, Double.NaN, 10.3, Double.NaN), res);
  assertEquals("sum", res.name(0));
}
@Override
Val apply(Env env, Env.StackHelp stk, AST asts[]) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  if (fr.numCols() == 1 && fr.numRows() == 1) {
    if (fr.anyVec().isNumeric() || fr.anyVec().isBad()) return new ValNum(fr.anyVec().at(0));
    else if (fr.anyVec().isString())
      return new ValStr(fr.anyVec().atStr(new BufferedString(), 0).toString());
    return new ValStr(fr.domains()[0][(int) fr.anyVec().at8(0)]);
  }
  return new ValFrame(fr); // did not flatten
}