/**
 * Runs {@code (sumaxis <frame> 1 1)} on a two-column frame built from the fixture vecs
 * {@code vc2} and {@code vs1} (non-numeric columns, per the test name) and expects a single
 * numeric column named {@code "sum"} whose five rows are all NaN.
 */
@Test
public void testRowwisesumOnFrameWithNonnumericColumnsOnly() {
  // Build and register the input frame so the test harness can clean it up.
  Key<Frame> inputKey = Key.<Frame>make();
  Frame input = register(new Frame(inputKey, ar("c1", "s1"), aro(vc2, vs1)));

  // Evaluate the Rapids expression against the registered frame.
  String rapids = "(sumaxis " + input._key + " 1 1)";
  Val outcome = Rapids.exec(rapids);
  assertTrue(outcome instanceof ValFrame);

  // The result is itself a frame; register it for cleanup before inspecting it.
  Frame summed = register(outcome.getFrame());
  assertEquals("Unexpected column name", "sum", summed.name(0));
  assertEquals("Unexpected column type", Vec.T_NUM, summed.types()[0]);

  final double nan = Double.NaN;
  assertColFrameEquals(ard(nan, nan, nan, nan, nan), summed);
}
/**
 * Runs {@code (sumaxis <frame> 1 1)} on a frame built from the fixture vecs {@code vt1},
 * {@code vs1} and {@code vt2} and expects a single {@code "sum"} column of type
 * {@code Vec.T_TIME} holding the five expected totals.
 */
@Test
public void testRowwisesumOnFrameWithTimeColumnsOnly() {
  // Build and register the input frame so the test harness can clean it up.
  Key<Frame> inputKey = Key.<Frame>make();
  Frame input = register(new Frame(inputKey, ar("t1", "s", "t2"), aro(vt1, vs1, vt2)));

  // Evaluate the Rapids expression against the registered frame.
  String rapids = "(sumaxis " + input._key + " 1 1)";
  Val outcome = Rapids.exec(rapids);
  assertTrue(outcome instanceof ValFrame);

  // Inspect the single output column.
  Frame summed = register(outcome.getFrame());
  assertEquals("Unexpected column name", "sum", summed.name(0));
  assertEquals("Unexpected column type", Vec.T_TIME, summed.types()[0]);
  assertColFrameEquals(ard(30000000, 30000040, 30000060, 30000080, 30000120), summed);
}
/**
 * Runs {@code (sumaxis <frame> 1 0)} on a frame made of {@code vc1} (asserted to be binary) and
 * {@code vc2} (asserted not to be binary). Expects the column names to be preserved, both
 * output columns to be numeric, and the single result row to be {@code (3.0, NaN)}.
 */
@Test
public void testColumnwisesumBinaryVec() {
  // Precondition on the fixtures: only the first vec is binary.
  assertTrue(vc1.isBinary() && !vc2.isBinary());

  // Build and register the input frame so the test harness can clean it up.
  Key<Frame> inputKey = Key.<Frame>make();
  Frame input = register(new Frame(inputKey, ar("C1", "C2"), aro(vc1, vc2)));

  // Evaluate the Rapids expression against the registered frame.
  String rapids = "(sumaxis " + input._key + " 1 0)";
  Val outcome = Rapids.exec(rapids);
  assertTrue(outcome instanceof ValFrame);

  // The output keeps one column per input column.
  Frame summed = register(outcome.getFrame());
  assertArrayEquals(input.names(), summed.names());
  assertArrayEquals(ar(Vec.T_NUM, Vec.T_NUM), summed.types());
  assertRowFrameEquals(ard(3.0, Double.NaN), summed);
}
/**
 * Runs {@code (sumaxis <frame> 1 1)} on a frame built from the fixture vecs {@code vi1},
 * {@code vd1}, {@code vd2}, {@code vd3} and {@code vs1}. Expects a single numeric {@code "sum"}
 * column with the values {@code 1.7, 2.9, 4.1, 10.3, 10.0}.
 */
@Test
public void testRowwisesumWithNaRm() {
  // Build and register the input frame so the test harness can clean it up.
  Key<Frame> inputKey = Key.<Frame>make();
  Frame input =
      register(
          new Frame(
              inputKey,
              ar("i1", "d1", "d2", "d3", "s1"),
              aro(vi1, vd1, vd2, vd3, vs1)));

  // Evaluate the Rapids expression against the registered frame.
  String rapids = "(sumaxis " + input._key + " 1 1)";
  Val outcome = Rapids.exec(rapids);
  assertTrue(outcome instanceof ValFrame);

  // Inspect the single output column.
  Frame summed = register(outcome.getFrame());
  assertEquals("Unexpected column name", "sum", summed.name(0));
  assertEquals("Unexpected column type", Vec.T_NUM, summed.types()[0]);
  assertColFrameEquals(ard(1.7, 2.9, 4.1, 10.3, 10.0), summed);
}
/**
 * Runs {@code (sumaxis <frame> 1 0)} on a seven-column frame built from the fixture vecs
 * {@code vi1, vd1, vd2, vd3, vt1, vs1, vc2}. Expects the column names to be preserved, the
 * {@code "T"} column to come back as {@code Vec.T_TIME} and every other column as
 * {@code Vec.T_NUM}, and the single result row to be
 * {@code (0.0, 20.0, 3.0, 6.0, 50000150.0, NaN, NaN)}.
 */
@Test
public void testColumnwiseSumWithNaRm() {
  // Build and register the input frame so the test harness can clean it up.
  Key<Frame> inputKey = Key.<Frame>make();
  Frame input =
      register(
          new Frame(
              inputKey,
              ar("I", "D", "DD", "DN", "T", "S", "C"),
              aro(vi1, vd1, vd2, vd3, vt1, vs1, vc2)));

  // Evaluate the Rapids expression against the registered frame.
  String rapids = "(sumaxis " + input._key + " 1 0)";
  Val outcome = Rapids.exec(rapids);
  assertTrue(outcome instanceof ValFrame);

  // The output keeps one column per input column.
  Frame summed = register(outcome.getFrame());
  assertArrayEquals(input.names(), summed.names());
  assertArrayEquals(
      ar(Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_TIME, Vec.T_NUM, Vec.T_NUM),
      summed.types());
  assertRowFrameEquals(
      ard(0.0, 20.0, 3.0, 6.0, 50000150.0, Double.NaN, Double.NaN), summed);
}
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) { Frame fr = stk.track(asts[1].exec(env)).getFrame(); Frame returningFrame; long nrows = fr.numRows(); if (asts[2] instanceof ASTNumList) { final ASTNumList nums = (ASTNumList) asts[2]; long[] rows = nums._isList ? nums.expand8Sort() : null; if (rows != null) { if (rows.length == 0) { // Empty inclusion list? } else if (rows[0] >= 0) { // Positive (inclusion) list if (rows[rows.length - 1] > nrows) throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1)); } else { // Negative (exclusion) list // Invert the list to make a positive list, ignoring out-of-bounds values BitSet bs = new BitSet((int) nrows); for (int i = 0; i < rows.length; i++) { int idx = (int) (-rows[i] - 1); // The positive index if (idx >= 0 && idx < nrows) bs.set(idx); // Set column to EXCLUDE } rows = new long[(int) nrows - bs.cardinality()]; for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1)) rows[j++] = i; } } final long[] ls = rows; returningFrame = new MRTask() { @Override public void map(Chunk[] cs, NewChunk[] ncs) { if (nums.cnt() == 0) return; long start = cs[0].start(); long end = start + cs[0]._len; long min = ls == null ? (long) nums.min() : ls[0], max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1]; // exclusive max to inclusive max when stride == 1 // [ start, ..., end ] the chunk // 1 [] nums out left: nums.max() < start // 2 [] nums out rite: nums.min() > end // 3 [ nums ] nums run left: nums.min() < start && nums.max() <= // end // 4 [ nums ] nums run in : start <= nums.min() && nums.max() <= // end // 5 [ nums ] nums run rite: start <= nums.min() && end < // nums.max() if (!(max < start || min > end)) { // not situation 1 or 2 above long startOffset = (min > start ? 
min : start); // situation 4 and 5 => min > start; for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) { if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) { for (int c = 0; c < cs.length; ++c) { if (cs[c] instanceof CStrChunk) ncs[c].addStr(cs[c], i); else if (cs[c] instanceof C16Chunk) ncs[c].addUUID(cs[c], i); else if (cs[c].isNA(i)) ncs[c].addNA(); else ncs[c].addNum(cs[c].atd(i)); } } } } } }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains()); } else if ((asts[2] instanceof ASTNum)) { long[] rows = new long[] {(long) (((ASTNum) asts[2])._v.getNum())}; returningFrame = fr.deepSlice(rows, null); } else if ((asts[2] instanceof ASTExec) || (asts[2] instanceof ASTId)) { Frame predVec = stk.track(asts[2].exec(env)).getFrame(); if (predVec.numCols() != 1) throw new IllegalArgumentException( "Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns. Must be a boolean Vec."); returningFrame = fr.deepSlice(predVec, null); } else throw new IllegalArgumentException( "Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass()); return new ValFrame(returningFrame); }
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) { // Execute all args. Find a canonical frame; all Frames must look like this one. // Each argument turns into either a Frame (whose rows are entirely // inlined) or a scalar (which is replicated across as a single row). Frame fr = null; // Canonical Frame; all frames have the same column count, types and names int nchks = 0; // Total chunks Val vals[] = new Val[asts.length]; // Computed AST results for (int i = 1; i < asts.length; i++) { vals[i] = stk.track(asts[i].exec(env)); if (vals[i].isFrame()) { fr = vals[i].getFrame(); nchks += fr.anyVec().nChunks(); // Total chunks } else nchks++; // One chunk per scalar } // No Frame, just a pile-o-scalars? Vec zz = null; // The zero-length vec for the zero-frame frame if (fr == null) { // Zero-length, 1-column, default name fr = new Frame(new String[] {Frame.defaultColName(0)}, new Vec[] {zz = Vec.makeZero(0)}); if (asts.length == 1) return new ValFrame(fr); } // Verify all Frames are the same columns, names, and types. Domains can vary, and will be the // union final Frame frs[] = new Frame[asts.length]; // Input frame final byte[] types = fr.types(); // Column types final int ncols = fr.numCols(); final long[] espc = new long[nchks + 1]; // Compute a new layout! int coffset = 0; for (int i = 1; i < asts.length; i++) { Val val = vals[i]; // Save values computed for pass 2 Frame fr0 = val.isFrame() ? 
val.getFrame() // Scalar: auto-expand into a 1-row frame : stk.track(new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols()))); // Check that all frames are compatible if (fr.numCols() != fr0.numCols()) throw new IllegalArgumentException( "rbind frames must have all the same columns, found " + fr.numCols() + " and " + fr0.numCols() + " columns."); if (!Arrays.deepEquals(fr._names, fr0._names)) throw new IllegalArgumentException( "rbind frames must have all the same column names, found " + Arrays.toString(fr._names) + " and " + Arrays.toString(fr0._names)); if (!Arrays.equals(types, fr0.types())) throw new IllegalArgumentException( "rbind frames must have all the same column types, found " + Arrays.toString(types) + " and " + Arrays.toString(fr0.types())); frs[i] = fr0; // Save frame // Roll up the ESPC row counts long roffset = espc[coffset]; long[] espc2 = fr0.anyVec().espc(); for (int j = 1; j < espc2.length; j++) // Roll up the row counts espc[coffset + j] = (roffset + espc2[j]); coffset += espc2.length - 1; // Chunk offset } if (zz != null) zz.remove(); // build up the new domains for each vec HashMap<String, Integer>[] dmap = new HashMap[types.length]; String[][] domains = new String[types.length][]; int[][][] cmaps = new int[types.length][][]; for (int k = 0; k < types.length; ++k) { dmap[k] = new HashMap<>(); int c = 0; byte t = types[k]; if (t == Vec.T_CAT) { int[][] maps = new int[frs.length][]; for (int i = 1; i < frs.length; i++) { maps[i] = new int[frs[i].vec(k).domain().length]; for (int j = 0; j < maps[i].length; j++) { String s = frs[i].vec(k).domain()[j]; if (!dmap[k].containsKey(s)) dmap[k].put(s, maps[i][j] = c++); else maps[i][j] = dmap[k].get(s); } } cmaps[k] = maps; } else { cmaps[k] = new int[frs.length][]; } domains[k] = c == 0 ? 
null : new String[c]; for (Map.Entry<String, Integer> e : dmap[k].entrySet()) domains[k][e.getValue()] = e.getKey(); } // Now make Keys for the new Vecs Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols()); Vec[] vecs = new Vec[fr.numCols()]; int rowLayout = Vec.ESPC.rowLayout(keys[0], espc); for (int i = 0; i < vecs.length; i++) vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]); // Do the row-binds column-by-column. // Switch to F/J thread for continuations ParallelRbinds t; H2O.submitTask(t = new ParallelRbinds(frs, espc, vecs, cmaps)).join(); return new ValFrame(new Frame(fr.names(), t._vecs)); }