/**
 * Asserts that a string vector holds exactly the expected values, row by row.
 *
 * <p>The row count is checked first. A row that the vector reports as NA must
 * correspond to a {@code null} entry in {@code expValues}; every other row is
 * read with {@code atStr} and compared as a {@code String}.
 *
 * @param v the vector under test
 * @param expValues expected value per row, with {@code null} standing for NA
 */
public static void assertValues(Vec v, String[] expValues) {
  Assert.assertEquals("Number of rows", expValues.length, v.length());

  // One scratch buffer is reused for every row that is read.
  BufferedString scratch = new BufferedString();
  for (int row = 0; row < v.length(); row++) {
    if (!v.isNA(row)) {
      Assert.assertEquals("Values should match", expValues[row], v.atStr(scratch, row).toString());
      continue;
    }
    Assert.assertEquals("NAs should match", null, expValues[row]);
  }
}
@Override Val apply(Env env, Env.StackHelp stk, AST asts[]) { // Compute the variable args. Find the common row count Val vals[] = new Val[asts.length]; Vec vec = null; for (int i = 1; i < asts.length; i++) { vals[i] = stk.track(asts[i].exec(env)); if (vals[i].isFrame()) { Vec anyvec = vals[i].getFrame().anyVec(); if (anyvec == null) continue; // Ignore the empty frame if (vec == null) vec = anyvec; else if (vec.length() != anyvec.length()) throw new IllegalArgumentException( "cbind frames must have all the same rows, found " + vec.length() + " and " + anyvec.length() + " rows."); } } boolean clean = false; if (vec == null) { vec = Vec.makeZero(1); clean = true; } // Default to length 1 // Populate the new Frame Frame fr = new Frame(); for (int i = 1; i < asts.length; i++) { switch (vals[i].type()) { case Val.FRM: fr.add(fr.makeCompatible(vals[i].getFrame())); break; case Val.FUN: throw H2O.unimpl(); case Val.STR: throw H2O.unimpl(); case Val.NUM: // Auto-expand scalars to fill every row double d = vals[i].getNum(); fr.add(Double.toString(d), vec.makeCon(d)); break; default: throw H2O.unimpl(); } } if (clean) vec.remove(); return new ValFrame(fr); }
/** * Creates a new sparse vector by copying the values from another * * @param toCopy the vector to copy the values of */ public SparseVector(Vec toCopy) { this(toCopy.length(), toCopy.nnz()); for (IndexValue iv : toCopy) { indexes[used] = iv.getIndex(); values[used++] = iv.getValue(); } }
@Override public void map(Chunk[] ix, NewChunk[] ncs) { final Vec[] vecs = new Vec[_cols.length]; final Vec anyv = _base.anyVec(); final long nrow = anyv.length(); long r = ix[0].at80(0); int last_ci = anyv.elem2ChunkIdx(r < nrow ? r : 0); // memoize the last chunk index long last_c0 = anyv._espc[last_ci]; // ... last chunk start long last_c1 = anyv._espc[last_ci + 1]; // ... last chunk end Chunk[] last_cs = new Chunk[vecs.length]; // ... last chunks for (int c = 0; c < _cols.length; c++) { vecs[c] = _base.vecs()[_cols[c]]; last_cs[c] = vecs[c].elem2BV(last_ci); } for (int i = 0; i < ix[0]._len; i++) { // select one row r = ix[0].at80(i) - 1; // next row to select if (r < 0) continue; if (r >= nrow) { for (int c = 0; c < vecs.length; c++) ncs[c].addNum(Double.NaN); } else { if (r < last_c0 || r >= last_c1) { last_ci = anyv.elem2ChunkIdx(r); last_c0 = anyv._espc[last_ci]; last_c1 = anyv._espc[last_ci + 1]; for (int c = 0; c < vecs.length; c++) last_cs[c] = vecs[c].elem2BV(last_ci); } for (int c = 0; c < vecs.length; c++) ncs[c].addNum(last_cs[c].at(r)); } } }
@Override public void mutablePairwiseDivide(Vec b) { if (this.length() != b.length()) throw new ArithmeticException("Vectors must have the same length"); clearCaches(); for (int i = 0; i < used; i++) values[i] /= b.get(indexes[i]); // zeros stay zero }
/**
 * Accumulates {@code c * (this * A)} into {@code b}, treating this vector as a
 * row vector.
 *
 * <p>For every stored entry {@code (index, value)} of this vector and every
 * column {@code j} of {@code A}, {@code c * value * A[index][j]} is added to
 * {@code b[j]} through {@code b.increment}. Existing contents of {@code b} are
 * kept and added to.
 *
 * @param c scalar factor applied to every stored value
 * @param A matrix whose row count must equal this vector's length
 * @param b destination vector whose length must equal {@code A.cols()}
 * @throws ArithmeticException if either dimension check fails
 */
@Override
public void multiply(double c, Matrix A, Vec b) {
  if (this.length() != A.rows()) {
    throw new ArithmeticException("Vector x Matrix dimensions do not agree");
  }
  if (b.length() != A.cols()) {
    throw new ArithmeticException("Destination vector is not the right size");
  }

  for (int nz = 0; nz < used; nz++) {
    final int row = this.indexes[nz];
    final double scaled = c * this.values[nz];
    for (int col = 0; col < A.cols(); col++) {
      b.increment(col, scaled * A.get(row, col));
    }
  }
}
@Override protected void setupLocal() { // Precompute the first input chunk index and start row inside that chunk for this partition Vec anyInVec = _srcVecs[0]; long[] partSizes = Utils.partitione(anyInVec.length(), _ratios); long pnrows = 0; for (int p = 0; p < _partIdx; p++) pnrows += partSizes[p]; long[] espc = anyInVec._espc; while (_pcidx < espc.length - 1 && (pnrows -= (espc[_pcidx + 1] - espc[_pcidx])) > 0) _pcidx++; assert pnrows <= 0; _psrow = (int) (pnrows + espc[_pcidx + 1] - espc[_pcidx]); }
// Make vector templates for all output frame vectors private Vec[][] makeTemplates(Frame dataset, float[] ratios) { Vec anyVec = dataset.anyVec(); final long[][] espcPerSplit = computeEspcPerSplit(anyVec._espc, anyVec.length(), ratios); final int num = dataset.numCols(); // number of columns in input frame final int nsplits = espcPerSplit.length; // number of splits final String[][] domains = dataset.domains(); // domains Vec[][] t = new Vec[nsplits][ /*num*/]; // resulting vectors for all for (int i = 0; i < nsplits; i++) { // vectors for j-th split t[i] = new Vec(Vec.newKey(), espcPerSplit[i /*-th split*/]).makeZeros(num, domains); } return t; }
@Override public boolean equals(Object obj) { if (!(obj instanceof Vec)) return false; Vec otherVec = (Vec) obj; if (this.length() != otherVec.length()) return false; int z = 0; for (int i = 0; i < length(); i++) { // Move through until we hit the next null element, comparing the other vec to zero while (z < used && indexes[z] > i) if (otherVec.get(i++) != 0) return false; // We made it! (or are at the end). Is our non zero value the same? if (z < used && indexes[z] == i) if (values[z++] != otherVec.get(i)) return false; } return true; }
@Override public double pNormDist(double p, Vec y) { if (this.length() != y.length()) throw new ArithmeticException("Vectors must be of the same length"); double norm = 0; if (y instanceof SparseVector) { int p1 = 0, p2 = 0; SparseVector b = (SparseVector) y; while (p1 < this.used && p2 < b.used) { int a1 = indexes[p1], a2 = b.indexes[p2]; if (a1 == a2) { norm += Math.pow(Math.abs(this.values[p1] - b.values[p2]), p); p1++; p2++; } else if (a1 > a2) norm += Math.pow(Math.abs(b.values[p2++]), p); else // a1 < a2, this vec has a value, other does not norm += Math.pow(Math.abs(this.values[p1++]), p); } // One of them is now empty. // So just sum up the rest of the elements while (p1 < this.used) norm += Math.pow(Math.abs(this.values[p1++]), p); while (p2 < b.used) norm += Math.pow(Math.abs(b.values[p2++]), p); } else { int z = 0; for (int i = 0; i < length(); i++) { // Move through until we hit the next null element, comparing the other vec to zero while (z < used && indexes[z] > i) norm += Math.pow(Math.abs(-y.get(i++)), p); // We made it! (or are at the end). Is our non zero value the same? if (z < used && indexes[z] == i) norm += Math.pow(Math.abs(values[z] - y.get(i)), p); } } return Math.pow(norm, 1.0 / p); }
public Frame deepSlice(Object orows, Object ocols) { // ocols is either a long[] or a Frame-of-1-Vec long[] cols; if (ocols == null) { cols = (long[]) ocols; assert cols == null; } else { if (ocols instanceof long[]) { cols = (long[]) ocols; } else if (ocols instanceof Frame) { Frame fr = (Frame) ocols; if (fr.numCols() != 1) { throw new IllegalArgumentException( "Columns Frame must have only one column (actually has " + fr.numCols() + " columns)"); } long n = fr.anyVec().length(); if (n > MAX_EQ2_COLS) { throw new IllegalArgumentException( "Too many requested columns (requested " + n + ", max " + MAX_EQ2_COLS + ")"); } cols = new long[(int) n]; Vec v = fr._vecs[0]; for (long i = 0; i < v.length(); i++) { cols[(int) i] = v.at8(i); } } else { throw new IllegalArgumentException( "Columns is specified by an unsupported data type (" + ocols.getClass().getName() + ")"); } } // Since cols is probably short convert to a positive list. int c2[] = null; if (cols == null) { c2 = new int[numCols()]; for (int i = 0; i < c2.length; i++) c2[i] = i; } else if (cols.length == 0) { c2 = new int[0]; } else if (cols[0] > 0) { c2 = new int[cols.length]; for (int i = 0; i < cols.length; i++) c2[i] = (int) cols[i] - 1; // Convert 1-based cols to zero-based } else { c2 = new int[numCols() - cols.length]; int j = 0; for (int i = 0; i < numCols(); i++) { if (j >= cols.length || i < (-cols[j] - 1)) c2[i - j] = i; else j++; } } for (int i = 0; i < c2.length; i++) if (c2[i] >= numCols()) throw new IllegalArgumentException( "Trying to select column " + c2[i] + " but only " + numCols() + " present."); if (c2.length == 0) throw new IllegalArgumentException( "No columns selected (did you try to select column 0 instead of column 1?)"); // Do Da Slice // orows is either a long[] or a Vec if (orows == null) return new DeepSlice((long[]) orows, c2) .doAll(c2.length, this) .outputFrame(names(c2), domains(c2)); else if (orows instanceof long[]) { final long CHK_ROWS = 1000000; long[] rows = (long[]) 
orows; if (rows.length == 0) return new DeepSlice(rows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2)); if (rows[0] < 0) return new DeepSlice(rows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2)); // Vec'ize the index array AppendableVec av = new AppendableVec("rownames"); int r = 0; int c = 0; while (r < rows.length) { NewChunk nc = new NewChunk(av, c); long end = Math.min(r + CHK_ROWS, rows.length); for (; r < end; r++) { nc.addNum(rows[r]); } nc.close(c++, null); } Vec c0 = av.close(null); // c0 is the row index vec Frame fr2 = new Slice(c2, this) .doAll(c2.length, new Frame(new String[] {"rownames"}, new Vec[] {c0})) .outputFrame(names(c2), domains(c2)); UKV.remove(c0._key); // Remove hidden vector return fr2; } Frame frows = (Frame) orows; Vec vrows = frows.anyVec(); // It's a compatible Vec; use it as boolean selector. // Build column names for the result. Vec[] vecs = new Vec[c2.length + 1]; String[] names = new String[c2.length + 1]; for (int i = 0; i < c2.length; ++i) { vecs[i] = _vecs[c2[i]]; names[i] = _names[c2[i]]; } vecs[c2.length] = vrows; names[c2.length] = "predicate"; return new DeepSelect() .doAll(c2.length, new Frame(names, vecs)) .outputFrame(names(c2), domains(c2)); }
/**
 * Collects the contents of a string vector into a Java array.
 *
 * <p>Rows that the vector reports as NA become {@code null}; every other row
 * is read with {@code atStr} and converted to a {@code String}.
 *
 * @param v the vector to read
 * @return one entry per row, sized by {@code v.length()} cast to {@code int}
 */
public static String[] collectS(Vec v) {
  final String[] collected = new String[(int) v.length()];
  // One scratch buffer is reused for every row that is read.
  final BufferedString scratch = new BufferedString();
  for (int row = 0; row < v.length(); row++) {
    if (v.isNA(row)) {
      collected[row] = null;
    } else {
      collected[row] = v.atStr(scratch, row).toString();
    }
  }
  return collected;
}