Example #1
0
 @Override
 public void map(Chunk chks[], NewChunk nchks[]) {
   long rstart = chks[0]._start;
   int rlen = chks[0]._len; // Total row count
   int rx = 0; // Which row to in/ex-clude
   int rlo = 0; // Lo/Hi for this block of rows
   int rhi = rlen;
   while (true) { // Still got rows to include?
     if (_rows != null) { // Got a row selector?
       if (rx >= _rows.length) break; // All done with row selections
       long r = _rows[rx++] - 1; // Next row selector
       if (r < 0) { // Row exclusion
         if (rx > 0 && _rows[rx - 1] < _rows[rx]) throw H2O.unimpl();
         long er = Math.abs(r) - 2;
         if (er < rstart) continue;
         // scoop up all of the rows before the first exclusion
         if (rx == 1 && ((int) (er + 1 - rstart)) > 0 && _ex) {
           rlo = (int) rstart;
           rhi = (int) (er - rstart);
           _ex = false;
           rx--;
         } else {
           rlo = (int) (er + 1 - rstart);
           // TODO: handle jumbled row indices ( e.g. -c(1,5,3) )
           while (rx < _rows.length && (_rows[rx] + 1 == _rows[rx - 1] && rlo < rlen)) {
             if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
             rx++;
             rlo++; // Exclude consecutive rows
           }
           rhi = rx >= _rows.length ? rlen : (int) Math.abs(_rows[rx] - 1) - 2;
           if (rx < _rows.length - 1 && _rows[rx] < _rows[rx + 1]) throw H2O.unimpl();
         }
       } else { // Positive row list?
         if (r < rstart) continue;
         rlo = (int) (r - rstart);
         rhi = rlo + 1; // Stop at the next row
         while (rx < _rows.length && (_rows[rx] - 1 - rstart) == rhi && rhi < rlen) {
           rx++;
           rhi++; // Grab sequential rows
         }
       }
     }
     // Process this next set of rows
     // For all cols in the new set
     for (int i = 0; i < _cols.length; i++) {
       Chunk oc = chks[_cols[i]];
       NewChunk nc = nchks[i];
       if (oc._vec.isInt()) { // Slice on integer columns
         for (int j = rlo; j < rhi; j++)
           if (oc.isNA0(j)) nc.addNA();
           else nc.addNum(oc.at80(j), 0);
       } else { // Slice on double columns
         for (int j = rlo; j < rhi; j++) nc.addNum(oc.at0(j));
       }
     }
     rlo = rhi;
     if (_rows == null) break;
   }
 }
Example #2
0
 @Override
 NewChunk inflate_impl(NewChunk nc) {
   double dx = Math.log10(_scale);
   assert DParseTask.fitsIntoInt(dx);
   Arrays.fill(nc._xs = MemoryManager.malloc4(_len), (int) dx);
   nc._ls = MemoryManager.malloc8(_len);
   for (int i = 0; i < _len; i++) {
     int res = UDP.get2(_mem, (i << 1) + OFF);
     if (res == C2Chunk._NA) nc.setNA_impl2(i);
     else nc._ls[i] = res + _bias;
   }
   return nc;
 }
Example #3
0
 public NewChunk convertEnum2Str(ValueString[] emap) {
   NewChunk strChunk = new NewChunk(_vec, _cidx);
   int j = 0, l = _len;
   for (int i = 0; i < l; ++i) {
     if (_id != null && _id.length > 0 && (j < _id.length && _id[j] == i)) // Sparse storage
       // adjust for enum ids using 1-based indexing
       strChunk.addStr(emap[(int) _ls[j++] - 1]);
     else if (_xs[i] != Integer.MIN_VALUE) // Categorical value isn't NA
     strChunk.addStr(emap[(int) _ls[i] - 1]);
     else strChunk.addNA();
   }
   if (_id != null) assert j == sparseLen() : "j = " + j + ", sparseLen = " + sparseLen();
   return strChunk;
 }
Example #4
0
 @Override
 public NewChunk inflate_impl(NewChunk nc) {
   double dx = Math.log10(_scale);
   assert water.util.PrettyPrint.fitsIntoInt(dx);
   nc.set_sparseLen(0);
   nc.set_len(0);
   final int len = _len;
   for (int i = 0; i < len; i++) {
     int res = 0xFF & _mem[i + _OFF];
     if (res == C1Chunk._NA) nc.addNA();
     else nc.addNum((res + _bias), (int) dx);
   }
   return nc;
 }
Example #5
0
  @Test
  public void test() {
    Frame frame = null;
    try {
      Futures fs = new Futures();
      Random random = new Random();
      Vec[] vecs = new Vec[1];
      AppendableVec vec = new AppendableVec(Vec.newKey(), Vec.T_NUM);
      for (int i = 0; i < 2; i++) {
        NewChunk chunk = new NewChunk(vec, i);
        for (int r = 0; r < 1000; r++) chunk.addNum(random.nextInt(1000));
        chunk.close(i, fs);
      }
      vecs[0] = vec.layout_and_close(fs);
      fs.blockForPending();
      frame = new Frame(Key.<Frame>make(), null, vecs);

      // Make sure we test the multi-chunk case
      vecs = frame.vecs();
      assert vecs[0].nChunks() > 1;
      long rows = frame.numRows();
      Vec v = vecs[0];
      double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY, mean = 0, sigma = 0;
      for (int r = 0; r < rows; r++) {
        double d = v.at(r);
        if (d < min) min = d;
        if (d > max) max = d;
        mean += d;
      }
      mean /= rows;
      for (int r = 0; r < rows; r++) {
        double d = v.at(r);
        sigma += (d - mean) * (d - mean);
      }
      sigma = Math.sqrt(sigma / (rows - 1));

      double epsilon = 1e-9;
      assertEquals(max, v.max(), epsilon);
      assertEquals(min, v.min(), epsilon);
      assertEquals(mean, v.mean(), epsilon);
      assertEquals(sigma, v.sigma(), epsilon);
    } finally {
      if (frame != null) frame.delete();
    }
  }
Example #6
0
 @Override
 public void map(Chunk cs) {
   int idx = _chunkOffset + cs.cidx();
   Key ckey = Vec.chunkKey(_v._key, idx);
   if (_cmap != null) {
     assert !cs.hasFloat()
         : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical";
     NewChunk nc = new NewChunk(_v, idx);
     // loop over rows and update ints for new domain mapping according to vecs[c].domain()
     for (int r = 0; r < cs._len; ++r) {
       if (cs.isNA(r)) nc.addNA();
       else nc.addNum(_cmap[(int) cs.at8(r)], 0);
     }
     nc.close(_fs);
   } else {
     DKV.put(ckey, cs.deepCopy(), _fs, true);
   }
 }
Example #7
0
 public void add2Chunk(NewChunk c) {
   if (_ds == null && _ss == null) {
     c.addNum(_ls[_lId], _xs[_lId]);
   } else {
     if (_ls != null) {
       c.addUUID(_ls[_lId], Double.doubleToRawLongBits(_ds[_lId]));
     } else if (_ss != null) {
       int sidx = _is[_lId];
       int nextNotNAIdx = _lId + 1;
       // Find next not-NA value (_is[idx] != -1)
       while (nextNotNAIdx < _is.length && _is[nextNotNAIdx] == -1) nextNotNAIdx++;
       int slen = nextNotNAIdx < _is.length ? _is[nextNotNAIdx] - sidx : _sslen - sidx;
       // null-BufferedString represents NA value
       BufferedString bStr = sidx == -1 ? null : new BufferedString().set(_ss, sidx, slen);
       c.addStr(bStr);
     } else c.addNum(_ds[_lId]);
   }
 }
Example #8
0
  @Test
  public void test_setNA() {
    // Create a vec with one chunk with 15 elements, and set its numbers
    Vec vec = new Vec(Vec.newKey(), new long[] {0, 15}).makeZeros(1, null, null, null, null)[0];
    int[] vals = new int[] {0, 3, 0, 6, 0, 0, 0, -32769, 0, 12, 234, 32765, 0, 0, 19};
    Vec.Writer w = vec.open();
    for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]);
    w.close();

    Chunk cc = vec.chunkForChunkIdx(0);
    assert cc instanceof C2SChunk;
    Futures fs = new Futures();
    fs.blockForPending();

    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i));

    int[] NAs = new int[] {1, 5, 2};
    int[] notNAs = new int[] {0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14};
    for (int na : NAs) cc.setNA(na);

    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    NewChunk nc = new NewChunk(null, 0);
    cc.inflate_impl(nc);
    nc.values(0, nc.len());
    Assert.assertEquals(vals.length, nc.sparseLen());
    Assert.assertEquals(vals.length, nc.len());

    Iterator<NewChunk.Value> it = nc.values(0, vals.length);
    for (int i = 0; i < vals.length; ++i) Assert.assertTrue(it.next().rowId0() == i);
    Assert.assertTrue(!it.hasNext());

    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    Chunk cc2 = nc.compress();
    Assert.assertEquals(vals.length, cc.len());
    Assert.assertTrue(cc2 instanceof C2SChunk);
    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    vec.remove();
  }
Example #9
0
 // PREpend all of 'nc' onto the current NewChunk.  Kill nc.
 public void addr(NewChunk nc) {
   long[] tmpl = _ls;
   _ls = nc._ls;
   nc._ls = tmpl;
   int[] tmpi = _xs;
   _xs = nc._xs;
   nc._xs = tmpi;
   tmpi = _id;
   _id = nc._id;
   nc._id = tmpi;
   double[] tmpd = _ds;
   _ds = nc._ds;
   nc._ds = tmpd;
   int tmp = _sparseLen;
   _sparseLen = nc._sparseLen;
   nc._sparseLen = tmp;
   tmp = _len;
   _len = nc._len;
   nc._len = tmp;
   add(nc);
 }
Example #10
0
  @Test
  public void test_inflate_impl2() {
    for (int l = 0; l < 2; ++l) {
      NewChunk nc = new NewChunk(null, 0);

      long[] man = new long[] {-12767, 34, 0, 52767};
      int[] exp = new int[] {-3, -2, 1, -3};
      if (l == 1) nc.addNA(); // -32768
      for (int i = 0; i < man.length; ++i) nc.addNum(man[i], exp[i]);
      nc.addNA();

      Chunk cc = nc.compress();
      Assert.assertEquals(man.length + 1 + l, cc.len());
      Assert.assertTrue(cc instanceof C2SChunk);
      if (l == 1) {
        Assert.assertTrue(cc.isNA0(0));
        Assert.assertTrue(cc.isNA(0));
      }
      for (int i = 0; i < man.length; ++i) {
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at0(l + i), 0);
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at(l + i), 0);
      }
      Assert.assertTrue(cc.isNA0(man.length + l));

      nc = cc.inflate_impl(new NewChunk(null, 0));
      nc.values(0, nc.len());
      Assert.assertEquals(man.length + 1 + l, nc.len());
      Assert.assertEquals(man.length + 1 + l, nc.sparseLen());
      if (l == 1) {
        Assert.assertTrue(nc.isNA0(0));
        Assert.assertTrue(nc.isNA(0));
      }
      for (int i = 0; i < man.length; ++i) {
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at0(l + i), 0);
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at(l + i), 0);
      }
      Assert.assertTrue(nc.isNA0(man.length + l));
      Assert.assertTrue(nc.isNA(man.length + l));

      Chunk cc2 = nc.compress();
      Assert.assertEquals(man.length + 1 + l, cc.len());
      if (l == 1) {
        Assert.assertTrue(cc2.isNA0(0));
        Assert.assertTrue(cc2.isNA(0));
      }
      for (int i = 0; i < man.length; ++i) {
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at0(l + i), 0);
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at(l + i), 0);
      }
      Assert.assertTrue(cc2.isNA0(man.length + l));
      Assert.assertTrue(cc2.isNA(man.length + l));
      Assert.assertTrue(cc2 instanceof C2SChunk);

      Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    }
  }
Example #11
0
  // Append all of 'nc' onto the current NewChunk.  Kill nc.
  public void add(NewChunk nc) {
    assert _cidx >= 0;
    assert sparseLen() <= _len;
    assert nc.sparseLen() <= nc._len : "_len = " + nc.sparseLen() + ", _len2 = " + nc._len;
    if (nc._len == 0) return;
    if (_len == 0) {
      _ls = nc._ls;
      nc._ls = null;
      _xs = nc._xs;
      nc._xs = null;
      _id = nc._id;
      nc._id = null;
      _ds = nc._ds;
      nc._ds = null;
      _is = nc._is;
      nc._is = null;
      _ss = nc._ss;
      nc._ss = null;
      set_sparseLen(nc.sparseLen());
      set_len(nc._len);
      return;
    }
    if (nc.sparse() != sparse()) { // for now, just make it dense
      cancel_sparse();
      nc.cancel_sparse();
    }
    if (_ds != null) throw H2O.fail();
    while (sparseLen() + nc.sparseLen() >= _xs.length)
      _xs = MemoryManager.arrayCopyOf(_xs, _xs.length << 1);
    _ls = MemoryManager.arrayCopyOf(_ls, _xs.length);
    System.arraycopy(nc._ls, 0, _ls, sparseLen(), nc.sparseLen());
    System.arraycopy(nc._xs, 0, _xs, sparseLen(), nc.sparseLen());
    if (_id != null) {
      assert nc._id != null;
      _id = MemoryManager.arrayCopyOf(_id, _xs.length);
      System.arraycopy(nc._id, 0, _id, sparseLen(), nc.sparseLen());
      for (int i = sparseLen(); i < sparseLen() + nc.sparseLen(); ++i) _id[i] += _len;
    } else assert nc._id == null;

    set_sparseLen(sparseLen() + nc.sparseLen());
    set_len(_len + nc._len);
    nc._ls = null;
    nc._xs = null;
    nc._id = null;
    nc.set_sparseLen(nc.set_len(0));
    assert sparseLen() <= _len;
  }
Example #12
0
 @Override
 public void map(Chunk c, NewChunk nc) {
   double acc = _init;
   for (int i = 0; i < c._len; ++i) nc.addNum(acc = op(acc, c.atd(i)));
   _chkCumu[c.cidx()] = acc;
 }
Example #13
0
  public Frame deepSlice(Object orows, Object ocols) {
    // ocols is either a long[] or a Frame-of-1-Vec
    long[] cols;
    if (ocols == null) {
      cols = (long[]) ocols;
      assert cols == null;
    } else {
      if (ocols instanceof long[]) {
        cols = (long[]) ocols;
      } else if (ocols instanceof Frame) {
        Frame fr = (Frame) ocols;
        if (fr.numCols() != 1) {
          throw new IllegalArgumentException(
              "Columns Frame must have only one column (actually has "
                  + fr.numCols()
                  + " columns)");
        }

        long n = fr.anyVec().length();
        if (n > MAX_EQ2_COLS) {
          throw new IllegalArgumentException(
              "Too many requested columns (requested " + n + ", max " + MAX_EQ2_COLS + ")");
        }

        cols = new long[(int) n];
        Vec v = fr._vecs[0];
        for (long i = 0; i < v.length(); i++) {
          cols[(int) i] = v.at8(i);
        }
      } else {
        throw new IllegalArgumentException(
            "Columns is specified by an unsupported data type ("
                + ocols.getClass().getName()
                + ")");
      }
    }

    // Since cols is probably short convert to a positive list.
    int c2[] = null;
    if (cols == null) {
      c2 = new int[numCols()];
      for (int i = 0; i < c2.length; i++) c2[i] = i;
    } else if (cols.length == 0) {
      c2 = new int[0];
    } else if (cols[0] > 0) {
      c2 = new int[cols.length];
      for (int i = 0; i < cols.length; i++)
        c2[i] = (int) cols[i] - 1; // Convert 1-based cols to zero-based
    } else {
      c2 = new int[numCols() - cols.length];
      int j = 0;
      for (int i = 0; i < numCols(); i++) {
        if (j >= cols.length || i < (-cols[j] - 1)) c2[i - j] = i;
        else j++;
      }
    }
    for (int i = 0; i < c2.length; i++)
      if (c2[i] >= numCols())
        throw new IllegalArgumentException(
            "Trying to select column " + c2[i] + " but only " + numCols() + " present.");
    if (c2.length == 0)
      throw new IllegalArgumentException(
          "No columns selected (did you try to select column 0 instead of column 1?)");

    // Do Da Slice
    // orows is either a long[] or a Vec
    if (orows == null)
      return new DeepSlice((long[]) orows, c2)
          .doAll(c2.length, this)
          .outputFrame(names(c2), domains(c2));
    else if (orows instanceof long[]) {
      final long CHK_ROWS = 1000000;
      long[] rows = (long[]) orows;
      if (rows.length == 0)
        return new DeepSlice(rows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2));
      if (rows[0] < 0)
        return new DeepSlice(rows, c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2));
      // Vec'ize the index array
      AppendableVec av = new AppendableVec("rownames");
      int r = 0;
      int c = 0;
      while (r < rows.length) {
        NewChunk nc = new NewChunk(av, c);
        long end = Math.min(r + CHK_ROWS, rows.length);
        for (; r < end; r++) {
          nc.addNum(rows[r]);
        }
        nc.close(c++, null);
      }
      Vec c0 = av.close(null); // c0 is the row index vec
      Frame fr2 =
          new Slice(c2, this)
              .doAll(c2.length, new Frame(new String[] {"rownames"}, new Vec[] {c0}))
              .outputFrame(names(c2), domains(c2));
      UKV.remove(c0._key); // Remove hidden vector
      return fr2;
    }
    Frame frows = (Frame) orows;
    Vec vrows = frows.anyVec();
    // It's a compatible Vec; use it as boolean selector.
    // Build column names for the result.
    Vec[] vecs = new Vec[c2.length + 1];
    String[] names = new String[c2.length + 1];
    for (int i = 0; i < c2.length; ++i) {
      vecs[i] = _vecs[c2[i]];
      names[i] = _names[c2[i]];
    }
    vecs[c2.length] = vrows;
    names[c2.length] = "predicate";
    return new DeepSelect()
        .doAll(c2.length, new Frame(names, vecs))
        .outputFrame(names(c2), domains(c2));
  }
Example #14
0
  @Test
  public void test_inflate_impl() {
    final int K = 1 << 16;
    for (Double d :
        new Double[] {
          3.14159265358,
          Double.POSITIVE_INFINITY,
          Double.NEGATIVE_INFINITY,
          Double.MAX_VALUE,
          Double.NaN
        }) {
      NewChunk nc = new NewChunk(null, 0);
      for (int i = 0; i < K; ++i) nc.addNum(d);
      Assert.assertEquals(K, nc.len());
      Assert.assertEquals(K, nc.sparseLen());

      Chunk cc = nc.compress();
      Assert.assertEquals(K, cc.len());
      Assert.assertTrue(cc instanceof C0DChunk);
      for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc.at0(i), Math.ulp(d));
      for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc.at(i), Math.ulp(d));

      nc = cc.inflate_impl(new NewChunk(null, 0));
      nc.values(0, nc.len());
      Assert.assertEquals(K, nc.len());
      Assert.assertEquals(K, nc.sparseLen());
      for (int i = 0; i < K; ++i) Assert.assertEquals(d, nc.at0(i), Math.ulp(d));
      for (int i = 0; i < K; ++i) Assert.assertEquals(d, nc.at(i), Math.ulp(d));

      Chunk cc2 = nc.compress();
      Assert.assertEquals(K, cc2.len());
      Assert.assertTrue(cc2 instanceof C0DChunk);
      for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc2.at0(i), Math.ulp(d));
      for (int i = 0; i < K; ++i) Assert.assertEquals(d, cc2.at(i), Math.ulp(d));

      Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    }
  }
Example #15
0
 private void rebalanceChunk(Vec srcVec, Chunk chk) {
   NewChunk dst = new NewChunk(chk);
   dst._len = dst._len2 = 0;
   int rem = chk._len;
   while (rem > 0 && dst._len2 < chk._len) {
     Chunk srcRaw = srcVec.chunkForRow(chk._start + dst._len2);
     NewChunk src = new NewChunk((srcRaw));
     src = srcRaw.inflate_impl(src);
     assert src._len2 == srcRaw._len;
     int srcFrom = (int) (chk._start + dst._len2 - src._start);
     // check if the result is sparse (not exact since we only take subset of src in general)
     if ((src.sparse() && dst.sparse())
         || (src._len + dst._len < NewChunk.MIN_SPARSE_RATIO * (src._len2 + dst._len2))) {
       src.set_sparse(src._len);
       dst.set_sparse(dst._len);
     }
     final int srcTo = srcFrom + rem;
     int off = srcFrom - 1;
     Iterator<NewChunk.Value> it = src.values(Math.max(0, srcFrom), srcTo);
     while (it.hasNext()) {
       NewChunk.Value v = it.next();
       final int rid = v.rowId0();
       assert rid < srcTo;
       int add = rid - off;
       off = rid;
       dst.addZeros(add - 1);
       v.add2Chunk(dst);
       rem -= add;
       assert rem >= 0;
     }
     int trailingZeros = Math.min(rem, src._len2 - off - 1);
     dst.addZeros(trailingZeros);
     rem -= trailingZeros;
   }
   assert rem == 0 : "rem = " + rem;
   assert dst._len2 == chk._len : "len2 = " + dst._len2 + ", _len = " + chk._len;
   dst.close(dst.cidx(), _fs);
 }