Exemple #1
0
  @Test
  public void test_inflate_impl() {
    for (int l = 0; l < 2; ++l) {
      NewChunk nc = new NewChunk(null, 0);

      // -32.767, 0.34, 0, 32.767, NA for l==0
      // NA, -32.767, 0.34, 0, 32.767, NA for l==1
      long[] man = new long[] {-32767, 34, 0, 32767};
      int[] exp = new int[] {-3, -2, 1, -3};
      if (l == 1) nc.addNA(); // -32768
      for (int i = 0; i < man.length; ++i) nc.addNum(man[i], exp[i]);
      nc.addNA();

      Chunk cc = nc.compress();
      Assert.assertEquals(man.length + 1 + l, cc.len());
      Assert.assertTrue(cc instanceof C2SChunk);
      if (l == 1) {
        Assert.assertTrue(cc.isNA0(0));
        Assert.assertTrue(cc.isNA(0));
      }
      for (int i = 0; i < man.length; ++i) {
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at0(l + i), 0);
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc.at(l + i), 0);
      }
      Assert.assertTrue(cc.isNA0(man.length + l));
      Assert.assertTrue(cc.isNA(man.length + l));

      nc = cc.inflate_impl(new NewChunk(null, 0));
      nc.values(0, nc.len());
      Assert.assertEquals(man.length + 1 + l, nc.len());
      Assert.assertEquals(man.length + 1 + l, nc.sparseLen());
      if (l == 1) {
        Assert.assertTrue(nc.isNA0(0));
        Assert.assertTrue(nc.isNA(0));
      }
      for (int i = 0; i < man.length; ++i) {
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at0(l + i), 0);
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) nc.at(l + i), 0);
      }
      Assert.assertTrue(nc.isNA0(man.length + l));
      Assert.assertTrue(nc.isNA(man.length + l));

      Chunk cc2 = nc.compress();
      Assert.assertEquals(man.length + 1 + l, cc.len());
      if (l == 1) {
        Assert.assertTrue(cc2.isNA0(0));
        Assert.assertTrue(cc2.isNA(0));
      }
      for (int i = 0; i < man.length; ++i) {
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at0(l + i), 0);
        Assert.assertEquals((float) (man[i] * Math.pow(10, exp[i])), (float) cc2.at(l + i), 0);
      }
      Assert.assertTrue(cc2.isNA0(man.length + l));
      Assert.assertTrue(cc2.isNA(man.length + l));
      Assert.assertTrue(cc2 instanceof C2SChunk);

      Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    }
  }
Exemple #2
0
 public void addStr(Chunk c, int row) {
   if (c.isNA(row)) addNA();
   else {
     addStr(c.atStr(new BufferedString(), row));
     _isAllASCII &= ((CStrChunk) c)._isAllASCII;
   }
 }
Exemple #3
0
  @Test
  public void test_setNA() {
    // Create a vec with one chunk with 15 elements, and set its numbers
    Vec vec = new Vec(Vec.newKey(), new long[] {0, 15}).makeZeros(1, null, null, null, null)[0];
    int[] vals = new int[] {0, 3, 0, 6, 0, 0, 0, -32769, 0, 12, 234, 32765, 0, 0, 19};
    Vec.Writer w = vec.open();
    for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]);
    w.close();

    Chunk cc = vec.chunkForChunkIdx(0);
    assert cc instanceof C2SChunk;
    Futures fs = new Futures();
    fs.blockForPending();

    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i));

    int[] NAs = new int[] {1, 5, 2};
    int[] notNAs = new int[] {0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14};
    for (int na : NAs) cc.setNA(na);

    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    NewChunk nc = new NewChunk(null, 0);
    cc.inflate_impl(nc);
    nc.values(0, nc.len());
    Assert.assertEquals(vals.length, nc.sparseLen());
    Assert.assertEquals(vals.length, nc.len());

    Iterator<NewChunk.Value> it = nc.values(0, vals.length);
    for (int i = 0; i < vals.length; ++i) Assert.assertTrue(it.next().rowId0() == i);
    Assert.assertTrue(!it.hasNext());

    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    Chunk cc2 = nc.compress();
    Assert.assertEquals(vals.length, cc.len());
    Assert.assertTrue(cc2 instanceof C2SChunk);
    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    vec.remove();
  }
Exemple #4
0
 @Override
 public void map(Chunk cs) {
   int idx = _chunkOffset + cs.cidx();
   Key ckey = Vec.chunkKey(_v._key, idx);
   if (_cmap != null) {
     assert !cs.hasFloat()
         : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical";
     NewChunk nc = new NewChunk(_v, idx);
     // loop over rows and update ints for new domain mapping according to vecs[c].domain()
     for (int r = 0; r < cs._len; ++r) {
       if (cs.isNA(r)) nc.addNA();
       else nc.addNum(_cmap[(int) cs.at8(r)], 0);
     }
     nc.close(_fs);
   } else {
     DKV.put(ckey, cs.deepCopy(), _fs, true);
   }
 }
Exemple #5
0
 public void addUUID(Chunk c, int row) {
   if (c.isNA(row)) addUUID(C16Chunk._LO_NA, C16Chunk._HI_NA);
   else addUUID(c.at16l(row), c.at16h(row));
 }
Exemple #6
0
 public void addStr(Chunk c, int row) {
   if (c.isNA(row)) addNA();
   else addStr(c.atStr(new ValueString(), row));
 }
Exemple #7
0
 /**
  * Extract (sparse) rows from given chunks. Note: 0 remains 0 - _normSub of DataInfo isn't used
  * (mean shift during standarization is not reverted) - UNLESS offset is specified (for GLM only)
  * Essentially turns the dataset 90 degrees.
  *
  * @param chunks - chunk of dataset
  * @return array of sparse rows
  */
 public final Row[] extractSparseRows(Chunk[] chunks) {
   Row[] rows = new Row[chunks[0]._len];
   long startOff = chunks[0].start();
   for (int i = 0; i < rows.length; ++i) {
     rows[i] =
         new Row(
             true,
             Math.min(_nums, 16),
             _cats,
             _responses,
             i,
             startOff); // if sparse, _nums is the correct number of nonzero values! i.e., do not
     // use numNums()
     rows[i].rid = chunks[0].start() + i;
     if (_offset) {
       rows[i].offset = chunks[offsetChunkId()].atd(i);
       if (Double.isNaN(rows[i].offset)) rows[i].bad = true;
     }
     if (_weights) {
       rows[i].weight = chunks[weightChunkId()].atd(i);
       if (Double.isNaN(rows[i].weight)) rows[i].bad = true;
     }
     if (_skipMissing) {
       int N = _cats + _nums;
       for (int c = 0; c < N; ++c) if (chunks[c].isNA(i)) rows[i].bad = true;
     }
   }
   // categoricals
   for (int i = 0; i < _cats; ++i) {
     for (int r = 0; r < chunks[0]._len; ++r) {
       Row row = rows[r];
       if (row.bad) continue;
       int cid = getCategoricalId(i, chunks[i].isNA(r) ? _catModes[i] : (int) chunks[i].at8(r));
       if (cid >= 0) row.binIds[row.nBins++] = cid;
     }
   }
   // generic numbers + interactions
   int interactionOffset = 0;
   for (int cid = 0; cid < _nums; ++cid) {
     Chunk c = chunks[_cats + cid];
     int oldRow = -1;
     if (c
         instanceof
         InteractionWrappedVec
             .InteractionWrappedChunk) { // for each row, only 1 value in an interaction is 'hot'
       // all other values are off (i.e., are 0)
       for (int r = 0;
           r < c._len;
           ++r) { // the vec is "vertically" dense and "horizontally" sparse (i.e., every row has
         // one, and only one, value)
         Row row = rows[r];
         if (row.bad) continue;
         if (c.isNA(r)) row.bad = _skipMissing;
         int cidVirtualOffset =
             getInteractionOffset(
                 chunks, _cats + cid, r); // the "virtual" offset into the hot-expanded interaction
         row.addNum(
             _numOffsets[cid] + cidVirtualOffset,
             c.atd(r)); // FIXME: if this produces a "true" NA then should sub with mean? with?
       }
       interactionOffset += nextNumericIdx(cid);
     } else {
       for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
         if (c.atd(r) == 0) continue;
         assert r > oldRow;
         oldRow = r;
         Row row = rows[r];
         if (row.bad) continue;
         if (c.isNA(r)) row.bad = _skipMissing;
         double d = c.atd(r);
         if (Double.isNaN(d)) d = _numMeans[cid];
         if (_normMul != null) d *= _normMul[interactionOffset];
         row.addNum(_numOffsets[cid], d);
       }
       interactionOffset++;
     }
   }
   // response(s)
   for (int i = 1; i <= _responses; ++i) {
     int rid = responseChunkId(i - 1);
     Chunk rChunk = chunks[rid];
     for (int r = 0; r < chunks[0]._len; ++r) {
       Row row = rows[r];
       if (row.bad) continue;
       row.response[i - 1] = rChunk.atd(r);
       if (_normRespMul != null) {
         row.response[i - 1] = (row.response[i - 1] - _normRespSub[i - 1]) * _normRespMul[i - 1];
       }
       if (Double.isNaN(row.response[row.response.length - i])) row.bad = true;
     }
   }
   return rows;
 }