示例#1
0
 @Override
 public void compute2() {
   _in.read_lock(_jobKey);
   // simply create a bogus new vector (don't even put it into KV) with appropriate number of lines
   // per chunk and then use it as a source to do multiple makeZero calls
   // to create empty vecs and than call RebalanceTask on each one of them.
   // RebalanceTask will fetch the appropriate src chunks and fetch the data from them.
   int rpc = (int) (_in.numRows() / _nchunks);
   int rem = (int) (_in.numRows() % _nchunks);
   long[] espc = new long[_nchunks + 1];
   Arrays.fill(espc, rpc);
   for (int i = 0; i < rem; ++i) ++espc[i];
   long sum = 0;
   for (int i = 0; i < espc.length; ++i) {
     long s = espc[i];
     espc[i] = sum;
     sum += s;
   }
   assert espc[espc.length - 1] == _in.numRows()
       : "unexpected number of rows, expected " + _in.numRows() + ", got " + espc[espc.length - 1];
   final Vec[] srcVecs = _in.vecs();
   _out =
       new Frame(
           _okey,
           _in.names(),
           new Vec(Vec.newKey(), espc).makeZeros(srcVecs.length, _in.domains()));
   _out.delete_and_lock(_jobKey);
   new RebalanceTask(this, srcVecs).asyncExec(_out);
 }
示例#2
0
  @Test
  public void test_setNA() {
    // Create a vec with one chunk with 15 elements, and set its numbers
    Vec vec = new Vec(Vec.newKey(), new long[] {0, 15}).makeZeros(1, null, null, null, null)[0];
    int[] vals = new int[] {0, 3, 0, 6, 0, 0, 0, -32769, 0, 12, 234, 32765, 0, 0, 19};
    Vec.Writer w = vec.open();
    for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]);
    w.close();

    Chunk cc = vec.chunkForChunkIdx(0);
    assert cc instanceof C2SChunk;
    Futures fs = new Futures();
    fs.blockForPending();

    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i));

    int[] NAs = new int[] {1, 5, 2};
    int[] notNAs = new int[] {0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14};
    for (int na : NAs) cc.setNA(na);

    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    NewChunk nc = new NewChunk(null, 0);
    cc.inflate_impl(nc);
    nc.values(0, nc.len());
    Assert.assertEquals(vals.length, nc.sparseLen());
    Assert.assertEquals(vals.length, nc.len());

    Iterator<NewChunk.Value> it = nc.values(0, vals.length);
    for (int i = 0; i < vals.length; ++i) Assert.assertTrue(it.next().rowId0() == i);
    Assert.assertTrue(!it.hasNext());

    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    Chunk cc2 = nc.compress();
    Assert.assertEquals(vals.length, cc.len());
    Assert.assertTrue(cc2 instanceof C2SChunk);
    for (int na : NAs) Assert.assertTrue(cc.isNA0(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA0(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));

    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    vec.remove();
  }
示例#3
0
 // Make vector templates for all output frame vectors
 private Vec[][] makeTemplates(Frame dataset, float[] ratios) {
   Vec anyVec = dataset.anyVec();
   final long[][] espcPerSplit = computeEspcPerSplit(anyVec._espc, anyVec.length(), ratios);
   final int num = dataset.numCols(); // number of columns in input frame
   final int nsplits = espcPerSplit.length; // number of splits
   final String[][] domains = dataset.domains(); // domains
   Vec[][] t = new Vec[nsplits][ /*num*/]; // resulting vectors for all
   for (int i = 0; i < nsplits; i++) {
     // vectors for j-th split
     t[i] = new Vec(Vec.newKey(), espcPerSplit[i /*-th split*/]).makeZeros(num, domains);
   }
   return t;
 }
示例#4
0
  @Test
  public void test() {
    Frame frame = null;
    try {
      Futures fs = new Futures();
      Random random = new Random();
      Vec[] vecs = new Vec[1];
      AppendableVec vec = new AppendableVec(Vec.newKey(), Vec.T_NUM);
      for (int i = 0; i < 2; i++) {
        NewChunk chunk = new NewChunk(vec, i);
        for (int r = 0; r < 1000; r++) chunk.addNum(random.nextInt(1000));
        chunk.close(i, fs);
      }
      vecs[0] = vec.layout_and_close(fs);
      fs.blockForPending();
      frame = new Frame(Key.<Frame>make(), null, vecs);

      // Make sure we test the multi-chunk case
      vecs = frame.vecs();
      assert vecs[0].nChunks() > 1;
      long rows = frame.numRows();
      Vec v = vecs[0];
      double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY, mean = 0, sigma = 0;
      for (int r = 0; r < rows; r++) {
        double d = v.at(r);
        if (d < min) min = d;
        if (d > max) max = d;
        mean += d;
      }
      mean /= rows;
      for (int r = 0; r < rows; r++) {
        double d = v.at(r);
        sigma += (d - mean) * (d - mean);
      }
      sigma = Math.sqrt(sigma / (rows - 1));

      double epsilon = 1e-9;
      assertEquals(max, v.max(), epsilon);
      assertEquals(min, v.min(), epsilon);
      assertEquals(mean, v.mean(), epsilon);
      assertEquals(sigma, v.sigma(), epsilon);
    } finally {
      if (frame != null) frame.delete();
    }
  }
 public static Key makeByteVec(Key k, String... data) {
   byte[][] chunks = new byte[data.length][];
   long[] espc = new long[data.length + 1];
   for (int i = 0; i < chunks.length; ++i) {
     chunks[i] = data[i].getBytes();
     espc[i + 1] = espc[i] + data[i].length();
   }
   Futures fs = new Futures();
   Key key = Vec.newKey();
   ByteVec bv = new ByteVec(key, Vec.ESPC.rowLayout(key, espc));
   for (int i = 0; i < chunks.length; ++i) {
     Key chunkKey = bv.chunkKey(i);
     DKV.put(
         chunkKey,
         new Value(chunkKey, chunks[i].length, chunks[i], TypeMap.C1NCHUNK, Value.ICE),
         fs);
   }
   DKV.put(bv._key, bv, fs);
   Frame fr = new Frame(k, new String[] {"makeByteVec"}, new Vec[] {bv});
   DKV.put(k, fr, fs);
   fs.blockForPending();
   return k;
 }