Пример #1
0
 public static void stall_till_cloudsize(String[] args, int x) {
   if (!_stall_called_before) {
     H2O.main(args);
     H2O.registerRestApis(System.getProperty("user.dir"));
     _stall_called_before = true;
   }
   H2O.waitForCloudSize(x, 30000);
   _initial_keycnt = H2O.store_size();
 }
Пример #2
0
  @Override
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {

    // Compute the variable args.  Find the common row count
    Val vals[] = new Val[asts.length];
    Vec vec = null;
    for (int i = 1; i < asts.length; i++) {
      vals[i] = stk.track(asts[i].exec(env));
      if (vals[i].isFrame()) {
        Vec anyvec = vals[i].getFrame().anyVec();
        if (anyvec == null) continue; // Ignore the empty frame
        if (vec == null) vec = anyvec;
        else if (vec.length() != anyvec.length())
          throw new IllegalArgumentException(
              "cbind frames must have all the same rows, found "
                  + vec.length()
                  + " and "
                  + anyvec.length()
                  + " rows.");
      }
    }
    boolean clean = false;
    if (vec == null) {
      vec = Vec.makeZero(1);
      clean = true;
    } // Default to length 1

    // Populate the new Frame
    Frame fr = new Frame();
    for (int i = 1; i < asts.length; i++) {
      switch (vals[i].type()) {
        case Val.FRM:
          fr.add(fr.makeCompatible(vals[i].getFrame()));
          break;
        case Val.FUN:
          throw H2O.unimpl();
        case Val.STR:
          throw H2O.unimpl();
        case Val.NUM:
          // Auto-expand scalars to fill every row
          double d = vals[i].getNum();
          fr.add(Double.toString(d), vec.makeCon(d));
          break;
        default:
          throw H2O.unimpl();
      }
    }
    if (clean) vec.remove();

    return new ValFrame(fr);
  }
Пример #3
0
 private void setTransform(
     TransformType t, double[] normMul, double[] normSub, int vecStart, int n) {
   int idx = 0; // idx!=i when interactions are in play, otherwise, it's just 'i'
   for (int i = 0; i < n; ++i) {
     Vec v = _adaptedFrame.vec(vecStart + i);
     boolean isIWV = isInteractionVec(vecStart + i);
     switch (t) {
       case STANDARDIZE:
         normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = v.mean();
         break;
       case NORMALIZE:
         normMul[idx] = (v.max() - v.min() > 0) ? 1.0 / (v.max() - v.min()) : 1.0;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = v.mean();
         break;
       case DEMEAN:
         normMul[idx] = 1;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = v.mean();
         break;
       case DESCALE:
         normMul[idx] = (v.sigma() != 0) ? 1.0 / v.sigma() : 1.0;
         if (isIWV) for (int j = idx + 1; j < nextNumericIdx(i) + idx; j++) normMul[j] = 1;
         normSub[idx] = 0;
         break;
       default:
         throw H2O.unimpl();
     }
     assert !Double.isNaN(normMul[idx]);
     assert !Double.isNaN(normSub[idx]);
     idx = isIWV ? (idx + nextNumericIdx(i)) : (idx + 1);
   }
 }
Пример #4
0
  @Override
  public void compute2() {
    // Lock all possible data
    dataset.read_lock(jobKey);
    // Create a template vector for each segment
    final Vec[][] templates = makeTemplates(dataset, ratios);
    final int nsplits = templates.length;
    assert nsplits == ratios.length + 1 : "Unexpected number of split templates!";
    // Launch number of distributed FJ for each split part
    final Vec[] datasetVecs = dataset.vecs();
    splits = new Frame[nsplits];
    for (int s = 0; s < nsplits; s++) {
      Frame split = new Frame(destKeys[s], dataset.names(), templates[s]);
      split.delete_and_lock(jobKey);
      splits[s] = split;
    }
    setPendingCount(1);
    H2O.submitTask(
        new H2OCountedCompleter(FrameSplitter.this) {
          @Override
          public void compute2() {
            setPendingCount(nsplits);
            for (int s = 0; s < nsplits; s++) {
              new FrameSplitTask(
                      new H2OCountedCompleter(this) { // Completer for this task
                        @Override
                        public void compute2() {}

                        @Override
                        public boolean onExceptionalCompletion(
                            Throwable ex, CountedCompleter caller) {
                          synchronized (
                              FrameSplitter
                                  .this) { // synchronized on this since can be accessed from
                            // different workers
                            workersExceptions =
                                workersExceptions != null
                                    ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1)
                                    : new Throwable[1];
                            workersExceptions[workersExceptions.length - 1] = ex;
                          }
                          tryComplete(); // we handle the exception so wait perform normal
                          // completion
                          return false;
                        }
                      },
                      datasetVecs,
                      ratios,
                      s)
                  .asyncExec(splits[s]);
            }
            tryComplete(); // complete the computation of nsplits-tasks
          }
        });
    tryComplete(); // complete the computation of thrown tasks
  }
Пример #5
0
 public boolean isSigmaScaled() {
   switch (this) {
     case NONE:
     case DEMEAN:
     case NORMALIZE:
       return false;
     case STANDARDIZE:
     case DESCALE:
       return true;
     default:
       throw H2O.unimpl();
   }
 }
Пример #6
0
    public double[] expandCats() {
      if (isSparse() || _responses > 0) throw H2O.unimpl();

      int N = fullN();
      int numStart = numStart();
      double[] res = new double[N + (_intercept ? 1 : 0)];

      for (int i = 0; i < nBins; ++i) res[binIds[i]] = 1;
      if (numIds == null) {
        System.arraycopy(numVals, 0, res, numStart, numVals.length);
      } else {
        for (int i = 0; i < nNums; ++i) res[numIds[i]] = numVals[i];
      }
      if (_intercept) res[res.length - 1] = 1;
      return res;
    }
Пример #7
0
  @Override
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {

    // Execute all args.  Find a canonical frame; all Frames must look like this one.
    // Each argument turns into either a Frame (whose rows are entirely
    // inlined) or a scalar (which is replicated across as a single row).
    Frame fr = null; // Canonical Frame; all frames have the same column count, types and names
    int nchks = 0; // Total chunks
    Val vals[] = new Val[asts.length]; // Computed AST results
    for (int i = 1; i < asts.length; i++) {
      vals[i] = stk.track(asts[i].exec(env));
      if (vals[i].isFrame()) {
        fr = vals[i].getFrame();
        nchks += fr.anyVec().nChunks(); // Total chunks
      } else nchks++; // One chunk per scalar
    }
    // No Frame, just a pile-o-scalars?
    Vec zz = null; // The zero-length vec for the zero-frame frame
    if (fr == null) { // Zero-length, 1-column, default name
      fr = new Frame(new String[] {Frame.defaultColName(0)}, new Vec[] {zz = Vec.makeZero(0)});
      if (asts.length == 1) return new ValFrame(fr);
    }

    // Verify all Frames are the same columns, names, and types.  Domains can vary, and will be the
    // union
    final Frame frs[] = new Frame[asts.length]; // Input frame
    final byte[] types = fr.types(); // Column types
    final int ncols = fr.numCols();
    final long[] espc = new long[nchks + 1]; // Compute a new layout!
    int coffset = 0;

    for (int i = 1; i < asts.length; i++) {
      Val val = vals[i]; // Save values computed for pass 2
      Frame fr0 =
          val.isFrame()
              ? val.getFrame()
              // Scalar: auto-expand into a 1-row frame
              : stk.track(new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols())));

      // Check that all frames are compatible
      if (fr.numCols() != fr0.numCols())
        throw new IllegalArgumentException(
            "rbind frames must have all the same columns, found "
                + fr.numCols()
                + " and "
                + fr0.numCols()
                + " columns.");
      if (!Arrays.deepEquals(fr._names, fr0._names))
        throw new IllegalArgumentException(
            "rbind frames must have all the same column names, found "
                + Arrays.toString(fr._names)
                + " and "
                + Arrays.toString(fr0._names));
      if (!Arrays.equals(types, fr0.types()))
        throw new IllegalArgumentException(
            "rbind frames must have all the same column types, found "
                + Arrays.toString(types)
                + " and "
                + Arrays.toString(fr0.types()));

      frs[i] = fr0; // Save frame

      // Roll up the ESPC row counts
      long roffset = espc[coffset];
      long[] espc2 = fr0.anyVec().espc();
      for (int j = 1; j < espc2.length; j++) // Roll up the row counts
      espc[coffset + j] = (roffset + espc2[j]);
      coffset += espc2.length - 1; // Chunk offset
    }
    if (zz != null) zz.remove();

    // build up the new domains for each vec
    HashMap<String, Integer>[] dmap = new HashMap[types.length];
    String[][] domains = new String[types.length][];
    int[][][] cmaps = new int[types.length][][];
    for (int k = 0; k < types.length; ++k) {
      dmap[k] = new HashMap<>();
      int c = 0;
      byte t = types[k];
      if (t == Vec.T_CAT) {
        int[][] maps = new int[frs.length][];
        for (int i = 1; i < frs.length; i++) {
          maps[i] = new int[frs[i].vec(k).domain().length];
          for (int j = 0; j < maps[i].length; j++) {
            String s = frs[i].vec(k).domain()[j];
            if (!dmap[k].containsKey(s)) dmap[k].put(s, maps[i][j] = c++);
            else maps[i][j] = dmap[k].get(s);
          }
        }
        cmaps[k] = maps;
      } else {
        cmaps[k] = new int[frs.length][];
      }
      domains[k] = c == 0 ? null : new String[c];
      for (Map.Entry<String, Integer> e : dmap[k].entrySet()) domains[k][e.getValue()] = e.getKey();
    }

    // Now make Keys for the new Vecs
    Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols());
    Vec[] vecs = new Vec[fr.numCols()];
    int rowLayout = Vec.ESPC.rowLayout(keys[0], espc);
    for (int i = 0; i < vecs.length; i++)
      vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]);

    // Do the row-binds column-by-column.
    // Switch to F/J thread for continuations
    ParallelRbinds t;
    H2O.submitTask(t = new ParallelRbinds(frs, espc, vecs, cmaps)).join();
    return new ValFrame(new Frame(fr.names(), t._vecs));
  }
Пример #8
0
 @Override
 protected long checksum_impl() {
   throw H2O.unimpl();
 } // don't really need checksum