Пример #1
0
  /**
   * Builds a confusion matrix from the first column of each frame (both converted with {@code
   * toEnum()}) and checks it against the expected domain and counts.
   *
   * <p>Both frames are deleted when the check finishes, whether it passed or not. The {@code
   * toEnum} flag is accepted but not read by this method.
   *
   * @param v1 frame whose first column holds the actual values
   * @param v2 frame whose first column holds the predicted values
   * @param actualDomain domain of the actual values; only printed when {@code debug} is set
   * @param predictedDomain domain of the predicted values; only printed when {@code debug} is set
   * @param expectedDomain domain the confusion matrix is expected to have
   * @param expectedCM expected confusion matrix counts
   * @param debug when true, dumps the domains and the matrix to {@code System.err}
   * @param toEnum unused here
   */
  private void simpleCMTest(
      Frame v1,
      Frame v2,
      String[] actualDomain,
      String[] predictedDomain,
      String[] expectedDomain,
      double[][] expectedCM,
      boolean debug,
      boolean toEnum) {
    Scope.enter();
    try {
      ConfusionMatrix matrix = buildCM(v1.vecs()[0].toEnum(), v2.vecs()[0].toEnum());
      if (debug) {
        printCMDebug(actualDomain, predictedDomain, expectedDomain, matrix);
      }
      assertCMEqual(expectedDomain, expectedCM, matrix);
    } finally {
      if (v1 != null) {
        v1.delete();
      }
      if (v2 != null) {
        v2.delete();
      }
      Scope.exit();
    }
  }

  /** Prints the involved domains and the raw confusion matrix rows to {@code System.err}. */
  private static void printCMDebug(
      String[] actualDomain,
      String[] predictedDomain,
      String[] expectedDomain,
      ConfusionMatrix matrix) {
    System.err.println("actual            : " + Arrays.toString(actualDomain));
    System.err.println("predicted         : " + Arrays.toString(predictedDomain));
    System.err.println("CM domain         : " + Arrays.toString(matrix._domain));
    System.err.println("expected CM domain: " + Arrays.toString(expectedDomain) + "\n");
    int row = 0;
    while (row < matrix._cm.length) {
      System.err.println(Arrays.toString(matrix._cm[row]));
      row++;
    }
    System.err.println("");
    System.err.println(matrix.toASCII());
  }
Пример #2
0
 /**
  * Parses two test files into frames, makes the second compatible with the first and runs the
  * frame-based confusion-matrix check on them.
  *
  * @param f1 test file holding the actual values
  * @param f2 test file holding the predicted values
  * @param expectedActualDomain expected domain of the actual values
  * @param expectedPredictDomain expected domain of the predicted values
  * @param expectedDomain expected domain of the resulting confusion matrix
  * @param expectedCM expected confusion matrix counts
  * @param debug forwarded to the frame-based overload
  * @param toEnum forwarded to the frame-based overload
  * @throws RuntimeException wrapping the {@link IOException} when the test data cannot be loaded
  */
 private void simpleCMTest(
     String f1,
     String f2,
     String[] expectedActualDomain,
     String[] expectedPredictDomain,
     String[] expectedDomain,
     double[][] expectedCM,
     boolean debug,
     boolean toEnum) {
   try {
     Frame v1 = parseFrame(Key.make("v1.hex"), find_test_file(f1));
     Frame v2 = parseFrame(Key.make("v2.hex"), find_test_file(f2));
     v2 = v1.makeCompatible(v2);
     simpleCMTest(
         v1,
         v2,
         expectedActualDomain,
         expectedPredictDomain,
         expectedDomain,
         expectedCM,
         debug,
         toEnum);
   } catch (IOException e) {
     // Swallowing the exception would let the test pass without checking anything, so fail
     // loudly and keep the original cause.
     throw new RuntimeException("Unable to load test data '" + f1 + "' / '" + f2 + "'", e);
   }
 }
Пример #3
0
 /**
  * Gathers rows of {@code _base} (restricted to the columns listed in {@code _cols}) according to
  * the row numbers stored in {@code ix[0]}.
  *
  * <p>Each value read from {@code ix[0]} is decremented by one before use. Entries that become
  * negative produce no output row, entries at or beyond the base length produce a row of NaNs,
  * and all other entries copy the corresponding base values into {@code ncs}.
  *
  * @param ix input chunks; only {@code ix[0]} is read
  * @param ncs output chunks, one per selected column
  */
 @Override
 public void map(Chunk[] ix, NewChunk[] ncs) {
   final Vec[] vecs = new Vec[_cols.length];
   final Vec anyv = _base.anyVec();
   final long nrow = anyv.length();
   // NOTE(review): this first read is not decremented, unlike the reads inside the loop below; it
   // is only used to pick the initially cached chunk, and the loop re-checks the range anyway.
   long r = ix[0].at80(0);
   int last_ci = anyv.elem2ChunkIdx(r < nrow ? r : 0); // memoize the last chunk index
   long last_c0 = anyv._espc[last_ci]; // ...         last chunk start
   long last_c1 = anyv._espc[last_ci + 1]; // ...         last chunk end
   Chunk[] last_cs = new Chunk[vecs.length]; // ...         last chunks
   // Resolve the selected base columns and pre-load their chunk for the cached index.
   for (int c = 0; c < _cols.length; c++) {
     vecs[c] = _base.vecs()[_cols[c]];
     last_cs[c] = vecs[c].elem2BV(last_ci);
   }
   for (int i = 0; i < ix[0]._len; i++) {
     // select one row
     r = ix[0].at80(i) - 1; // next row to select
     if (r < 0) continue; // negative after the decrement: emit nothing for this entry
     if (r >= nrow) {
       // Past the end of the base data: emit a row of NaNs.
       for (int c = 0; c < vecs.length; c++) ncs[c].addNum(Double.NaN);
     } else {
       // Refresh the cached chunks only when the row falls outside [last_c0, last_c1).
       if (r < last_c0 || r >= last_c1) {
         last_ci = anyv.elem2ChunkIdx(r);
         last_c0 = anyv._espc[last_ci];
         last_c1 = anyv._espc[last_ci + 1];
         for (int c = 0; c < vecs.length; c++) last_cs[c] = vecs[c].elem2BV(last_ci);
       }
       for (int c = 0; c < vecs.length; c++) ncs[c].addNum(last_cs[c].at(r));
     }
   }
 }
Пример #4
0
  /**
   * Selects columns from a row or a frame.
   *
   * <p>For a {@code ValRow} the row is sliced directly. For a frame, a non-negative first index
   * means an inclusion list, and a negative first index means an exclusion list in which {@code
   * -col - 1} is the column to drop. An empty list yields an empty frame.
   *
   * @param env execution environment used to evaluate the arguments
   * @param stk stack helper used to track the evaluated value
   * @param asts {@code asts[1]} is the data, {@code asts[2]} the column selector
   * @return the sliced row, or a {@code ValFrame} holding the selected columns
   * @throws IllegalArgumentException if the last inclusion index is not a valid column index
   */
  @Override
  Val apply(Env env, Env.StackHelp stk, AST asts[]) {
    Val v = stk.track(asts[1].exec(env));
    if (v instanceof ValRow) {
      ValRow vv = (ValRow) v;
      return vv.slice(asts[2].columns(vv._names));
    }
    Frame fr = v.getFrame();
    int[] cols = asts[2].columns(fr.names());

    Frame fr2 = new Frame();
    if (cols.length == 0) {
      // Empty inclusion list: the result stays an empty frame.
    } else if (cols[0] >= 0) { // Positive (inclusion) list
      // Valid indices are 0..numCols-1, so an index equal to numCols must be rejected as well;
      // previously it slipped through and failed later with an ArrayIndexOutOfBoundsException.
      if (cols[cols.length - 1] >= fr.numCols())
        throw new IllegalArgumentException(
            "Column must be an integer from 0 to " + (fr.numCols() - 1));
      for (int col : cols) fr2.add(fr.names()[col], fr.vecs()[col]);
    } else { // Negative (exclusion) list
      fr2 = new Frame(fr); // All of them at first
      Arrays.sort(cols); // This loop depends on the values in sorted order
      for (int col : cols)
        if (0 <= -col - 1 && -col - 1 < fr.numCols()) fr2.remove(-col - 1); // Remove named column
    }

    return new ValFrame(fr2);
  }
Пример #5
0
  /**
   * Trains an Aggregator with binary categorical encoding on a generated frame, checks the
   * model's consistency and prints the first rows of the output frame. The exemplar count is
   * only logged, not asserted.
   */
  @Test
  public void testAggregatorBinary() {
    CreateFrame frameSpec = new CreateFrame();
    frameSpec.rows = 1000;
    frameSpec.cols = 10;
    frameSpec.categorical_fraction = 0.6;
    frameSpec.integer_fraction = 0.0;
    frameSpec.binary_fraction = 0.0;
    frameSpec.real_range = 100;
    frameSpec.integer_range = 100;
    frameSpec.missing_fraction = 0.1;
    frameSpec.factors = 5;
    frameSpec.seed = 1234;
    Frame train = frameSpec.execImpl().get();

    AggregatorModel.AggregatorParameters params = new AggregatorModel.AggregatorParameters();
    params._train = train._key;
    params._radius_scale = 1.0;
    params._transform = DataInfo.TransformType.NORMALIZE;
    params._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Binary;

    final long startMillis = System.currentTimeMillis();
    AggregatorModel model = new Aggregator(params).trainModel().get();
    final double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000.;
    System.out.println("AggregatorModel finished in: " + elapsedSeconds + " seconds");

    model.checkConsistency();
    Frame exemplars = model._output._output_frame.get();
    System.out.println(exemplars.toTwoDimTable(0, 10));
    Log.info("Number of exemplars: " + model._exemplars.length);

    // Clean up everything the test created.
    exemplars.remove();
    train.remove();
    model.remove();
  }
Пример #6
0
 /**
  * Asserts that {@code actual} is a single-row frame whose columns match {@code expected} within
  * a tolerance of 1e-8.
  *
  * @param expected expected value for each column, in column order
  * @param actual frame to check
  */
 private static void assertRowFrameEquals(double[] expected, Frame actual) {
   assertEquals(1, actual.numRows());
   assertEquals(expected.length, actual.numCols());
   int col = 0;
   for (double want : expected) {
     assertEquals("Wrong sum in column " + actual.name(col), want, actual.vec(col).at(0), 1e-8);
     col++;
   }
 }
Пример #7
0
  /**
   * Builds a frame from the selected columns plus the class column, trains a DRF on it, blocks
   * until training finishes and stores the resulting confusion matrix.
   *
   * @return a 404 response when the data frame is missing, otherwise a "done" response
   */
  @Override
  public Response serve() {
    // NOTE(review): the lookup result is dereferenced with .get() before the null check below; if
    // DKV.get can return null for a missing key this throws instead of answering 404 - confirm.
    Frame fr = DKV.get(data_key.value()).get();
    if (fr == null) {
      return RequestServer._http404.serve();
    }

    // Collect the selected feature columns into a fresh frame.
    Frame selected = new Frame(new String[0], new Vec[0]);
    int[] idxs = vecs.value();
    for (int i = 0; i < idxs.length; i++) {
      int idx = idxs[i];
      selected.add(fr._names[idx], fr._vecs[idx]);
    }

    // The class column always goes last.
    Vec cvec = class_vec.value();
    selected.add(fr._names[class_vec._colIdx.get()], cvec);
    domain = cvec.domain(); // Class/enum/factor names
    // Default number of features per split: rounded square root of the selected column count.
    mtrys = features.value() == null ? (int) (Math.sqrt(idxs.length) + 0.5) : features.value();

    DRF drf =
        DRF.start(
            DRF.makeKey(),
            selected,
            depth.value(),
            ntrees.value(),
            mtrys,
            sample_rate.value(),
            seed.value());

    drf.get(); // Block for result
    cm = drf.cm(); // Get CM result

    return new Response(Response.Status.done, this, -1, -1, null);
  }
Пример #8
0
  /**
   * Runs two ddply expressions grouped on column 1 of the iris data set and checks the result
   * dimensions, plus selected group means for the first expression.
   */
  @Test
  public void testBasicDdply() {
    final String irisCsv = "smalldata/iris/iris_wheader.csv";
    // Group-By on col 1 (not 0) mean of col 2
    final String meanTree = "(ddply hex [1] { x . (mean (cols x 2) TRUE)})";
    final String sumTree = "(ddply hex [1] { x . (sum (* (cols x 2) (cols x 3)))})";

    Frame fr = null;
    try {
      fr = chkTree(meanTree, irisCsv);
      chkDim(fr, 2, 23);
      // Group 2.0, mean is 3.5
      chkFr(fr, 0, 0, 2.0);
      chkFr(fr, 1, 0, 3.5);
      // Group 2.2, mean is 4.5
      chkFr(fr, 0, 1, 2.2);
      chkFr(fr, 1, 1, 4.5);
      // Group 2.8, mean is 5.043, largest group
      chkFr(fr, 0, 7, 2.8);
      chkFr(fr, 1, 7, 5.042857142857143);
      // Group 4.4, mean is 1.5, last group
      chkFr(fr, 0, 22, 4.4);
      chkFr(fr, 1, 22, 1.5);
      fr.delete();

      fr = chkTree(sumTree, irisCsv);
      chkDim(fr, 2, 23);
    } finally {
      if (fr != null) {
        fr.delete();
      }
      Keyed.remove(Key.make("hex"));
    }
  }
Пример #9
0
  /**
   * Groups the iris data set by its categorical column 4 and checks row counts and column-2 means
   * per class, then checks the dimensions of a "mode" aggregation grouped on column 1.
   */
  @Test
  public void testCatGroup() {
    final String irisCsv = "smalldata/iris/iris_wheader.csv";
    // Group-By on col 4, no order-by, nrow and mean of col 2
    final String countAndMeanTree = "(GB hex [4] nrow 0 \"all\" mean 2 \"all\")";
    final String modeTree = "(GB hex [1] mode 4 \"all\" )";

    Frame fr = null;
    try {
      fr = chkTree(countAndMeanTree, irisCsv);
      chkDim(fr, 3, 3);

      chkFr(fr, 0, 0, "Iris-setosa");
      chkFr(fr, 1, 0, 50);
      chkFr(fr, 2, 0, 1.464);

      chkFr(fr, 0, 1, "Iris-versicolor");
      chkFr(fr, 1, 1, 50);
      chkFr(fr, 2, 1, 4.26);

      chkFr(fr, 0, 2, "Iris-virginica");
      chkFr(fr, 1, 2, 50);
      chkFr(fr, 2, 2, 5.552);
      fr.delete();

      fr = chkTree(modeTree, irisCsv);
      chkDim(fr, 2, 23);
    } finally {
      if (fr != null) {
        fr.delete();
      }
      Keyed.remove(Key.make("hex"));
    }
  }
Пример #10
0
  /**
   * Groups the iris data set by column 4 and checks the nrow, mean, sum, min and max aggregates
   * of column 1 for each of the three classes.
   */
  @Test
  public void testAllAggs() {
    final String irisCsv = "smalldata/iris/iris_wheader.csv";
    final String allAggsTree =
        "(GB hex [4] nrow 0 \"rm\"  mean 1 \"rm\"  sum 1 \"rm\"  min 1 \"rm\"  max 1 \"rm\" )";

    Frame fr = null;
    try {
      fr = chkTree(allAggsTree, irisCsv);
      chkDim(fr, 6, 3);

      // Result columns: 0 = class, 1 = nrow, 2 = mean, 3 = sum, 4 = min, 5 = max.
      chkFr(fr, 0, 0, "Iris-setosa");
      chkFr(fr, 1, 0, 50);
      chkFr(fr, 2, 0, 3.418);
      chkFr(fr, 3, 0, 170.9);
      chkFr(fr, 4, 0, 2.3);
      chkFr(fr, 5, 0, 4.4);

      chkFr(fr, 0, 1, "Iris-versicolor");
      chkFr(fr, 1, 1, 50);
      chkFr(fr, 2, 1, 2.770);
      chkFr(fr, 3, 1, 138.5);
      chkFr(fr, 4, 1, 2.0);
      chkFr(fr, 5, 1, 3.4);

      chkFr(fr, 0, 2, "Iris-virginica");
      chkFr(fr, 1, 2, 50);
      chkFr(fr, 2, 2, 2.974);
      chkFr(fr, 3, 2, 148.7);
      chkFr(fr, 4, 2, 2.2);
      chkFr(fr, 5, 2, 3.8);
    } finally {
      if (fr != null) {
        fr.delete();
      }
      Keyed.remove(Key.make("hex"));
    }
  }
Пример #11
0
 /**
  * Asserts that {@code actual} is a single-column frame whose rows match {@code expected} within
  * a tolerance of 1e-8.
  *
  * @param expected expected value for each row, in row order
  * @param actual frame to check
  */
 private static void assertColFrameEquals(double[] expected, Frame actual) {
   assertEquals(1, actual.numCols());
   assertEquals(expected.length, actual.numRows());
   int row = 0;
   for (double want : expected) {
     assertEquals("Wrong sum in row " + row, want, actual.vec(0).at(row), 1e-8);
     row++;
   }
 }
Пример #12
0
  // Adapt a trained model to a test dataset with different enums.
  // Trains a small GBM on the KDD training file and scores the KDD test file with it. Every key
  // and frame created along the way is removed in the finally block.
  /*@Test*/ public void testModelAdapt() {
    File file1 = TestUtil.find_test_file("./smalldata/kaggle/KDDTrain.arff.gz");
    Key fkey1 = NFSFileVec.make(file1);
    Key dest1 = Key.make("KDDTrain.hex");
    File file2 = TestUtil.find_test_file("./smalldata/kaggle/KDDTest.arff.gz");
    Key fkey2 = NFSFileVec.make(file2);
    Key dest2 = Key.make("KDDTest.hex");
    GBM gbm = null;
    Frame preds = null;
    try {
      gbm = new GBM();
      gbm.source = ParseDataset2.parse(dest1, new Key[] {fkey1});
      UKV.remove(fkey1);
      gbm.response = gbm.source.remove(41); // Response is col 41
      gbm.ntrees = 2;
      gbm.max_depth = 8;
      gbm.learn_rate = 0.2f;
      gbm.min_rows = 10;
      gbm.nbins = 50;
      gbm.invoke();

      // The test data set has a few more enums than the train
      Frame ftest = ParseDataset2.parse(dest2, new Key[] {fkey2});
      UKV.remove(fkey2); // Raw file key is no longer needed, same as for the training file
      preds = gbm.score(ftest);

    } finally {
      UKV.remove(dest1); // Remove original hex frame key
      UKV.remove(dest2); // Remove the parsed test frame, which was previously left behind
      if (preds != null) preds.remove(); // Remove the predictions, previously left behind
      if (gbm != null) {
        UKV.remove(gbm.dest()); // Remove the model
        // The response is only set after a successful parse; guard so that a parse failure is
        // not masked by a NullPointerException raised from this cleanup.
        if (gbm.response != null) UKV.remove(gbm.response._key);
        gbm.remove(); // Remove GBM Job
      }
    }
  }
Пример #13
0
 /**
  * Executes an expression and, for frame results, publishes the frame in the DKV under {@code
  * id}.
  *
  * <p>Any value already stored under that key is deleted first. Numeric and string results are
  * only printed; for those {@code id} is asserted to be null.
  *
  * @param str expression to execute
  * @param id key name for a frame result
  * @return the published frame, or {@code null} for numeric and string results
  * @throws IllegalArgumentException if the key holds a value that is neither Lockable nor Keyed
  */
 static Frame exec_str(String str, String id) {
   Val val = Exec.exec(str);
   switch (val.type()) {
     case Val.FRM: {
       Frame computed = val.getFrame();
       Key destKey = Key.make(id);
       // Smart delete any prior top-level result
       Iced prior = DKV.getGet(destKey);
       if (prior instanceof Lockable) {
         ((Lockable) prior).delete();
       } else if (prior instanceof Keyed) {
         ((Keyed) prior).remove();
       } else if (prior != null) {
         throw new IllegalArgumentException("Attempting to overright an unexpected key");
       }
       // Re-home the result under the requested key and publish it.
       Frame published = new Frame(destKey, computed._names, computed.vecs());
       DKV.put(published);
       System.out.println(published);
       checkSaneFrame();
       return published;
     }
     case Val.NUM: {
       System.out.println("num= " + val.getNum());
       assert id == null;
       checkSaneFrame();
       return null;
     }
     case Val.STR: {
       System.out.println("str= " + val.getStr());
       assert id == null;
       checkSaneFrame();
       return null;
     }
     default:
       throw water.H2O.fail();
   }
 }
Пример #14
0
  /**
   * Checks that {@code GLRM.expandCats} turns the categorical class column of a few iris rows
   * into indicator columns placed ahead of the numeric columns.
   */
  @Test
  public void testExpandCatsIris() throws InterruptedException, ExecutionException {
    // Five sample rows: four numeric measurements followed by the class level index.
    double[][] sampleRows =
        ard(
            ard(6.3, 2.5, 4.9, 1.5, 1),
            ard(5.7, 2.8, 4.5, 1.3, 1),
            ard(5.6, 2.8, 4.9, 2.0, 2),
            ard(5.0, 3.4, 1.6, 0.4, 0),
            ard(6.0, 2.2, 5.0, 1.5, 2));
    // Expected expansion: three indicator columns first, then the numeric columns.
    double[][] expectedExpanded =
        ard(
            ard(0, 1, 0, 6.3, 2.5, 4.9, 1.5),
            ard(0, 1, 0, 5.7, 2.8, 4.5, 1.3),
            ard(0, 0, 1, 5.6, 2.8, 4.9, 2.0),
            ard(1, 0, 0, 5.0, 3.4, 1.6, 0.4),
            ard(0, 0, 1, 6.0, 2.2, 5.0, 1.5));
    String[] colNames = {"sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"};
    String[][] colDomains = {null, null, null, null, {"setosa", "versicolor", "virginica"}};
    final String cellFormat = "%8.7s";

    Frame fr = null;
    try {
      fr = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
      DataInfo dinfo =
          new DataInfo(
              Key.make(),
              fr,
              null,
              0,
              true,
              DataInfo.TransformType.NONE,
              DataInfo.TransformType.NONE,
              false,
              false,
              false, /* weights */
              false, /* offset */
              false, /* fold */
              false);

      Log.info(
          "Original matrix:\n" + colFormat(colNames, cellFormat) + ArrayUtils.pprint(sampleRows));

      double[][] permuted = ArrayUtils.permuteCols(sampleRows, dinfo._permutation);
      Log.info(
          "Permuted matrix:\n"
              + colFormat(colNames, cellFormat, dinfo._permutation)
              + ArrayUtils.pprint(permuted));

      double[][] expanded = GLRM.expandCats(permuted, dinfo);
      Log.info(
          "Expanded matrix:\n"
              + colExpFormat(colNames, colDomains, cellFormat, dinfo._permutation)
              + ArrayUtils.pprint(expanded));
      Assert.assertArrayEquals(expectedExpanded, expanded);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (fr != null) {
        fr.delete();
      }
    }
  }
Пример #15
0
    /**
     * Validates the cross-validation settings and prepares the job state: cross-validation model
     * keys, response name, training columns and their names, and the response / confusion-matrix
     * domains.
     *
     * @throws UnsupportedOperationException if both a validation set and a non-zero fold count
     *     are given, or if the fold count is negative
     * @throws IllegalArgumentException if the validation set lacks the response column
     */
    @Override
    protected void init() {
      if (validation != null && n_folds != 0)
        throw new UnsupportedOperationException(
            "Cannot specify a validation dataset and non-zero number of cross-validation folds.");
      if (n_folds < 0)
        throw new UnsupportedOperationException(
            "The number of cross-validation folds must be >= 0.");
      super.init();
      // One destination key per cross-validation fold, derived from this job's destination key.
      xval_models = new Key[n_folds];
      for (int i = 0; i < xval_models.length; ++i)
        xval_models[i] = Key.make(dest().toString() + "_xval" + i);

      // Locate the response vector inside the source frame by identity.
      // NOTE(review): rIndex starts at 0 and is never negative, so the "rIndex >= 0" test below is
      // always true; when the response is not one of the source vecs, the name of column 0 is
      // used as the response name - confirm whether that is intended.
      int rIndex = 0;
      for (int i = 0; i < source.vecs().length; i++)
        if (source.vecs()[i] == response) {
          rIndex = i;
          break;
        }
      _responseName = source._names != null && rIndex >= 0 ? source._names[rIndex] : "response";

      // Training columns and their names, as selected by cols.
      _train = selectVecs(source);
      _names = new String[cols.length];
      for (int i = 0; i < cols.length; i++) _names[i] = source._names[cols[i]];

      // Compute source response domain
      if (classification) _sourceResponseDomain = getVectorDomain(response);
      // Is validation specified?
      if (validation != null) {
        // Extract a validation response
        int idx = validation.find(source.names()[rIndex]);
        if (idx == -1)
          throw new IllegalArgumentException(
              "Validation set does not have a response column called " + _responseName);
        _validResponse = validation.vecs()[idx];
        // Compute output confusion matrix domain for classification:
        // - if validation dataset is specified then CM domain is union of train and validation
        // response domains
        //   else it is only domain of response column.
        if (classification) {
          _validResponseDomain = getVectorDomain(_validResponse);
          if (_validResponseDomain != null) {
            _cmDomain = Utils.domainUnion(_sourceResponseDomain, _validResponseDomain);
            // Mappings into the CM domain are only built when the two domains differ.
            if (!Arrays.deepEquals(_sourceResponseDomain, _validResponseDomain)) {
              _fromModel2CM =
                  Model.getDomainMapping(
                      _cmDomain,
                      _sourceResponseDomain,
                      false); // transformation from model produced response ~> cmDomain
              _fromValid2CM =
                  Model.getDomainMapping(
                      _cmDomain,
                      _validResponseDomain,
                      false); // transformation from validation response domain ~> cmDomain
            }
          } else _cmDomain = _sourceResponseDomain;
        } /* end of if classification */
      } else if (classification) _cmDomain = _sourceResponseDomain;
    }
Пример #16
0
 /**
  * Adds the dimensions of the validation data set to the job parameter JSON.
  *
  * @return the parent JSON; when a validation set is present, its "validation" object also
  *     carries {@code num_cols} and {@code num_rows}
  */
 @Override protected JsonObject toJSON() {
   JsonObject jo = super.toJSON();
   if (validation == null) {
     return jo;
   }
   JsonObject validationJson = jo.getAsJsonObject("validation");
   validationJson.addProperty("num_cols", validation.numCols());
   validationJson.addProperty("num_rows", validation.numRows());
   return jo;
 }
Пример #17
0
 /**
  * Creates a string chunk for the given frame and fills it with the first {@code len} entries of
  * {@code data}; null entries are passed through as null.
  *
  * @param fname frame name handed to {@code Frame.createNewChunks}
  * @param data values to append
  * @param cidx chunk index handed to {@code Frame.createNewChunks}
  * @param len number of entries of {@code data} to append
  * @return the first of the created chunks, after they have been closed
  */
 public static NewChunk createNC(String fname, String[] data, int cidx, int len) {
   NewChunk[] nchunks = Frame.createNewChunks(fname, Vec.T_STR, cidx);
   int row = 0;
   while (row < len) {
     nchunks[0].addStr(data[row]);
     row++;
   }
   Frame.closeNewChunks(nchunks);
   return nchunks[0];
 }
Пример #18
0
 /**
  * Adds the dimensions of the training data set to the job parameter JSON.
  *
  * @return the parent JSON; when a source frame is present, its "source" object also carries
  *     {@code num_cols} and {@code num_rows}
  */
 @Override protected JsonObject toJSON() {
   JsonObject jo = super.toJSON();
   if (source == null) {
     return jo;
   }
   JsonObject sourceJson = jo.getAsJsonObject("source");
   sourceJson.addProperty("num_cols", source.numCols());
   sourceJson.addProperty("num_rows", source.numRows());
   return jo;
 }
Пример #19
0
  /**
   * Fills {@code row} from a dense array of raw column values.
   *
   * <p>The first {@code _cats} entries of {@code vals} are read as categoricals, the next {@code
   * _nums} as numerics. Missing categoricals are replaced by the column mode and missing numerics
   * by the column mean. Numerics are normalized when both {@code _normSub} and {@code _normMul}
   * are set. The row is flagged bad when {@code _skipMissing} is set and any value is NaN, or
   * when a response is NaN.
   *
   * @param vals raw values of one row
   * @param row row object to fill; returned untouched apart from the reset flags when its weight
   *     is 0
   * @return the same {@code row} instance
   */
  public final Row extractDenseRow(double[] vals, Row row) {
    row.bad = false;
    row.rid = 0;
    row.cid = 0;
    if (row.weight == 0) return row;

    if (_skipMissing)
      for (double d : vals)
        if (Double.isNaN(d)) {
          row.bad = true;
          return row;
        }
    // Categoricals: keep only levels that map to a non-negative id.
    int nbins = 0;
    for (int i = 0; i < _cats; ++i) {
      int c = getCategoricalId(i, Double.isNaN(vals[i]) ? _catModes[i] : (int) vals[i]);
      if (c >= 0) row.binIds[nbins++] = c;
    }
    row.nBins = nbins;
    final int n = _nums;
    int numValsIdx = 0;
    for (int i = 0; i < n; ++i) {
      if (isInteractionVec(i)) {
        int offset;
        InteractionWrappedVec iwv = ((InteractionWrappedVec) _adaptedFrame.vec(_cats + i));
        int v1 = _adaptedFrame.find(iwv.v1());
        int v2 = _adaptedFrame.find(iwv.v2());
        if (v1 < _cats)
          offset = getCategoricalId(v1, Double.isNaN(vals[v1]) ? _catModes[v1] : (int) vals[v1]);
        else if (v2 < _cats)
          // A missing v2 must be imputed with v2's own mode; this branch used v1's mode before.
          offset = getCategoricalId(v2, Double.isNaN(vals[v2]) ? _catModes[v2] : (int) vals[v2]);
        else offset = 0;
        row.numVals[numValsIdx + offset] = vals[_cats + i]; // essentially: vals[v1] * vals[v2])
        numValsIdx += nextNumericIdx(i);
      } else {
        double d = vals[_cats + i]; // can be NA if skipMissing() == false
        if (Double.isNaN(d)) d = _numMeans[numValsIdx];
        if (_normMul != null && _normSub != null)
          d = (d - _normSub[numValsIdx]) * _normMul[numValsIdx];
        row.numVals[numValsIdx++] = d;
      }
    }
    // NOTE(review): the loop index starts at the response chunk id and is used both to index
    // row.response and as the argument of responseChunkId - confirm this is the intended
    // indexing.
    int off = responseChunkId(0);
    for (int i = off; i < Math.min(vals.length, off + _responses); ++i) {
      try {
        row.response[i] = vals[responseChunkId(i)];
      } catch (Throwable t) {
        throw new RuntimeException(t);
      }
      if (_normRespMul != null)
        row.response[i] = (row.response[i] - _normRespSub[i]) * _normRespMul[i];
      if (Double.isNaN(row.response[i])) {
        row.bad = true;
        return row;
      }
    }
    return row;
  }
Пример #20
0
 /** Runs {@code sumaxis} on a frame without columns and expects an empty frame back. */
 @Test
 public void testColumnwisesumOnEmptyFrame() {
   Frame fr = register(new Frame(Key.<Frame>make()));
   Val val = Rapids.exec("(sumaxis " + fr._key + " 0 0)");
   assertTrue(val instanceof ValFrame);
   Frame res = register(val.getFrame());
   // Expected value goes first, so that a failure reports "expected 0 but was N" correctly.
   assertEquals(0, res.numCols());
   assertEquals(0, res.numRows());
 }
Пример #21
0
 /**
  * Creates a numeric chunk for the given frame and fills it with {@code len} consecutive values
  * starting at {@code cidx * 1000}.
  *
  * @param fname frame name handed to {@code Frame.createNewChunks}
  * @param cidx chunk index; also determines the first value written
  * @param len number of values to append
  * @return the first of the created chunks, after they have been closed
  */
 public static NewChunk createNC(String fname, int cidx, int len) {
   NewChunk[] nchunks = Frame.createNewChunks(fname, Vec.T_NUM, cidx);
   final int firstValue = cidx * 1000;
   int offset = 0;
   while (offset < len) {
     nchunks[0].addNum(firstValue + offset);
     offset++;
   }
   Frame.closeNewChunks(nchunks);
   return nchunks[0];
 }
Пример #22
0
 /**
  * Builds a new frame holding only the columns of {@code frame} listed in {@code cols}, in that
  * order.
  *
  * @param frame frame to pick columns from
  * @return a frame with the selected vectors and their names
  */
 protected final Frame selectFrame(Frame frame) {
   final int count = cols.length;
   Vec[] pickedVecs = new Vec[count];
   String[] pickedNames = new String[count];
   int out = 0;
   for (int col : cols) {
     pickedVecs[out] = frame.vecs()[col];
     pickedNames[out] = frame.names()[col];
     out++;
   }
   return new Frame(pickedNames, pickedVecs);
 }
Пример #23
0
  /**
   * Trains a GLRM on the prostate data set with four columns converted to categoricals, scores
   * the training frame and logs the numeric and categorical errors.
   */
  @Test
  public void testCategoricalProstate() throws InterruptedException, ExecutionException {
    GLRM job = null;
    GLRMModel model = null;
    Frame train = null;
    final int[] cats = new int[] {1, 3, 4, 5}; // Categoricals: CAPSULE, RACE, DPROS, DCAPS

    try {
      Scope.enter();
      train = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
      // Swap each listed column for its categorical version and track the replaced vec's key.
      for (int i = 0; i < cats.length; i++)
        Scope.track(train.replace(cats[i], train.vec(cats[i]).toCategoricalVec())._key);
      train.remove("ID").remove();
      DKV.put(train._key, train);

      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = 8;
      parms._gamma_x = parms._gamma_y = 0.1;
      parms._regularization_x = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._regularization_y = GLRMModel.GLRMParameters.Regularizer.Quadratic;
      parms._init = GLRM.Initialization.PlusPlus;
      parms._transform = DataInfo.TransformType.STANDARDIZE;
      parms._recover_svd = false;
      parms._max_iterations = 200;

      try {
        job = new GLRM(parms);
        model = job.trainModel().get();
        Log.info(
            "Iteration "
                + model._output._iterations
                + ": Objective value = "
                + model._output._objective);
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info(
            "Numeric Sum of Squared Error = "
                + mm._numerr
                + "\tCategorical Misclassification Error = "
                + mm._caterr);
      } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
      } finally {
        // The job is still null when its constructor throws; without this guard the cleanup
        // would raise a NullPointerException that replaces the real failure.
        if (job != null) job.remove();
      }
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      if (train != null) train.delete();
      if (model != null) model.delete();
      Scope.exit();
    }
  }
Пример #24
0
 // private constructor called by filterExpandedColumns
 // Copies the configuration flags of an existing DataInfo and recomputes the categorical and
 // numeric offsets for the given frame and the supplied per-column level lists.
 private DataInfo(
     DataInfo dinfo,
     Frame fr,
     double[] normMul,
     double[] normSub,
     int[][] catLevels,
     int[] catModes) {
   // Share the source offsets when all factor levels are used; otherwise work on a shifted copy.
   _fullCatOffsets = dinfo._catOffsets;
   if (!dinfo._useAllFactorLevels) {
     _fullCatOffsets = dinfo._catOffsets.clone();
     for (int i = 0; i < _fullCatOffsets.length; ++i)
       _fullCatOffsets[i] += i; // add for the skipped zeros.
   }
   _offset = dinfo._offset;
   _weights = dinfo._weights;
   _fold = dinfo._fold;
   _valid = false;
   _interactions = dinfo._interactions;
   _interactionVecs = dinfo._interactionVecs;
   assert dinfo._predictor_transform != null;
   assert dinfo._response_transform != null;
   _predictor_transform = dinfo._predictor_transform;
   _response_transform = dinfo._response_transform;
   _skipMissing = dinfo._skipMissing;
   _imputeMissing = dinfo._imputeMissing;
   _adaptedFrame = fr;
   _catOffsets = MemoryManager.malloc4(catLevels.length + 1);
   _catMissing = new boolean[catLevels.length];
   // Every entry is true only when missing values are neither imputed nor skipped.
   Arrays.fill(_catMissing, !(dinfo._imputeMissing || dinfo._skipMissing));
   // Running sum of level counts: entry i is where column i starts, the last entry is the total.
   int s = 0;
   for (int i = 0; i < catLevels.length; ++i) {
     _catOffsets[i] = s;
     s += catLevels[i].length;
   }
   _catLvls = catLevels;
   _catOffsets[_catOffsets.length - 1] = s;
   _responses = dinfo._responses;
   _cats = catLevels.length;
   // Numeric count: what is left after categoricals, responses and the optional special columns.
   _nums =
       fr.numCols()
           - _cats
           - dinfo._responses
           - (_offset ? 1 : 0)
           - (_weights ? 1 : 0)
           - (_fold ? 1 : 0);
   _numOffsets = _nums == 0 ? new int[0] : dinfo._numOffsets.clone();
   // Shift so that the first numeric offset equals the total categorical width s.
   int diff = _numOffsets.length > 0 ? _numOffsets[0] - s : 0;
   // The statement on the next line is the body of this loop, despite its indentation.
   for (int i = 0; i < _numOffsets.length; i++) // need to shift everyone down by the offset!
   _numOffsets[i] -= diff;
   _useAllFactorLevels = true; // dinfo._useAllFactorLevels;
   _numMeans = new double[_nums];
   _normMul = normMul;
   _normSub = normSub;
   _catModes = catModes;
   // Numeric columns follow the categorical ones in the adapted frame.
   for (int i = 0; i < _nums; i++) _numMeans[i] = _adaptedFrame.vec(_cats + i).mean();
 }
Пример #25
0
  /**
   * Creates one locked output frame per split and launches a {@code FrameSplitTask} for each of
   * them. Exceptions raised by the split tasks are collected in {@code workersExceptions}
   * instead of being propagated.
   */
  @Override
  public void compute2() {
    // Lock all possible data
    dataset.read_lock(jobKey);
    // Create a template vector for each segment
    final Vec[][] templates = makeTemplates(dataset, ratios);
    final int nsplits = templates.length;
    assert nsplits == ratios.length + 1 : "Unexpected number of split templates!";
    // Launch number of distributed FJ for each split part
    final Vec[] datasetVecs = dataset.vecs();
    splits = new Frame[nsplits];
    // Each split frame reuses the dataset's column names and is locked under this job's key.
    for (int s = 0; s < nsplits; s++) {
      Frame split = new Frame(destKeys[s], dataset.names(), templates[s]);
      split.delete_and_lock(jobKey);
      splits[s] = split;
    }
    // One pending completion: the launcher task submitted below.
    setPendingCount(1);
    H2O.submitTask(
        new H2OCountedCompleter(FrameSplitter.this) {
          @Override
          public void compute2() {
            // One pending completion per split task started in the loop.
            setPendingCount(nsplits);
            for (int s = 0; s < nsplits; s++) {
              new FrameSplitTask(
                      new H2OCountedCompleter(this) { // Completer for this task
                        @Override
                        public void compute2() {}

                        @Override
                        public boolean onExceptionalCompletion(
                            Throwable ex, CountedCompleter caller) {
                          synchronized (
                              FrameSplitter
                                  .this) { // synchronized on this since it can be accessed from
                            // different workers
                            // Append the exception to the (possibly not yet allocated) array.
                            workersExceptions =
                                workersExceptions != null
                                    ? Arrays.copyOf(workersExceptions, workersExceptions.length + 1)
                                    : new Throwable[1];
                            workersExceptions[workersExceptions.length - 1] = ex;
                          }
                          tryComplete(); // we handle the exception, so perform normal
                          // completion
                          return false;
                        }
                      },
                      datasetVecs,
                      ratios,
                      s)
                  .asyncExec(splits[s]);
            }
            tryComplete(); // complete the computation of nsplits-tasks
          }
        });
    tryComplete(); // complete the computation of thrown tasks
  }
Пример #26
0
  // --------------------------------------------------------------------------
  // Build an entire layer of all K trees.
  // Submits one ScoreBuildOneTree task per non-null tree, waits for all of them, and returns
  // hcs when at least one tree did split, or null when none did.
  protected DHistogram[][][] buildLayer(
      final Frame fr,
      final int nbins,
      int nbins_cats,
      final DTree ktrees[],
      final int leafs[],
      final DHistogram hcs[][][],
      boolean subset,
      boolean build_tree_one_node) {
    // Build K trees, one per class.

    // Build up the next-generation tree splits from the current histograms.
    // Nearly all leaves will split one more level.  This loop nest is
    //           O( #active_splits * #bins * #ncols )
    // but is NOT over all the data.
    ScoreBuildOneTree sb1ts[] = new ScoreBuildOneTree[_nclass];
    Vec vecs[] = fr.vecs();
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = ktrees[k]; // Tree for class K
      if (tree == null) continue; // No tree for this class: nothing to build
      // Build a frame with just a single tree (& work & nid) columns, so the
      // nested MRTask ScoreBuildHistogram in ScoreBuildOneTree does not try
      // to close other tree's Vecs when run in parallel.
      // The first _ncols + 1 columns of fr are copied as-is; the per-class columns follow.
      Frame fr2 = new Frame(Arrays.copyOf(fr._names, _ncols + 1), Arrays.copyOf(vecs, _ncols + 1));
      fr2.add(fr._names[idx_tree(k)], vecs[idx_tree(k)]);
      fr2.add(fr._names[idx_work(k)], vecs[idx_work(k)]);
      fr2.add(fr._names[idx_nids(k)], vecs[idx_nids(k)]);
      // The weight column is appended only when idx_weight() reports a non-negative index.
      if (idx_weight() >= 0) fr2.add(fr._names[idx_weight()], vecs[idx_weight()]);
      // Start building one of the K trees in parallel
      H2O.submitTask(
          sb1ts[k] =
              new ScoreBuildOneTree(
                  this,
                  k,
                  nbins,
                  nbins_cats,
                  tree,
                  leafs,
                  hcs,
                  fr2,
                  subset,
                  build_tree_one_node,
                  _improvPerVar,
                  _model._parms._distribution));
    }
    // Block for all K trees to complete.
    boolean did_split = false;
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = ktrees[k]; // Tree for class K
      if (tree == null) continue; // No task was submitted for this class
      sb1ts[k].join();
      if (sb1ts[k]._did_split) did_split = true;
    }
    // The layer is done.
    return did_split ? hcs : null;
  }
Пример #27
0
 /**
  * Runs {@code sumaxis} with arguments "1 1" on a frame holding only the vc2 and vs1 columns and
  * expects a single numeric "sum" column made of NaNs.
  */
 @Test
 public void testRowwisesumOnFrameWithNonnumericColumnsOnly() {
   Frame fr = register(new Frame(Key.<Frame>make(), ar("c1", "s1"), aro(vc2, vs1)));
   final String expr = "(sumaxis " + fr._key + " 1 1)";
   Val val = Rapids.exec(expr);
   assertTrue(val instanceof ValFrame);
   Frame res = register(val.getFrame());
   assertEquals("Unexpected column name", "sum", res.name(0));
   assertEquals("Unexpected column type", Vec.T_NUM, res.types()[0]);
   final double nan = Double.NaN;
   assertColFrameEquals(ard(nan, nan, nan, nan, nan), res);
 }
Пример #28
0
 /**
  * Runs {@code sumaxis} with arguments "1 1" on a frame holding the vt1, vs1 and vt2 columns and
  * expects a single time-typed "sum" column with the listed values.
  */
 @Test
 public void testRowwisesumOnFrameWithTimeColumnsOnly() {
   Frame fr = register(new Frame(Key.<Frame>make(), ar("t1", "s", "t2"), aro(vt1, vs1, vt2)));
   final String expr = "(sumaxis " + fr._key + " 1 1)";
   Val val = Rapids.exec(expr);
   assertTrue(val instanceof ValFrame);
   Frame res = register(val.getFrame());
   assertEquals("Unexpected column name", "sum", res.name(0));
   assertEquals("Unexpected column type", Vec.T_TIME, res.types()[0]);
   double[] expectedSums = ard(30000000, 30000040, 30000060, 30000080, 30000120);
   assertColFrameEquals(expectedSums, res);
 }
Пример #29
0
 /**
  * Runs {@code sumaxis} with arguments "0 1" on four numeric columns and expects a single "sum"
  * column in which two of the five rows are NaN.
  */
 @Test
 public void testRowwisesumWithoutNaRm() {
   String[] colNames = ar("i1", "d1", "d2", "d3");
   Frame fr = register(new Frame(Key.<Frame>make(), colNames, aro(vi1, vd1, vd2, vd3)));
   final String expr = "(sumaxis " + fr._key + " 0 1)";
   Val val = Rapids.exec(expr);
   assertTrue(val instanceof ValFrame);
   Frame res = register(val.getFrame());
   double[] expectedSums = ard(1.7, 2.9, Double.NaN, 10.3, Double.NaN);
   assertColFrameEquals(expectedSums, res);
   assertEquals("sum", res.name(0));
 }
Пример #30
0
 /**
  * Flattens a 1x1 frame into a scalar value.
  *
  * <p>Numeric or bad vectors become a {@code ValNum}, string vectors a {@code ValStr}, and any
  * other vector is looked up in the frame's first domain. Frames of any other shape are
  * returned unchanged, wrapped in a {@code ValFrame}.
  *
  * @param env execution environment used to evaluate the argument
  * @param stk stack helper used to track the evaluated value
  * @param asts {@code asts[1]} is the expression producing the frame
  * @return the flattened scalar, or the frame itself when it is not 1x1
  */
 @Override
 Val apply(Env env, Env.StackHelp stk, AST asts[]) {
   Frame fr = stk.track(asts[1].exec(env)).getFrame();
   boolean isSingleCell = fr.numCols() == 1 && fr.numRows() == 1;
   if (!isSingleCell) {
     return new ValFrame(fr); // did not flatten
   }
   if (fr.anyVec().isNumeric() || fr.anyVec().isBad()) {
     return new ValNum(fr.anyVec().at(0));
   }
   if (fr.anyVec().isString()) {
     return new ValStr(fr.anyVec().atStr(new BufferedString(), 0).toString());
   }
   // Remaining case: translate the stored level index through the column's domain.
   String[] levels = fr.domains()[0];
   return new ValStr(levels[(int) fr.anyVec().at8(0)]);
 }