예제 #1
0
  /**
   * Groups the iris data on column 4 (the species names) and verifies, for each of the three
   * groups, the row count and the mean of column 2. Afterwards runs a "mode" aggregate of column 4
   * grouped on column 1 and verifies only the dimensions of that result.
   */
  @Test
  public void testCatGroup() {
    final String irisCsv = "smalldata/iris/iris_wheader.csv";
    // Expected group keys and column-2 means, indexed by result row.
    final String[] species = {"Iris-setosa", "Iris-versicolor", "Iris-virginica"};
    final double[] meanOfCol2 = {1.464, 4.26, 5.552};

    Frame fr = null;
    try {
      // Group-By on col 4, no order-by, nrow and mean of col 2
      fr = chkTree("(GB hex [4] nrow 0 \"all\" mean 2 \"all\")", irisCsv);
      chkDim(fr, 3, 3);
      for (int row = 0; row < species.length; row++) {
        chkFr(fr, 0, row, species[row]); // group key
        chkFr(fr, 1, row, 50); // nrow
        chkFr(fr, 2, row, meanOfCol2[row]); // mean
      }
      fr.delete();

      // Group-By on col 1, mode of col 4; only the result shape is checked
      fr = chkTree("(GB hex [1] mode 4 \"all\" )", irisCsv);
      chkDim(fr, 2, 23);
    } finally {
      if (fr != null) {
        fr.delete();
      }
      Keyed.remove(Key.make("hex"));
    }
  }
예제 #2
0
 /**
  * Runs the expression string through {@code Exec.exec} and handles the result by its type.
  *
  * <p>For a frame result, any value already stored under the key made from {@code id} is removed
  * first, then a new {@code Frame} with that key (reusing the result's names and vecs) is put into
  * the DKV, printed, and returned. For a number or string result the value is printed and
  * {@code null} is returned; in those cases {@code id} is asserted to be {@code null}.
  * {@code checkSaneFrame()} is called before every normal return.
  *
  * @param str expression string handed to {@code Exec.exec}
  * @param id name of the key under which a frame result is stored; expected to be {@code null}
  *     when the expression yields a number or a string
  * @return the newly stored frame, or {@code null} for number and string results
  * @throws IllegalArgumentException if the key already holds a value that is neither
  *     {@code Lockable} nor {@code Keyed}
  */
 static Frame exec_str(String str, String id) {
   Val val = Exec.exec(str);
   switch (val.type()) {
     case Val.FRM:
       Frame fr = val.getFrame();
       Key k = Key.make(id);
       // Smart delete any prior top-level result
       Iced i = DKV.getGet(k);
       if (i instanceof Lockable) ((Lockable) i).delete();
       else if (i instanceof Keyed) ((Keyed) i).remove();
       else if (i != null)
         // Message previously misspelled "overwrite" as "overright".
         throw new IllegalArgumentException("Attempting to overwrite an unexpected key");
       // Re-wrap the result under the requested key so later expressions can refer to it by name
       DKV.put(fr = new Frame(k, fr._names, fr.vecs()));
       System.out.println(fr);
       checkSaneFrame();
       return fr;
     case Val.NUM:
       System.out.println("num= " + val.getNum());
       assert id == null;
       checkSaneFrame();
       return null;
     case Val.STR:
       System.out.println("str= " + val.getStr());
       assert id == null;
       checkSaneFrame();
       return null;
     default:
       // Any other result type is unexpected here
       throw water.H2O.fail();
   }
 }
예제 #3
0
  /**
   * Runs a ddply over the iris data grouped on column 1 that takes the mean of column 2, and
   * spot-checks four of the 23 result rows. Then runs a second ddply (sum of the product of
   * columns 2 and 3) and verifies only its dimensions.
   */
  @Test
  public void testBasicDdply() {
    final String irisCsv = "smalldata/iris/iris_wheader.csv";
    // Spot-checked result rows: first two groups, the largest group (2.8) and the last group.
    final int[] rows = {0, 1, 7, 22};
    final double[] groupKeys = {2.0, 2.2, 2.8, 4.4};
    final double[] groupMeans = {3.5, 4.5, 5.042857142857143, 1.5};

    Frame fr = null;
    try {
      // Group-By on col 1 (not 0) mean of col 2
      fr = chkTree("(ddply hex [1] { x . (mean (cols x 2) TRUE)})", irisCsv);
      chkDim(fr, 2, 23);
      for (int n = 0; n < rows.length; n++) {
        chkFr(fr, 0, rows[n], groupKeys[n]);
        chkFr(fr, 1, rows[n], groupMeans[n]);
      }
      fr.delete();

      // Same grouping, summing the element-wise product of cols 2 and 3; shape check only
      fr = chkTree("(ddply hex [1] { x . (sum (* (cols x 2) (cols x 3)))})", irisCsv);
      chkDim(fr, 2, 23);
    } finally {
      if (fr != null) {
        fr.delete();
      }
      Keyed.remove(Key.make("hex"));
    }
  }
예제 #4
0
  /**
   * Imputes the fuel-economy column (index 1) of the cars data with the "mean" method, first with
   * an empty group-by list and then grouped on column 7 (year), verifying each time that no NAs
   * remain and that row 26, which was an NA, holds the expected mean.
   */
  @Test
  public void testImpute() {
    final String carsCsv = "smalldata/junit/cars.csv";
    final int economyCol = 1;
    final int formerNaRow = 26;

    Frame fr = null;
    try {
      // Impute fuel economy via the "mean" method with an empty group-by list.
      fr = chkTree("(h2o.impute hex 1 \"mean\" \"low\" [])", carsCsv);
      chkDim(fr, 8, 406);
      // No NAs anymore
      Assert.assertEquals(0, fr.vec(economyCol).naCnt());
      // Row 26 was an NA, now the mean economy
      Assert.assertEquals(23.51, fr.vec(economyCol).at(formerNaRow), 1e-1);
      fr.delete();

      // Impute fuel economy via the "mean" method, after grouping by year.  Update in place.
      fr = chkTree("(h2o.impute hex 1 \"mean\" \"low\" [7])", carsCsv);
      chkDim(fr, 8, 406);
      // No NAs anymore
      Assert.assertEquals(0, fr.vec(economyCol).naCnt());
      // Row 26 was an NA, now the 1970 mean economy
      Assert.assertEquals(17.69, fr.vec(economyCol).at(formerNaRow), 1e-1);
    } finally {
      if (fr != null) {
        fr.delete();
      }
      Keyed.remove(Key.make("hex"));
    }
  }
예제 #5
0
  @Test
  public void testAllAggs() {
    Frame fr = null;
    try {
      String tree =
          "(GB hex [4] nrow 0 \"rm\"  mean 1 \"rm\"  sum 1 \"rm\"  min 1 \"rm\"  max 1 \"rm\" )";
      fr = chkTree(tree, "smalldata/iris/iris_wheader.csv");
      chkDim(fr, 6, 3);

      chkFr(fr, 0, 0, "Iris-setosa");
      chkFr(fr, 1, 0, 50); // nrow
      chkFr(fr, 2, 0, 3.418); // mean
      chkFr(fr, 3, 0, 170.9); // sum
      chkFr(fr, 4, 0, 2.3); // min
      chkFr(fr, 5, 0, 4.4); // max

      chkFr(fr, 0, 1, "Iris-versicolor");
      chkFr(fr, 1, 1, 50); // nrow
      chkFr(fr, 2, 1, 2.770); // mean
      chkFr(fr, 3, 1, 138.5); // sum
      chkFr(fr, 4, 1, 2.0); // min
      chkFr(fr, 5, 1, 3.4); // max

      chkFr(fr, 0, 2, "Iris-virginica");
      chkFr(fr, 1, 2, 50); // nrow
      chkFr(fr, 2, 2, 2.974); // mean
      chkFr(fr, 3, 2, 148.7); // sum
      chkFr(fr, 4, 2, 2.2); // min
      chkFr(fr, 5, 2, 3.8); // max

    } finally {
      if (fr != null) fr.delete();
      Keyed.remove(Key.make("hex"));
    }
  }
예제 #6
0
  @Test
  public void testNAHandle() {
    Frame fr = null;
    try {
      String tree =
          "(GB hex [7] nrow 0 \"all\" mean 1 \"all\")"; // Group-By on year, no order-by, mean of
      // economy
      fr = chkTree(tree, "smalldata/junit/cars.csv");
      chkDim(fr, 3, 13);

      chkFr(fr, 0, 0, 70); // 1970, 35 cars, NA in economy
      chkFr(fr, 1, 0, 35);
      chkFr(fr, 2, 0, Double.NaN);

      chkFr(fr, 0, 2, 72); // 1972, 28 cars, 18.714 in economy
      chkFr(fr, 1, 2, 28);
      chkFr(fr, 2, 2, 18.714, 1e-1);
      fr.delete();

      tree = "(GB hex [7] nrow 1 \"all\" nrow 1 \"rm\" nrow 1 \"ignore\")"; // Group-By on year, no
      // order-by, nrow of
      // economy
      fr = chkTree(tree, "smalldata/junit/cars.csv");
      chkDim(fr, 4, 13);
      chkFr(fr, 0, 0, 70); // 1970, 35 cars, 29 have economy
      chkFr(fr, 1, 0, 35); // ALL
      chkFr(fr, 2, 0, 29); // RM
      chkFr(fr, 3, 0, 29); // IGNORE
      fr.delete();

      tree = "(GB hex [7] mean 1 \"all\" mean 1 \"rm\" mean 1 \"ignore\")"; // Group-By on year, no
      // order-by, mean of
      // economy
      fr = chkTree(tree, "smalldata/junit/cars.csv");
      chkDim(fr, 4, 13);
      chkFr(fr, 0, 0, 70); // 1970, 35 cars, 29 have economy
      chkFr(fr, 1, 0, Double.NaN); // ALL
      chkFr(fr, 2, 0, 17.69, 1e-1); // RM
      chkFr(fr, 3, 0, 14.66, 1e-1); // IGNORE

    } finally {
      if (fr != null) fr.delete();
      Keyed.remove(Key.make("hex"));
    }
  }
예제 #7
0
  @Test
  public void testBasic() {
    Frame fr = null;
    String tree =
        "(GB hex [1] mean 2 \"all\")"; // Group-By on col 1 (not 0), no order-by, mean of col 2
    try {
      fr = chkTree(tree, "smalldata/iris/iris_wheader.csv");
      chkDim(fr, 2, 23);
      chkFr(fr, 0, 0, 2.0); // Group 2.0, mean is 3.5
      chkFr(fr, 1, 0, 3.5);
      chkFr(fr, 0, 1, 2.2); // Group 2.2, mean is 4.5
      chkFr(fr, 1, 1, 4.5);
      chkFr(fr, 0, 7, 2.8); // Group 2.8, mean is 5.043, largest group
      chkFr(fr, 1, 7, 5.042857142857143);
      chkFr(fr, 0, 22, 4.4); // Group 4.4, mean is 1.5, last group
      chkFr(fr, 1, 22, 1.5);

    } finally {
      if (fr != null) fr.delete();
      Keyed.remove(Key.make("hex"));
    }
  }
예제 #8
0
  @Test
  public void testChicago() {
    Frame weather = null, crimes = null, census = null;
    String oldtz = Exec.exec("(getTimeZone)").getStr();
    try {
      weather = parse_test_file(Key.make("weather.hex"), "smalldata/chicago/chicagoAllWeather.csv");
      crimes =
          parse_test_file(Key.make("crimes.hex"), "smalldata/chicago/chicagoCrimes10k.csv.zip");
      String fname = "smalldata/chicago/chicagoCensus.csv";
      File f = find_test_file(fname);
      assert f != null && f.exists() : " file not found: " + fname;
      NFSFileVec nfs = NFSFileVec.make(f);
      ParseSetup ps = ParseSetup.guessSetup(new Key[] {nfs._key}, false, 1);
      ps.getColumnTypes()[1] = Vec.T_ENUM;
      census = ParseDataset.parse(Key.make("census.hex"), new Key[] {nfs._key}, true, ps);

      census =
          exec_str(
              "(colnames= census.hex [0 1 2 3 4 5 6 7 8] [\"Community.Area.Number\" \"COMMUNITY.AREA.NAME\" \"PERCENT.OF.HOUSING.CROWDED\" \"PERCENT.HOUSEHOLDS.BELOW.POVERTY\" \"PERCENT.AGED.16..UNEMPLOYED\" \"PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA\" \"PERCENT.AGED.UNDER.18.OR.OVER.64\" \"PER.CAPITA.INCOME.\" \"HARDSHIP.INDEX\"])",
              "census.hex");

      crimes =
          exec_str(
              "(colnames= crimes.hex [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21] [\"ID\" \"Case.Number\" \"Date\" \"Block\" \"IUCR\" \"Primary.Type\" \"Description\" \"Location.Description\" \"Arrest\" \"Domestic\" \"Beat\" \"District\" \"Ward\" \"Community.Area\" \"FBI.Code\" \"X.Coordinate\" \"Y.Coordinate\" \"Year\" \"Updated.On\" \"Latitude\" \"Longitude\" \"Location\"])",
              "crimes.hex");

      exec_str("(setTimeZone \"Etc/UTC\")", null);

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= unary_op_6 (day (tmp= nary_op_5 (as.Date (cols crimes.hex [2]) \"%m/%d/%Y %I:%M:%S %p\")))) [22] [0:9999]) 22 \"Day\")",
              "crimes.hex");

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= binary_op_31 (+ (tmp= unary_op_7 (month nary_op_5)) #1)) [23] [0:9999]) 23 \"Month\")",
              "crimes.hex");

      Keyed.remove(Key.make("nary_op_30"));

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= binary_op_32 (+ (tmp= binary_op_9 (- (tmp= unary_op_8 (year nary_op_5)) #1900)) #1900)) [17] [0:9999]) 17 \"Year\")",
              "crimes.hex");

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= unary_op_10 (week nary_op_5)) [24] [0:9999]) 24 \"WeekNum\")",
              "crimes.hex");

      Keyed.remove(Key.make("binary_op_32"));
      Keyed.remove(Key.make("binary_op_31"));
      Keyed.remove(Key.make("unary_op_8"));
      checkSaneFrame();

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= unary_op_11 (dayOfWeek nary_op_5)) [25] [0:9999]) 25 \"WeekDay\")",
              "crimes.hex");
      Keyed.remove(
          Key.make(
              "nfs:\\C:\\Users\\cliffc\\Desktop\\h2o-3\\smalldata\\chicago\\chicagoCrimes10k.csv.zip"));

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= unary_op_12 (hour nary_op_5)) [26] [0:9999]) 26 \"HourOfDay\")",
              "crimes.hex");

      crimes =
          exec_str(
              "(colnames= (= crimes.hex (tmp= nary_op_16 (ifelse (tmp= binary_op_15 (| (tmp= binary_op_13 (== unary_op_11 \"Sun\")) (tmp= binary_op_14 (== unary_op_11 \"Sat\")))) 1 0)) [27] [0:9999]) 27 \"Weekend\")",
              "crimes.hex");

      // Season is incorrectly assigned in the original chicago demo; picks up the Weekend flag
      crimes =
          exec_str(
              "(colnames= (= crimes.hex nary_op_16 [28] [0:9999]) 28 \"Season\")", "crimes.hex");

      // Standard "head of 10 rows" pattern for printing
      Frame subset_33 = exec_str("(rows crimes.hex [0:10])", "subset_33");
      Keyed.remove(Key.make("subset_33"));

      Keyed.remove(Key.make("subset_33"));
      Keyed.remove(Key.make("unary_op_29"));
      Keyed.remove(Key.make("nary_op_28"));
      Keyed.remove(Key.make("nary_op_27"));
      Keyed.remove(Key.make("nary_op_26"));
      Keyed.remove(Key.make("binary_op_25"));
      Keyed.remove(Key.make("binary_op_24"));
      Keyed.remove(Key.make("binary_op_23"));
      Keyed.remove(Key.make("binary_op_22"));
      Keyed.remove(Key.make("binary_op_21"));
      Keyed.remove(Key.make("binary_op_20"));
      Keyed.remove(Key.make("binary_op_19"));
      Keyed.remove(Key.make("binary_op_18"));
      Keyed.remove(Key.make("binary_op_17"));
      Keyed.remove(Key.make("nary_op_16"));
      Keyed.remove(Key.make("binary_op_15"));
      Keyed.remove(Key.make("binary_op_14"));
      Keyed.remove(Key.make("binary_op_13"));
      Keyed.remove(Key.make("unary_op_12"));
      Keyed.remove(Key.make("unary_op_11"));
      Keyed.remove(Key.make("unary_op_10"));
      Keyed.remove(Key.make("binary_op_9"));
      Keyed.remove(Key.make("unary_op_8"));
      Keyed.remove(Key.make("unary_op_7"));
      Keyed.remove(Key.make("unary_op_6"));
      Keyed.remove(Key.make("nary_op_5"));
      checkSaneFrame();

      // Standard "head of 10 rows" pattern for printing
      Frame subset_34 = exec_str("(rows crimes.hex [0:10])", "subset_34");
      Keyed.remove(Key.make("subset_34"));

      census =
          exec_str(
              "(colnames= census.hex [0 1 2 3 4 5 6 7 8] [\"Community.Area\" \"COMMUNITY.AREA.NAME\" \"PERCENT.OF.HOUSING.CROWDED\" \"PERCENT.HOUSEHOLDS.BELOW.POVERTY\" \"PERCENT.AGED.16..UNEMPLOYED\" \"PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA\" \"PERCENT.AGED.UNDER.18.OR.OVER.64\" \"PER.CAPITA.INCOME.\" \"HARDSHIP.INDEX\"])",
              "census.hex");
      Keyed.remove(Key.make("subset_34"));

      Frame subset_35 = exec_str("(cols  crimes.hex [-3])", "subset_35");
      Frame subset_36 = exec_str("(cols weather.hex [-1])", "subset_36");

      subset_36 =
          exec_str(
              "(colnames= subset_36 [0 1 2 3 4 5] [\"Month\" \"Day\" \"Year\" \"maxTemp\" \"meanTemp\" \"minTemp\"])",
              "subset_36");

      crimes.remove();
      weather.remove();

      // nary_op_37 = merge( X Y ); Vecs in X & nary_op_37 shared
      Frame nary_op_37 = exec_str("(merge subset_35 census.hex FALSE FALSE)", "nary_op_37");

      // nary_op_38 = merge( nary_op_37 subset_36); Vecs in nary_op_38 and nary_pop_37 and X shared
      Frame subset_41 =
          exec_str(
              "(rows (tmp= nary_op_38 (merge nary_op_37 subset_36 TRUE FALSE)) (tmp= binary_op_40 (<= (tmp= nary_op_39 (h2o.runif nary_op_38 30792152736.5179)) #0.8)))",
              "subset_41");

      // Standard "head of 10 rows" pattern for printing
      Frame subset_44 = exec_str("(rows subset_41 [0:10])", "subset_44");
      Keyed.remove(Key.make("subset_44"));
      Keyed.remove(Key.make("subset_44"));
      Keyed.remove(Key.make("binary_op_40"));
      Keyed.remove(Key.make("nary_op_37"));

      Frame subset_43 =
          exec_str("(rows nary_op_38 (tmp= binary_op_42 (> nary_op_39 #0.8)))", "subset_43");

      // Chicago demo continues on past, but this is all I've captured for now

      checkSaneFrame();

    } finally {
      Exec.exec(
          "(setTimeZone \""
              + oldtz
              + "\")"); // Restore time zone (which is global, and will affect following tests)
      if (weather != null) weather.remove();
      if (crimes != null) crimes.remove();
      if (census != null) census.remove();

      for (String s :
          new String[] {
            "nary_op_5",
            "unary_op_6",
            "unary_op_7",
            "unary_op_8",
            "binary_op_9",
            "unary_op_10",
            "unary_op_11",
            "unary_op_12",
            "binary_op_13",
            "binary_op_14",
            "binary_op_15",
            "nary_op_16",
            "binary_op_17",
            "binary_op_18",
            "binary_op_19",
            "binary_op_20",
            "binary_op_21",
            "binary_op_22",
            "binary_op_23",
            "binary_op_24",
            "binary_op_25",
            "nary_op_26",
            "nary_op_27",
            "nary_op_28",
            "unary_op_29",
            "binary_op_30",
            "binary_op_31",
            "binary_op_32",
            "subset_33",
            "subset_34",
            "subset_35",
            "subset_36",
            "nary_op_37",
            "nary_op_38",
            "nary_op_39",
            "binary_op_40",
            "subset_41",
            "binary_op_42",
            "subset_43",
            "subset_44",
          }) Keyed.remove(Key.make(s));
    }
  }