Example #1
0
  public void testMapValues() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "a,b");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>,map<string,int>");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    HCatRecord expected1 =
        new DefaultHCatRecord(
            Arrays.<Object>asList(
                Arrays.<String>asList("aaa"), createHashMapStringInteger("bbb", 1)));
    HCatRecord expected2 =
        new DefaultHCatRecord(
            Arrays.<Object>asList(
                Arrays.<String>asList("yyy"), createHashMapStringInteger("zzz", 123)));
    HCatRecord expected3 =
        new DefaultHCatRecord(
            Arrays.<Object>asList(
                Arrays.<String>asList("a"),
                createHashMapStringInteger("x", 11, "y", 22, "z", null)));

    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text1), expected1));
    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text2), expected2));
  }
Example #2
0
  /**
   * This test tests that our json deserialization is not too strict, as per HIVE-6166
   *
   * <p>i.e, if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks like
   * : { "x" : "abc" , "t" : { "a" : "1", "b" : "2", "c" : [ { "x" : 2 , "y" : 3 } , { "x" : 3 , "y"
   * : 2 } ] } , "s" : { "a" : 2 , "b" : "blah", "c": "woo" } }
   *
   * <p>Then it should still work, and ignore the "x" and "t" field and "c" subfield of "s", and it
   * should read k as null.
   */
  public void testLooseJsonReadability() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "s,k");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "struct<a:int,b:string>,int");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text jsonText =
        new Text(
            "{ \"x\" : \"abc\" , "
                + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
                + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");
    List<Object> expected = new ArrayList<Object>();
    List<Object> inner = new ArrayList<Object>();
    inner.add(2);
    inner.add("blah");
    expected.add(inner);
    expected.add(null);
    HCatRecord expectedRecord = new DefaultHCatRecord(expected);

    HCatRecord r = (HCatRecord) rjsd.deserialize(jsonText);
    System.err.println("record : " + r.toString());

    assertTrue(HCatDataCheckUtil.recordsEqual(r, expectedRecord));
  }
Example #3
0
  public void testRW() throws Exception {

    Configuration conf = new Configuration();

    for (Pair<Properties, HCatRecord> e : getData()) {
      Properties tblProps = e.first;
      HCatRecord r = e.second;

      HCatRecordSerDe hrsd = new HCatRecordSerDe();
      SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null);

      JsonSerDe jsde = new JsonSerDe();
      SerDeUtils.initializeSerDe(jsde, conf, tblProps, null);

      LOG.info("ORIG:{}", r);

      Writable s = hrsd.serialize(r, hrsd.getObjectInspector());
      LOG.info("ONE:{}", s);

      Object o1 = hrsd.deserialize(s);
      StringBuilder msg = new StringBuilder();
      boolean isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1);
      assertTrue(msg.toString(), isEqual);

      Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector());
      LOG.info("TWO:{}", s2);
      Object o2 = jsde.deserialize(s2);
      LOG.info("deserialized TWO : {} ", o2);
      msg.setLength(0);
      isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2, msg);
      assertTrue(msg.toString(), isEqual);
    }
  }
Example #4
0
  public void testUpperCaseKey() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "empid,name");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text text1 = new Text("{ \"empId\" : 123, \"name\" : \"John\" } ");
    Text text2 = new Text("{ \"empId\" : 456, \"name\" : \"Jane\" } ");

    HCatRecord expected1 = new DefaultHCatRecord(Arrays.<Object>asList(123, "John"));
    HCatRecord expected2 = new DefaultHCatRecord(Arrays.<Object>asList(456, "Jane"));

    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text1), expected1));
    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text2), expected2));
  }
Example #5
0
  public void testRobustRead() throws Exception {
    /**
     * This test has been added to account for HCATALOG-436 We write out columns with "internal
     * column names" such as "_col0", but try to read with regular column names.
     */
    Configuration conf = new Configuration();

    for (Pair<Properties, HCatRecord> e : getData()) {
      Properties tblProps = e.first;
      HCatRecord r = e.second;

      Properties internalTblProps = new Properties();
      for (Map.Entry pe : tblProps.entrySet()) {
        if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) {
          internalTblProps.put(pe.getKey(), pe.getValue());
        } else {
          internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue()));
        }
      }

      LOG.info("orig tbl props:{}", tblProps);
      LOG.info("modif tbl props:{}", internalTblProps);

      JsonSerDe wjsd = new JsonSerDe();
      SerDeUtils.initializeSerDe(wjsd, conf, internalTblProps, null);

      JsonSerDe rjsd = new JsonSerDe();
      SerDeUtils.initializeSerDe(rjsd, conf, tblProps, null);

      LOG.info("ORIG:{}", r);

      Writable s = wjsd.serialize(r, wjsd.getObjectInspector());
      LOG.info("ONE:{}", s);

      Object o1 = wjsd.deserialize(s);
      LOG.info("deserialized ONE : {} ", o1);

      Object o2 = rjsd.deserialize(s);
      LOG.info("deserialized TWO : {} ", o2);
      StringBuilder msg = new StringBuilder();
      boolean isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2, msg);
      assertTrue(msg.toString(), isEqual);
    }
  }