/**
 * Deserializes JSON rows into a two-column schema ({@code a: array<string>},
 * {@code b: map<string,int>}) and checks each result against a hand-built expected record.
 *
 * <p>Three inputs are covered: a row with surrounding whitespace, a compact row, and a row whose
 * map contains several entries including one with a JSON {@code null} value.
 *
 * @throws Exception if the SerDe fails to initialize or to deserialize an input
 */
public void testMapValues() throws Exception {
  Configuration conf = new Configuration();
  Properties props = new Properties();
  props.put(serdeConstants.LIST_COLUMNS, "a,b");
  props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>,map<string,int>");
  JsonSerDe rjsd = new JsonSerDe();
  SerDeUtils.initializeSerDe(rjsd, conf, props, null);

  Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
  Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
  Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

  HCatRecord expected1 =
      new DefaultHCatRecord(
          Arrays.<Object>asList(
              Arrays.<String>asList("aaa"), createHashMapStringInteger("bbb", 1)));
  HCatRecord expected2 =
      new DefaultHCatRecord(
          Arrays.<Object>asList(
              Arrays.<String>asList("yyy"), createHashMapStringInteger("zzz", 123)));
  HCatRecord expected3 =
      new DefaultHCatRecord(
          Arrays.<Object>asList(
              Arrays.<String>asList("a"),
              createHashMapStringInteger("x", 11, "y", 22, "z", null)));

  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text1), expected1));
  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text2), expected2));
  // The multi-entry map with a null value must be verified as well; without this assertion the
  // third fixture would be constructed but never exercised.
  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text3), expected3));
}
/**
 * Tests that JSON deserialization is not too strict, as per HIVE-6166.
 *
 * <p>That is, if the schema is {@code s:struct<a:int,b:string>,k:int} and the incoming data looks
 * like:
 *
 * <pre>{@code
 * { "x" : "abc" ,
 *   "t" : { "a" : "1", "b" : "2", "c" : [ { "x" : 2 , "y" : 3 } , { "x" : 3 , "y" : 2 } ] } ,
 *   "s" : { "a" : 2 , "b" : "blah", "c": "woo" } }
 * }</pre>
 *
 * <p>then deserialization should still work: the unknown top-level fields {@code "x"} and
 * {@code "t"} and the unknown subfield {@code "c"} of {@code "s"} are ignored, and the absent
 * column {@code k} is read as {@code null}.
 *
 * @throws Exception if the SerDe fails to initialize or to deserialize the input
 */
public void testLooseJsonReadability() throws Exception {
  Configuration conf = new Configuration();
  Properties props = new Properties();
  props.put(serdeConstants.LIST_COLUMNS, "s,k");
  props.put(serdeConstants.LIST_COLUMN_TYPES, "struct<a:int,b:string>,int");
  JsonSerDe rjsd = new JsonSerDe();
  SerDeUtils.initializeSerDe(rjsd, conf, props, null);

  Text jsonText =
      new Text(
          "{ \"x\" : \"abc\" , "
              + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
              + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");

  // Expected row: s = (2, "blah") with the extra subfield dropped, k = null.
  List<Object> expected = new ArrayList<Object>();
  List<Object> inner = new ArrayList<Object>();
  inner.add(2);
  inner.add("blah");
  expected.add(inner);
  expected.add(null);
  HCatRecord expectedRecord = new DefaultHCatRecord(expected);

  HCatRecord r = (HCatRecord) rjsd.deserialize(jsonText);
  // Log through the test logger like the sibling tests do; parameterized logging also tolerates
  // a null record so that the assertion below is what reports the failure.
  LOG.info("record : {}", r);
  assertTrue(HCatDataCheckUtil.recordsEqual(r, expectedRecord));
}
/**
 * Round-trips every fixture from {@code getData()} through two SerDes and checks that the record
 * survives unchanged.
 *
 * <p>For each (table properties, record) pair the record is serialized and deserialized with
 * {@code HCatRecordSerDe}, then the result is serialized and deserialized with
 * {@code JsonSerDe}. After each stage the deserialized record is compared with the original.
 *
 * @throws Exception if a SerDe fails to initialize, serialize or deserialize
 */
public void testRW() throws Exception {
  Configuration conf = new Configuration();

  for (Pair<Properties, HCatRecord> e : getData()) {
    Properties tblProps = e.first;
    HCatRecord r = e.second;

    HCatRecordSerDe hrsd = new HCatRecordSerDe();
    SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null);

    JsonSerDe jsde = new JsonSerDe();
    SerDeUtils.initializeSerDe(jsde, conf, tblProps, null);

    LOG.info("ORIG:{}", r);

    // Stage one: HCatRecordSerDe round trip.
    Writable s = hrsd.serialize(r, hrsd.getObjectInspector());
    LOG.info("ONE:{}", s);

    Object o1 = hrsd.deserialize(s);
    StringBuilder msg = new StringBuilder();
    // Pass the builder so that a mismatch is described in the assertion failure message.
    boolean isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1, msg);
    assertTrue(msg.toString(), isEqual);

    // Stage two: JsonSerDe round trip of the stage-one output.
    Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector());
    LOG.info("TWO:{}", s2);
    Object o2 = jsde.deserialize(s2);
    LOG.info("deserialized TWO : {} ", o2);
    msg.setLength(0);
    isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2, msg);
    assertTrue(msg.toString(), isEqual);
  }
}
/**
 * Checks that a JSON key whose letter case differs from the declared column name (here
 * {@code "empId"} in the data versus the column {@code empid}) is still deserialized into that
 * column.
 *
 * @throws Exception if the SerDe fails to initialize or to deserialize an input
 */
public void testUpperCaseKey() throws Exception {
  Properties tableProperties = new Properties();
  tableProperties.put(serdeConstants.LIST_COLUMNS, "empid,name");
  tableProperties.put(serdeConstants.LIST_COLUMN_TYPES, "int,string");

  JsonSerDe serde = new JsonSerDe();
  SerDeUtils.initializeSerDe(serde, new Configuration(), tableProperties, null);

  // Input rows and the records they are expected to produce, index-aligned.
  Text[] rows = {
    new Text("{ \"empId\" : 123, \"name\" : \"John\" } "),
    new Text("{ \"empId\" : 456, \"name\" : \"Jane\" } ")
  };
  HCatRecord[] expectedRecords = {
    new DefaultHCatRecord(Arrays.<Object>asList(123, "John")),
    new DefaultHCatRecord(Arrays.<Object>asList(456, "Jane"))
  };

  for (int i = 0; i < rows.length; i++) {
    HCatRecord actual = (HCatRecord) serde.deserialize(rows[i]);
    assertTrue(HCatDataCheckUtil.recordsEqual(actual, expectedRecords[i]));
  }
}
/**
 * Added to account for HCATALOG-436: columns are written out with "internal column names" such
 * as "_col0", but are read back with the regular column names.
 *
 * <p>For every fixture from {@code getData()}, a copy of the table properties is made in which
 * the column-name list is replaced by the result of {@code getInternalNames(...)}. The record is
 * serialized by a {@code JsonSerDe} configured with that copy and then deserialized by a
 * {@code JsonSerDe} configured with the original properties; the result must equal the original
 * record.
 *
 * @throws Exception if a SerDe fails to initialize, serialize or deserialize
 */
public void testRobustRead() throws Exception {
  Configuration conf = new Configuration();

  for (Pair<Properties, HCatRecord> e : getData()) {
    Properties tblProps = e.first;
    HCatRecord r = e.second;

    // Copy the table properties, swapping only the column-name list for its internal form.
    Properties internalTblProps = new Properties();
    for (Map.Entry<Object, Object> pe : tblProps.entrySet()) {
      if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) {
        internalTblProps.put(pe.getKey(), pe.getValue());
      } else {
        internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue()));
      }
    }

    LOG.info("orig tbl props:{}", tblProps);
    LOG.info("modif tbl props:{}", internalTblProps);

    // Writer uses the internal column names; reader uses the regular ones.
    JsonSerDe wjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(wjsd, conf, internalTblProps, null);

    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, tblProps, null);

    LOG.info("ORIG:{}", r);

    Writable s = wjsd.serialize(r, wjsd.getObjectInspector());
    LOG.info("ONE:{}", s);

    // Deserialized with the writer's own schema; logged for diagnosis only.
    Object o1 = wjsd.deserialize(s);
    LOG.info("deserialized ONE : {} ", o1);

    Object o2 = rjsd.deserialize(s);
    LOG.info("deserialized TWO : {} ", o2);
    StringBuilder msg = new StringBuilder();
    boolean isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2, msg);
    assertTrue(msg.toString(), isEqual);
  }
}