/**
 * Verifies that JSON deserialization is lenient about unexpected input, as per HIVE-6166.
 *
 * <p>The table schema used here is {@code s:struct<a:int,b:string>, k:int}, while the JSON
 * document that is fed in looks like:
 *
 * <pre>
 * { "x" : "abc",
 *   "t" : { "a" : "1", "b" : "2", "c" : [ { "x" : 2, "y" : 3 }, { "x" : 3, "y" : 2 } ] },
 *   "s" : { "a" : 2, "b" : "blah", "c" : "woo" } }
 * </pre>
 *
 * <p>The top-level fields "x" and "t" and the sub-field "c" of "s" are not part of the schema
 * and are expected to be ignored; the column "k" is absent from the document and is expected
 * to be read back as null.
 *
 * @throws Exception if the SerDe cannot be initialized or the document cannot be deserialized
 */
public void testLooseJsonReadability() throws Exception {
  // Expected outcome: s = (2, "blah"), k = null.
  List<Object> structValue = new ArrayList<Object>();
  structValue.add(2);
  structValue.add("blah");

  List<Object> expectedColumns = new ArrayList<Object>();
  expectedColumns.add(structValue);
  expectedColumns.add(null);
  HCatRecord expectedRecord = new DefaultHCatRecord(expectedColumns);

  // Schema declares only the columns "s" and "k".
  Properties tableProps = new Properties();
  tableProps.put(serdeConstants.LIST_COLUMNS, "s,k");
  tableProps.put(serdeConstants.LIST_COLUMN_TYPES, "struct<a:int,b:string>,int");

  Configuration configuration = new Configuration();
  JsonSerDe serde = new JsonSerDe();
  SerDeUtils.initializeSerDe(serde, configuration, tableProps, null);

  // Document carrying extra fields ("x", "t", "s.c") and lacking "k".
  String json =
      "{ \"x\" : \"abc\" , "
          + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
          + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }";
  Text document = new Text(json);

  HCatRecord actualRecord = (HCatRecord) serde.deserialize(document);
  System.err.println("record : " + actualRecord.toString());

  boolean matches = HCatDataCheckUtil.recordsEqual(actualRecord, expectedRecord);
  assertTrue(matches);
}
public void testRW() throws Exception { Configuration conf = new Configuration(); for (Entry<Properties, HCatRecord> e : getData().entrySet()) { Properties tblProps = e.getKey(); HCatRecord r = e.getValue(); HCatRecordSerDe hrsd = new HCatRecordSerDe(); SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null); LOG.info("ORIG: {}", r); Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); LOG.info("ONE: {}", s); HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2)); // If it went through correctly, then s is also a HCatRecord, // and also equal to the above, and a deepcopy, and this holds // through for multiple levels more of serialization as well. Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); LOG.info("TWO: {}", s2); Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); // serialize using another serde, and read out that object repr. LazySimpleSerDe testSD = new LazySimpleSerDe(); SerDeUtils.initializeSerDe(testSD, conf, tblProps, null); Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); LOG.info("THREE: {}", s3); Object o3 = testSD.deserialize(s3); Assert.assertFalse(r.getClass().equals(o3.getClass())); // then serialize again using hrsd, and compare results HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); LOG.info("FOUR: {}", s4); // Test LazyHCatRecord init and read LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); LOG.info("FIVE: {}", s5); LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); LOG.info("SIX: {}", s6); } }