Esempio n. 1
0
  /**
   * This test tests that our json deserialization is not too strict, as per HIVE-6166
   *
   * <p>i.e, if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks like
   * : { "x" : "abc" , "t" : { "a" : "1", "b" : "2", "c" : [ { "x" : 2 , "y" : 3 } , { "x" : 3 , "y"
   * : 2 } ] } , "s" : { "a" : 2 , "b" : "blah", "c": "woo" } }
   *
   * <p>Then it should still work, and ignore the "x" and "t" field and "c" subfield of "s", and it
   * should read k as null.
   */
  public void testLooseJsonReadability() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "s,k");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "struct<a:int,b:string>,int");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text jsonText =
        new Text(
            "{ \"x\" : \"abc\" , "
                + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
                + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");
    List<Object> expected = new ArrayList<Object>();
    List<Object> inner = new ArrayList<Object>();
    inner.add(2);
    inner.add("blah");
    expected.add(inner);
    expected.add(null);
    HCatRecord expectedRecord = new DefaultHCatRecord(expected);

    HCatRecord r = (HCatRecord) rjsd.deserialize(jsonText);
    System.err.println("record : " + r.toString());

    assertTrue(HCatDataCheckUtil.recordsEqual(r, expectedRecord));
  }
Esempio n. 2
0
  public void testRW() throws Exception {

    Configuration conf = new Configuration();

    for (Entry<Properties, HCatRecord> e : getData().entrySet()) {
      Properties tblProps = e.getKey();
      HCatRecord r = e.getValue();

      HCatRecordSerDe hrsd = new HCatRecordSerDe();
      SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null);

      LOG.info("ORIG: {}", r);

      Writable s = hrsd.serialize(r, hrsd.getObjectInspector());
      LOG.info("ONE: {}", s);

      HCatRecord r2 = (HCatRecord) hrsd.deserialize(s);
      Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2));

      // If it went through correctly, then s is also a HCatRecord,
      // and also equal to the above, and a deepcopy, and this holds
      // through for multiple levels more of serialization as well.

      Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector());
      LOG.info("TWO: {}", s2);
      Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s));
      Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2));

      // serialize using another serde, and read out that object repr.
      LazySimpleSerDe testSD = new LazySimpleSerDe();
      SerDeUtils.initializeSerDe(testSD, conf, tblProps, null);

      Writable s3 = testSD.serialize(s, hrsd.getObjectInspector());
      LOG.info("THREE: {}", s3);
      Object o3 = testSD.deserialize(s3);
      Assert.assertFalse(r.getClass().equals(o3.getClass()));

      // then serialize again using hrsd, and compare results
      HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector());
      LOG.info("FOUR: {}", s4);

      // Test LazyHCatRecord init and read
      LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector());
      LOG.info("FIVE: {}", s5);

      LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector());
      LOG.info("SIX: {}", s6);
    }
  }