public void testReadCorruptFile() throws IOException, SerDeException {
  fs.delete(file, true);

  byte[][] record = {null, null, null, null, null, null, null, null};

  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
      new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
  final int recCount = 100;
  Random rand = new Random();
  // Write recCount rows, each with 8 columns of random integer text.
  for (int recIdx = 0; recIdx < recCount; recIdx++) {
    for (int i = 0; i < record.length; i++) {
      record[i] = Integer.toString(rand.nextInt()).getBytes("UTF-8");
    }
    for (int i = 0; i < record.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
  }
  writer.close();

  // Insert junk in the middle of the file. Assumes the file is on local disk.
  RandomAccessFile raf = new RandomAccessFile(file.toUri().getPath(), "rw");
  long corruptOffset = raf.length() / 2;
  LOG.info("corrupting " + raf + " at offset " + corruptOffset);
  raf.seek(corruptOffset);
  raf.writeBytes("junkjunkjunkjunkjunkjunkjunkjunk");
  raf.close();

  // Set the option for tolerating corruptions. The read should succeed.
  Configuration tmpConf = new Configuration(conf);
  tmpConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
  RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);

  // Read every remaining row; the reader should skip the damaged data
  // instead of throwing.
  LongWritable rowID = new LongWritable();
  while (true) {
    boolean more = reader.next(rowID);
    if (!more) {
      break;
    }
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(8);
  }
  reader.close();
}
public void testSimpleReadAndWrite() throws IOException, SerDeException {
  fs.delete(file, true);

  byte[][] record_1 = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
      "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
      "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
      new byte[0], "NULL".getBytes("UTF-8")};
  byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
      "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
      "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
      new byte[0], "NULL".getBytes("UTF-8")};

  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
      new DefaultCodec());

  // Write the two records, one column value at a time.
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
  for (int i = 0; i < record_1.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  bytes.clear();
  for (int i = 0; i < record_2.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  writer.close();

  Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
      new ShortWritable((short) 456), new IntWritable(789),
      new LongWritable(1000), new DoubleWritable(5.3),
      new Text("hive and hadoop"), null, null};
  Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
      new ShortWritable((short) 200), new IntWritable(123),
      new LongWritable(1000), new DoubleWritable(5.3),
      new Text("hive and hadoop"), null, null};

  // Read the two rows back, deserialize them, and compare every field
  // against the expected writables.
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  LongWritable rowID = new LongWritable();
  for (int i = 0; i < 2; i++) {
    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);

    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
      Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(
          fieldData, fieldRefs.get(j).getFieldObjectInspector(),
          ObjectInspectorCopyOption.WRITABLE);
      if (i == 0) {
        assertEquals("Field " + j, standardWritableData, expectedRecord_1[j]);
      } else {
        assertEquals("Field " + j, standardWritableData, expectedRecord_2[j]);
      }
    }
  }
  reader.close();
}
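
// Hypothetical helper, not part of the original test class: a minimal sketch
// of the column-packing loop that both tests above repeat. It assumes only the
// BytesRefArrayWritable and BytesRefWritable APIs already used in this file.
private static void fillColumns(BytesRefArrayWritable bytes, byte[][] record) {
  for (int i = 0; i < record.length; i++) {
    // Wrap each column's raw bytes without copying and place it at index i.
    bytes.set(i, new BytesRefWritable(record[i], 0, record[i].length));
  }
}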