@Override public int run(String[] args) throws Exception { Job job = new Job(getConf(), "HAWQParquetOutputFormat"); job.setJarByClass(HAWQParquetOutputDriver.class); job.setOutputFormatClass(HAWQParquetOutputFormat.class); /* // int2 int4 int8 HAWQSchema schema = new HAWQSchema("t_int", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"), HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"), HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long") ); job.setMapperClass(WriteIntMapper.class); */ /* // varchar HAWQSchema schema = new HAWQSchema("t_varchar", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar") ); job.setMapperClass(WriteVarcharMapper.class); */ /* // float4 float8 HAWQSchema schema = new HAWQSchema("t_floating", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"), HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long") ); job.setMapperClass(WriteFloatingNumberMapper.class); */ // boolean // HAWQSchema schema = new HAWQSchema("t_boolean", // HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool")); // job.setMapperClass(WriteBooleanMapper.class); // byte array HAWQSchema schema = new HAWQSchema( "t_bytea", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea")); job.setMapperClass(WriteByteArrayMapper.class); HAWQParquetOutputFormat.setSchema(job, schema); FileInputFormat.addInputPath(job, new Path(args[0])); HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Void.class); job.setMapOutputValueClass(HAWQRecord.class); return job.waitForCompletion(true) ? 0 : 1; }
/**
 * Mapper that generates rows carrying a single byte-array field.
 *
 * <p>Each input value is parsed as an integer {@code n}. The mapper then writes {@code n}
 * records with a {@code null} key; record {@code i} (for {@code i} from 0 to {@code n - 1})
 * has field index 1 set to the UTF-8 bytes of {@code "hello <i>"}.
 */
private static class WriteByteArrayMapper extends Mapper<LongWritable, Text, Void, HAWQRecord> {
  // Fixed charset so the written bytes do not depend on the platform default encoding.
  // Fully qualified to avoid requiring a new file-level import.
  private static final java.nio.charset.Charset UTF_8 =
      java.nio.charset.Charset.forName("UTF-8");

  // One record instance is reused for every row; it is reset before each row is filled.
  private final HAWQRecord record = HAWQParquetOutputFormat.newRecord();

  /**
   * Writes as many generated records as the number contained in {@code value}.
   *
   * @param key input key; not used
   * @param value text holding the decimal number of records to generate
   * @param context sink that receives the generated records
   * @throws IOException if writing fails, or wrapping a {@code HAWQException} raised while
   *         filling the record
   * @throws InterruptedException if the write is interrupted
   * @throws NumberFormatException if {@code value} is not a parsable integer
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    int recordNum = Integer.parseInt(value.toString());
    try {
      for (int i = 0; i < recordNum; i++) {
        record.reset();
        // Plain concatenation always yields ASCII digits, whereas String.format("%d")
        // may localize digits according to the JVM default locale.
        record.setBytes(1, ("hello " + i).getBytes(UTF_8));
        context.write(null, record);
      }
    } catch (HAWQException e) {
      throw new IOException(e);
    }
  }
}
private static class WriteFloatingNumberMapper extends Mapper<LongWritable, Text, Void, HAWQRecord> { private HAWQRecord record = HAWQParquetOutputFormat.newRecord(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Integer recordNum = Integer.parseInt(value.toString()); try { for (int i = 0; i < recordNum; i++) { record.reset(); record.setFloat(1, 1.0f * i); record.setDouble(2, 2 * Math.PI * i); context.write(null, record); } } catch (HAWQException e) { throw new IOException(e); } } }
private static class WriteIntMapper extends Mapper<LongWritable, Text, Void, HAWQRecord> { private HAWQRecord record = HAWQParquetOutputFormat.newRecord(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Integer recordNum = Integer.parseInt(value.toString()); try { for (int i = 0; i < recordNum; i++) { record.reset(); record.setShort(1, (short) (i + 1)); if (i % 2 == 0) { record.setInt(2, i); } record.setLong(3, i * 100); context.write(null, record); } } catch (HAWQException e) { throw new IOException(e); } } }