/**
 * Chooses the map-style collection type that matches a (key, value) tuple schema and records
 * it through {@code setArrayTypeProperty}.
 *
 * <p>Field 0 of {@code input} is read as the key type and field 1 as the value type. The key
 * must be an int or a chararray; the value must be an int or a float. The key type is checked
 * first, so an unsupported key is reported even when the value type is also unsupported.
 *
 * @param input schema whose first two fields describe the key and the value
 * @param prop properties object passed on to {@code setArrayTypeProperty}
 * @throws FrontendException declared for the schema field lookups
 * @throws RuntimeException if the key or the value type is not supported
 */
public void determineMapCollectionType(Schema input, Properties prop) throws FrontendException {
  byte keyType = input.getField(0).type;
  byte valueType = input.getField(1).type;

  if (keyType == DataType.INTEGER) {
    if (valueType == DataType.INTEGER) {
      setArrayTypeProperty(prop, PigCollection.INT_INT_MAP);
    } else if (valueType == DataType.FLOAT) {
      setArrayTypeProperty(prop, PigCollection.INT_FLOAT_MAP);
    } else {
      throw new RuntimeException("Value type for map-type PigCollection should be int or float");
    }
  } else if (keyType == DataType.CHARARRAY) {
    if (valueType == DataType.INTEGER) {
      setArrayTypeProperty(prop, PigCollection.STRING_INT_MAP);
    } else if (valueType == DataType.FLOAT) {
      setArrayTypeProperty(prop, PigCollection.STRING_FLOAT_MAP);
    } else {
      throw new RuntimeException("Value type for map-type PigCollection should be int or float");
    }
  } else {
    // The message lists exactly the combinations accepted above: int/chararray keys with
    // int/float values.
    throw new RuntimeException(
        "Received bag of unsupported schema. "
            + "Bags with two-element tuples must have schema {(int/chararray, int/float)}");
  }
}
/**
 * Registers a load query with no explicit schema and checks that the dumped schema for alias
 * "A" exposes an integer field named "key" followed by a chararray field named "value".
 */
@Test
public void readWithoutSchemaTestSchema() throws IOException {
  registerLoadQuery(IntWritableConverter.class, TextConverter.class, null);

  final Schema dumped = pigServer.dumpSchema("A");
  Assert.assertNotNull(dumped);

  // First field: the key.
  final Schema.FieldSchema keyField = dumped.getField(0);
  Assert.assertEquals("key", keyField.alias);
  Assert.assertEquals(DataType.INTEGER, keyField.type);

  // Second field: the value. Looked up only after the key checks, as before.
  final Schema.FieldSchema valueField = dumped.getField(1);
  Assert.assertEquals("value", valueField.alias);
  Assert.assertEquals(DataType.CHARARRAY, valueField.type);
}
@Test // See PIG-730 public void testMergeSchemaWithTwoLevelAccess1() throws Exception { // Generate two schemas Schema s1 = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}"); Schema s2 = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}"); s1.getField(0).schema.setTwoLevelAccessRequired(true); s2.getField(0).schema.setTwoLevelAccessRequired(true); Schema s3 = Schema.mergeSchema(s1, s2, true); Assert.assertEquals(s3.getField(0).schema.isTwoLevelAccessRequired(), true); }
/**
 * Chooses the array-style collection type for a schema whose fields all share one type and
 * records it through {@code setArrayTypeProperty}.
 *
 * <p>The type of field 0 is the reference; every later field must have the same type, and that
 * type must be int or float.
 *
 * @param input schema describing the array elements
 * @param prop properties object passed on to {@code setArrayTypeProperty}
 * @throws FrontendException declared for the schema field lookups
 * @throws RuntimeException if the field types differ or the shared type is not int or float
 */
public void determineArrayCollectionType(Schema input, Properties prop)
    throws FrontendException {
  final byte elementType = input.getField(0).type;

  // Every remaining field has to agree with the first one.
  final int fieldCount = input.size();
  for (int idx = 1; idx < fieldCount; idx++) {
    final boolean sameType = input.getField(idx).type == elementType;
    if (!sameType) {
      throw new RuntimeException("All inputs must have the same type");
    }
  }

  if (elementType == DataType.INTEGER) {
    setArrayTypeProperty(prop, PigCollection.INT_ARRAY);
    return;
  }
  if (elementType == DataType.FLOAT) {
    setArrayTypeProperty(prop, PigCollection.FLOAT_ARRAY);
    return;
  }
  throw new RuntimeException("Recieved vector of unsupported schema. Should be ints or floats");
}
/**
 * Describes the output as a single bag field that reuses the schema of the first input field.
 *
 * <p>The field name comes from {@code getSchemaName}, called with the lower-cased class name
 * and the input schema.
 *
 * @param input schema of the UDF input; its first field's schema becomes the output bag schema
 * @return the one-field bag schema, or {@code null} if a {@link FrontendException} is raised
 *     while building it
 */
@Override
public Schema outputSchema(Schema input) {
  Schema described;
  try {
    final String fieldName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    final Schema bagContents = input.getField(0).schema;
    described = new Schema(new Schema.FieldSchema(fieldName, bagContents, DataType.BAG));
  } catch (FrontendException ignored) {
    // Existing behaviour: a schema failure yields null rather than an exception.
    described = null;
  }
  return described;
}
/**
 * Chooses the set-style collection type for a single-field tuple schema and records it through
 * {@code setArrayTypeProperty}.
 *
 * <p>Only field 0 of {@code input} is inspected; it must be an int or a chararray.
 *
 * @param input schema whose first field describes the set element
 * @param prop properties object passed on to {@code setArrayTypeProperty}
 * @throws FrontendException declared for the schema field lookup
 * @throws RuntimeException if the element type is neither int nor chararray
 */
public void determineSetCollectionType(Schema input, Properties prop) throws FrontendException {
  byte type = input.getField(0).type;
  if (type == DataType.INTEGER) {
    setArrayTypeProperty(prop, PigCollection.INT_SET);
  } else if (type == DataType.CHARARRAY) {
    setArrayTypeProperty(prop, PigCollection.STRING_SET);
  } else {
    // Only one field is examined here, so the message describes single-element tuples.
    throw new RuntimeException(
        "Received bag of unsupported schema. "
            + "Bags with single-element tuples must have schema {(int/chararray)}");
  }
}
/**
 * Validates the input schema and describes the output as two fields: {@code cls} (chararray)
 * and {@code weight} (double).
 *
 * <p>The input must be exactly one bag field. The schema nested two levels below that field
 * must have at least four fields, typed chararray, chararray, double, chararray in that order.
 *
 * @param input schema of the UDF input
 * @return a schema with the fields {@code cls} and {@code weight}
 * @throws RuntimeException wrapping any exception raised during validation, including the
 *     validation failures themselves
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    final boolean singleBagInput =
        input.getFields().size() == 1 && input.getField(0).type == DataType.BAG;
    if (!singleBagInput) {
      throw new RuntimeException("expect input {bag}");
    }

    final Schema tupleLayout = input.getField(0).schema.getField(0).schema;

    // && short-circuits, so no field is looked up unless at least four are present.
    final boolean layoutMatches =
        tupleLayout.getFields().size() >= 4
            && tupleLayout.getField(0).type == DataType.CHARARRAY
            && tupleLayout.getField(1).type == DataType.CHARARRAY
            && tupleLayout.getField(2).type == DataType.DOUBLE
            && tupleLayout.getField(3).type == DataType.CHARARRAY;
    if (!layoutMatches) {
      throw new RuntimeException(
          "expect input {userid:chararray, " + "md:chararray, weight:double, cls:chararray}");
    }

    final Schema described = new Schema();
    described.add(new FieldSchema("cls", DataType.CHARARRAY));
    described.add(new FieldSchema("weight", DataType.DOUBLE));
    return described;
  } catch (Exception cause) {
    throw new RuntimeException(cause);
  }
}
/**
 * Validates that the input is a single bag field and describes the output as one unnamed long
 * field.
 *
 * @param input schema of the UDF input
 * @return a one-field schema of type long with a {@code null} alias
 * @throws RuntimeException if the input does not have exactly one field, if that field is not
 *     a bag, or wrapping a {@link FrontendException} raised while reading the schema
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    final int fieldCount = input.size();
    if (fieldCount != 1) {
      throw new RuntimeException("Expected input to have only a single field");
    }

    final byte onlyFieldType = input.getField(0).type;
    final boolean isBag = onlyFieldType == DataType.BAG;
    if (!isBag) {
      throw new RuntimeException("Expected a BAG as input");
    }

    final Schema.FieldSchema longField = new Schema.FieldSchema(null, DataType.LONG);
    return new Schema(longField);
  } catch (FrontendException cause) {
    throw new RuntimeException(cause);
  }
}
/**
 * Validates that the first input field is a bag and describes the output as a single bag field
 * that reuses that field's schema.
 *
 * <p>The output field name comes from {@code getSchemaName}, called with the lower-cased class
 * name and the input schema.
 *
 * @param input schema of the UDF input
 * @return a one-field bag schema carrying the input bag's inner schema
 * @throws RuntimeException if the first field is not a bag, or wrapping a
 *     {@link FrontendException} raised while reading or building the schema
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    Schema.FieldSchema inputFieldSchema = input.getField(0);
    if (inputFieldSchema.type != DataType.BAG) {
      throw new RuntimeException("Expected a BAG as input");
    }
    return new Schema(
        new Schema.FieldSchema(
            getSchemaName(this.getClass().getName().toLowerCase(), input),
            inputFieldSchema.schema,
            DataType.BAG));
  } catch (FrontendException e) {
    // The exception is rethrown with its cause attached, so the caller gets the full stack
    // trace; printing it here as well would only report the failure twice.
    throw new RuntimeException(e);
  }
}
/**
 * Inspects the input schema, records the detected input shape and collection type in this
 * UDF's properties, and describes the output as a single bytearray field.
 *
 * <p>Shapes handled:
 *
 * <ul>
 *   <li>one tuple field: recorded as {@code INPUT_TUPLE_FIELD}; the tuple's schema goes to
 *       {@code determineArrayCollectionType};
 *   <li>one bag field: recorded as {@code INPUT_BAG_FIELD}; the schema of the bag's tuples goes
 *       to {@code determineSetCollectionType} (one field) or
 *       {@code determineMapCollectionType} (two fields);
 *   <li>any field count other than one: recorded as {@code INPUT_SEVERAL_FIELDS}; the whole
 *       input schema goes to {@code determineArrayCollectionType}.
 * </ul>
 *
 * <p>The output field takes the alias of the single input field when there is one and it has
 * an alias, and {@code "pig_collection"} otherwise.
 *
 * @param input schema of the UDF input
 * @return a one-field bytearray schema
 * @throws RuntimeException if a bag's tuples have neither one nor two fields, if a delegate
 *     rejects the schema, or wrapping a {@link FrontendException}
 */
public Schema outputSchema(Schema input) {
  try {
    Properties prop = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    String outputAlias = null;

    if (input.size() == 1) {
      Schema.FieldSchema onlyField = input.getField(0);
      outputAlias = onlyField.alias;
      if (onlyField.type == DataType.TUPLE) {
        // Byte.valueOf replaces the deprecated Byte constructor; the resulting string is the
        // same.
        prop.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_TUPLE_FIELD).toString());
        determineArrayCollectionType(onlyField.schema, prop);
      } else if (onlyField.type == DataType.BAG) {
        prop.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_BAG_FIELD).toString());
        Schema tupleSchema = onlyField.schema.getField(0).schema;
        if (tupleSchema.size() == 1) {
          determineSetCollectionType(tupleSchema, prop);
        } else if (tupleSchema.size() == 2) {
          determineMapCollectionType(tupleSchema, prop);
        } else {
          throw new RuntimeException(
              "Bag must have either single-element tuples (set) "
                  + "or two-element tuples (key, value) to be encoded as a PigArray.");
        }
      }
      // NOTE(review): a single field that is neither a tuple nor a bag falls through here
      // without recording an input type or collection type. Confirm whether that is intended.
    } else {
      prop.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_SEVERAL_FIELDS).toString());
      determineArrayCollectionType(input, prop);
    }

    return new Schema(
        new Schema.FieldSchema(
            outputAlias == null ? "pig_collection" : outputAlias, DataType.BYTEARRAY));
  } catch (FrontendException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Checks the output schema of {@code FrequentStringsSketchToEstimates} for a {@code null}
 * input schema: one bag field holding one tuple field whose four elements are typed chararray,
 * long, long, long.
 */
@Test
public void schema() throws Exception {
  final EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
  final Schema outer = func.outputSchema(null);
  Assert.assertNotNull(outer);
  Assert.assertEquals(outer.size(), 1);

  // Level 1: the single output field is a bag.
  final Schema.FieldSchema bagField = outer.getField(0);
  Assert.assertEquals(bagField.type, DataType.BAG);
  final Schema bagSchema = bagField.schema;
  Assert.assertEquals(bagSchema.size(), 1);

  // Level 2: the bag holds one tuple with four elements.
  final Schema.FieldSchema tupleField = bagSchema.getField(0);
  Assert.assertEquals(tupleField.type, DataType.TUPLE);
  final Schema tupleSchema = tupleField.schema;
  Assert.assertEquals(tupleSchema.size(), 4);

  // Level 3: element types, checked in positional order.
  final byte[] expectedTypes = {
    DataType.CHARARRAY, DataType.LONG, DataType.LONG, DataType.LONG
  };
  for (int pos = 0; pos < expectedTypes.length; pos++) {
    Assert.assertEquals(tupleSchema.getField(pos).type, expectedTypes[pos]);
  }
}
@SuppressWarnings("null") @Test public void outputSchemaTest() throws IOException { EvalFunc<String> udf = new SketchToString(); Schema inputSchema = null; Schema.FieldSchema inputFieldSchema = new Schema.FieldSchema("Sketch", DataType.BYTEARRAY); Schema nullOutputSchema = null; Schema outputSchema = null; Schema.FieldSchema outputOuterFs0 = null; Schema outputInnerSchema = null; Schema.FieldSchema outputInnerFs0 = null; inputSchema = new Schema(inputFieldSchema); nullOutputSchema = udf.outputSchema(null); outputSchema = udf.outputSchema(inputSchema); outputOuterFs0 = outputSchema.getField(0); outputInnerSchema = outputOuterFs0.schema; outputInnerFs0 = outputInnerSchema.getField(0); Assert.assertNull(nullOutputSchema, "Should be null"); Assert.assertNotNull(outputOuterFs0, "outputSchema.getField(0) schema may not be null"); String expected = "tuple"; String result = DataType.findTypeName(outputOuterFs0.type); Assert.assertEquals(result, expected); expected = "chararray"; Assert.assertNotNull(outputInnerFs0, "innerSchema.getField(0) schema may not be null"); result = DataType.findTypeName(outputInnerFs0.type); Assert.assertEquals(result, expected); // print schemas // @formatter:off StringBuilder sb = new StringBuilder(); sb.append("input schema: ") .append(inputSchema) .append(LS) .append("output schema: ") .append(outputSchema) .append(LS) .append("outputOuterFs: ") .append(outputOuterFs0) .append(", type: ") .append(DataType.findTypeName(outputOuterFs0.type)) .append(LS) .append("outputInnerSchema: ") .append(outputInnerSchema) .append(LS) .append("outputInnerFs0: ") .append(outputInnerFs0) .append(", type: ") .append(DataType.findTypeName(outputInnerFs0.type)) .append(LS); println(sb.toString()); // @formatter:on // end print schemas }