@Override public Schema getOutputSchema(Schema input) { ArrayList<String> bagNames = new ArrayList<String>(input.size() / 2); Map<String, String> bagNameToJoinPrefix = new HashMap<String, String>(input.size() / 2); Map<String, Integer> bagNameToSize = new HashMap<String, Integer>(input.size() / 2); Schema outputSchema = null; Schema bagSchema = new Schema(); try { int i = 0; // all even fields should be bags, odd fields are key names String bagName = null; String tupleName = null; for (FieldSchema outerField : input.getFields()) { if (i++ % 2 == 1) continue; bagName = outerField.alias; bagNames.add(bagName); if (bagName == null) bagName = "null"; if (outerField.schema == null) throw new RuntimeException( "Expected input format of (bag, 'field') pairs. " + "Did not receive a bag at index: " + i + ", alias: " + bagName + ". " + "Instead received type: " + DataType.findTypeName(outerField.type) + " in schema:" + input.toString()); FieldSchema tupleField = outerField.schema.getField(0); tupleName = tupleField.alias; bagNameToJoinPrefix.put(bagName, getPrefixedAliasName(outerField.alias, tupleName)); if (tupleField.schema == null) { log.error( String.format( "could not get schema for inner tuple %s in bag %s", tupleName, bagName)); } else { bagNameToSize.put(bagName, tupleField.schema.size()); for (FieldSchema innerField : tupleField.schema.getFields()) { String innerFieldName = innerField.alias; if (innerFieldName == null) innerFieldName = "null"; String outputFieldName = bagName + "::" + innerFieldName; bagSchema.add(new FieldSchema(outputFieldName, innerField.type)); } } } outputSchema = new Schema(new Schema.FieldSchema("joined", bagSchema, DataType.BAG)); log.debug("output schema: " + outputSchema.toString()); } catch (FrontendException e) { e.printStackTrace(); throw new RuntimeException(e); } Properties properties = getInstanceProperties(); properties.put(BAG_NAMES_PROPERTY, bagNames); properties.put(BAG_NAME_TO_JOIN_PREFIX_PROPERTY, bagNameToJoinPrefix); 
properties.put(BAG_NAME_TO_SIZE_PROPERTY, bagNameToSize); return outputSchema; }
/**
 * Inspects the input schema, records the detected input layout in the UDF properties and
 * returns a schema with a single {@code BYTEARRAY} field.
 *
 * <ul>
 *   <li>More than one field (or none): the layout is recorded as {@code INPUT_SEVERAL_FIELDS}
 *       and the whole input is passed to {@code determineArrayCollectionType}; the output field
 *       is named {@code "pig_collection"}.
 *   <li>Exactly one tuple field: recorded as {@code INPUT_TUPLE_FIELD}; the tuple's schema is
 *       passed to {@code determineArrayCollectionType}.
 *   <li>Exactly one bag field: recorded as {@code INPUT_BAG_FIELD}; bags of one-field tuples go
 *       to {@code determineSetCollectionType}, bags of two-field tuples go to
 *       {@code determineMapCollectionType}.
 *   <li>Exactly one field of any other type: no property is set.
 * </ul>
 *
 * In the single-field cases the output field reuses the input field's alias, falling back to
 * {@code "pig_collection"} when the alias is null.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type {@code BYTEARRAY}
 * @throws RuntimeException if a bag's tuples have neither one nor two fields, or wrapping any
 *     {@link FrontendException} raised while reading the schema
 */
public Schema outputSchema(Schema input) {
  try {
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass());

    // Anything other than a single field is treated as a flat list of array elements.
    if (input.size() != 1) {
      udfProps.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_SEVERAL_FIELDS).toString());
      determineArrayCollectionType(input, udfProps);
      return new Schema(new Schema.FieldSchema("pig_collection", DataType.BYTEARRAY));
    }

    Schema.FieldSchema soleField = input.getField(0);
    if (soleField.type == DataType.TUPLE) {
      udfProps.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_TUPLE_FIELD).toString());
      determineArrayCollectionType(soleField.schema, udfProps);
    } else if (soleField.type == DataType.BAG) {
      udfProps.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_BAG_FIELD).toString());
      Schema elementSchema = soleField.schema.getField(0).schema;
      switch (elementSchema.size()) {
        case 1:
          determineSetCollectionType(elementSchema, udfProps);
          break;
        case 2:
          determineMapCollectionType(elementSchema, udfProps);
          break;
        default:
          throw new RuntimeException(
              "Bag must have either single-element tuples (set) "
                  + "or two-element tuples (key, value) to be encoded as a PigArray.");
      }
    }

    String alias = soleField.alias;
    if (alias == null) {
      alias = "pig_collection";
    }
    return new Schema(new Schema.FieldSchema(alias, DataType.BYTEARRAY));
  } catch (FrontendException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Verifies that every field of {@code input} has the same type as its first field and records
 * the matching array type via {@code setArrayTypeProperty}.
 *
 * <p>{@code DataType.INTEGER} maps to {@code PigCollection.INT_ARRAY} and {@code DataType.FLOAT}
 * maps to {@code PigCollection.FLOAT_ARRAY}; any other element type is rejected.
 *
 * @param input schema whose fields are the array elements
 * @param prop properties object handed to {@code setArrayTypeProperty}
 * @throws FrontendException if a field cannot be read from {@code input}
 * @throws RuntimeException if the fields do not all share one type, or that type is neither
 *     integer nor float
 */
public void determineArrayCollectionType(Schema input, Properties prop) throws FrontendException {
  final byte elementType = input.getField(0).type;

  // Every remaining field must match the type of the first one.
  int position = 1;
  while (position < input.size()) {
    if (input.getField(position).type != elementType) {
      throw new RuntimeException("All inputs must have the same type");
    }
    position++;
  }

  if (elementType == DataType.INTEGER) {
    setArrayTypeProperty(prop, PigCollection.INT_ARRAY);
    return;
  }
  if (elementType == DataType.FLOAT) {
    setArrayTypeProperty(prop, PigCollection.FLOAT_ARRAY);
    return;
  }
  throw new RuntimeException("Recieved vector of unsupported schema. Should be ints or floats");
}
/**
 * Validates that the input consists of exactly one bag field and declares a single unnamed
 * {@code LONG} output field.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type {@code LONG} with a null alias
 * @throws RuntimeException if the input does not have exactly one field, if that field is not
 *     a bag, or wrapping a {@link FrontendException} raised while reading the field
 */
@Override
public Schema outputSchema(Schema input) {
  // Guard: the arity check needs no schema access that could raise FrontendException.
  if (input.size() != 1) {
    throw new RuntimeException("Expected input to have only a single field");
  }

  try {
    Schema.FieldSchema soleField = input.getField(0);
    boolean isBag = soleField.type == DataType.BAG;
    if (!isBag) {
      throw new RuntimeException("Expected a BAG as input");
    }

    Schema.FieldSchema resultField = new Schema.FieldSchema(null, DataType.LONG);
    return new Schema(resultField);
  } catch (FrontendException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Checks the schema reported by {@code FrequentStringsSketchToEstimates} for a null input
 * schema: one bag holding one tuple of four fields typed (CHARARRAY, LONG, LONG, LONG).
 */
@Test
public void schema() throws Exception {
  EvalFunc<DataBag> udf = new FrequentStringsSketchToEstimates();
  Schema result = udf.outputSchema(null);

  // Outer level: exactly one field, and it is a bag.
  Assert.assertNotNull(result);
  Assert.assertEquals(result.size(), 1);
  Schema.FieldSchema bagField = result.getField(0);
  Assert.assertEquals(bagField.type, DataType.BAG);

  // Bag level: exactly one field, and it is a tuple.
  Assert.assertEquals(bagField.schema.size(), 1);
  Schema.FieldSchema tupleField = bagField.schema.getField(0);
  Assert.assertEquals(tupleField.type, DataType.TUPLE);

  // Tuple level: a chararray followed by three longs.
  Assert.assertEquals(tupleField.schema.size(), 4);
  Schema tupleSchema = tupleField.schema;
  Assert.assertEquals(tupleSchema.getField(0).type, DataType.CHARARRAY);
  for (int position = 1; position <= 3; position++) {
    Assert.assertEquals(tupleSchema.getField(position).type, DataType.LONG);
  }
}