示例#1
0
 /**
  * Builds the output schema for the joined bag and records per-bag metadata in the instance
  * properties.
  *
  * <p>The input fields are read as alternating pairs: fields at even positions are expected to
  * be bags, fields at odd positions are key names and are skipped here. For every bag, each
  * field of its first inner field (the tuple) is added to the output as
  * {@code <bagName>::<innerFieldName>}, with the literal text {@code "null"} standing in for a
  * missing alias.
  *
  * <p>Side effects: stores the list of bag names, the bag-name to join-prefix map and the
  * bag-name to tuple-size map in {@code getInstanceProperties()}.
  *
  * @param input schema of the UDF arguments
  * @return a schema with one BAG field named {@code "joined"} holding the collected fields
  * @throws RuntimeException if a field at an even position carries no schema, or if a
  *     {@code FrontendException} is raised while reading or building schemas (kept as the cause)
  */
 @Override
 public Schema getOutputSchema(Schema input) {
   final int expectedBagCount = input.size() / 2;
   ArrayList<String> bagNames = new ArrayList<String>(expectedBagCount);
   Map<String, String> bagNameToJoinPrefix = new HashMap<String, String>(expectedBagCount);
   Map<String, Integer> bagNameToSize = new HashMap<String, Integer>(expectedBagCount);
   Schema bagSchema = new Schema();
   Schema outputSchema = null;
   try {
     // Zero-based position of the current field; also used verbatim in the error message so
     // the reported index matches the offending field.
     int fieldIndex = -1;
     for (FieldSchema outerField : input.getFields()) {
       fieldIndex++;
       // all even fields should be bags, odd fields are key names
       if (fieldIndex % 2 == 1) {
         continue;
       }

       // NOTE(review): the raw alias (possibly null) goes into bagNames, while the maps below
       // are keyed by the "null" placeholder. Preserved as-is; confirm readers expect this.
       bagNames.add(outerField.alias);
       String bagName = outerField.alias == null ? "null" : outerField.alias;

       if (outerField.schema == null) {
         throw new RuntimeException(
             "Expected input format of (bag, 'field') pairs. "
                 + "Did not receive a bag at index: "
                 + fieldIndex
                 + ", alias: "
                 + bagName
                 + ". "
                 + "Instead received type: "
                 + DataType.findTypeName(outerField.type)
                 + " in schema:"
                 + input.toString());
       }

       FieldSchema tupleField = outerField.schema.getField(0);
       String tupleName = tupleField.alias;
       bagNameToJoinPrefix.put(bagName, getPrefixedAliasName(outerField.alias, tupleName));

       if (tupleField.schema == null) {
         // Best effort: the bag contributes no output fields and gets no size entry.
         log.error(
             String.format(
                 "could not get schema for inner tuple %s in bag %s", tupleName, bagName));
       } else {
         bagNameToSize.put(bagName, tupleField.schema.size());
         for (FieldSchema innerField : tupleField.schema.getFields()) {
           String innerFieldName = innerField.alias == null ? "null" : innerField.alias;
           bagSchema.add(new FieldSchema(bagName + "::" + innerFieldName, innerField.type));
         }
       }
     }
     outputSchema = new Schema(new Schema.FieldSchema("joined", bagSchema, DataType.BAG));
     log.debug("output schema: " + outputSchema.toString());
   } catch (FrontendException e) {
     // The cause is preserved on the wrapper, so no separate stack-trace dump is needed.
     throw new RuntimeException(e);
   }

   Properties properties = getInstanceProperties();
   properties.put(BAG_NAMES_PROPERTY, bagNames);
   properties.put(BAG_NAME_TO_JOIN_PREFIX_PROPERTY, bagNameToJoinPrefix);
   properties.put(BAG_NAME_TO_SIZE_PROPERTY, bagNameToSize);
   return outputSchema;
 }
  /**
   * Classifies the input schema, stores the detected input kind in this UDF's properties and
   * reports a single BYTEARRAY output field.
   *
   * <p>With exactly one input field, a TUPLE is handed to
   * {@code determineArrayCollectionType} and a BAG is dispatched on the width of its inner
   * tuple (1 field: set, 2 fields: map). A single field of any other type sets no properties.
   * With any other number of fields, the whole input is handed to
   * {@code determineArrayCollectionType}.
   *
   * @param input schema of the UDF arguments
   * @return a one-field BYTEARRAY schema named after the sole input field's alias, or
   *     {@code "pig_collection"} when no alias is available
   * @throws RuntimeException if a bag's inner tuple has neither one nor two fields, or wrapping
   *     any {@code FrontendException} raised while reading the schema
   */
  public Schema outputSchema(Schema input) {
    try {
      Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass());
      String alias = null;

      if (input.size() != 1) {
        udfProps.setProperty(INPUT_TYPE_SIGNATURE, new Byte(INPUT_SEVERAL_FIELDS).toString());
        determineArrayCollectionType(input, udfProps);
      } else {
        Schema.FieldSchema soleField = input.getField(0);
        alias = soleField.alias;

        if (soleField.type == DataType.TUPLE) {
          udfProps.setProperty(INPUT_TYPE_SIGNATURE, new Byte(INPUT_TUPLE_FIELD).toString());
          determineArrayCollectionType(soleField.schema, udfProps);
        } else if (soleField.type == DataType.BAG) {
          udfProps.setProperty(INPUT_TYPE_SIGNATURE, new Byte(INPUT_BAG_FIELD).toString());

          Schema innerTuple = soleField.schema.getField(0).schema;
          switch (innerTuple.size()) {
            case 1:
              determineSetCollectionType(innerTuple, udfProps);
              break;
            case 2:
              determineMapCollectionType(innerTuple, udfProps);
              break;
            default:
              throw new RuntimeException(
                  "Bag must have either single-element tuples (set) "
                      + "or two-element tuples (key, value) to be encoded as a PigArray.");
          }
        }
      }

      if (alias == null) {
        alias = "pig_collection";
      }
      return new Schema(new Schema.FieldSchema(alias, DataType.BYTEARRAY));
    } catch (FrontendException e) {
      throw new RuntimeException(e);
    }
  }
  /**
   * Checks that every field of {@code input} has the same type as the first one and records
   * the matching array type in {@code prop}.
   *
   * @param input schema whose fields are the array elements
   * @param prop properties passed on to {@code setArrayTypeProperty}
   * @throws FrontendException if a field cannot be read from {@code input}
   * @throws RuntimeException if the field types differ, or the common type is neither INTEGER
   *     nor FLOAT
   */
  public void determineArrayCollectionType(Schema input, Properties prop) throws FrontendException {
    final byte elementType = input.getField(0).type;

    int position = 1;
    while (position < input.size()) {
      if (input.getField(position).type != elementType) {
        throw new RuntimeException("All inputs must have the same type");
      }
      position++;
    }

    if (elementType == DataType.INTEGER) {
      setArrayTypeProperty(prop, PigCollection.INT_ARRAY);
      return;
    }
    if (elementType == DataType.FLOAT) {
      setArrayTypeProperty(prop, PigCollection.FLOAT_ARRAY);
      return;
    }
    throw new RuntimeException("Recieved vector of unsupported schema. Should be ints or floats");
  }
  /**
   * Validates that the input is exactly one BAG field and reports an unnamed LONG output field.
   *
   * @param input schema of the UDF arguments
   * @return a one-field schema of type LONG with no alias
   * @throws RuntimeException if the input does not have exactly one field, if that field is not
   *     a BAG, or wrapping any {@code FrontendException} raised while reading the schema
   */
  @Override
  public Schema outputSchema(Schema input) {
    try {
      final boolean singleField = input.size() == 1;
      if (!singleField) {
        throw new RuntimeException("Expected input to have only a single field");
      }

      final byte fieldType = input.getField(0).type;
      if (fieldType != DataType.BAG) {
        throw new RuntimeException("Expected a BAG as input");
      }

      Schema.FieldSchema resultField = new Schema.FieldSchema(null, DataType.LONG);
      return new Schema(resultField);
    } catch (FrontendException e) {
      throw new RuntimeException(e);
    }
  }
 /**
  * Verifies the shape of the schema returned by {@code outputSchema(null)}: one BAG field
  * holding one TUPLE field with four columns typed CHARARRAY, LONG, LONG, LONG.
  */
 @Test
 public void schema() throws Exception {
   EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   Schema schema = func.outputSchema(null);
   Assert.assertNotNull(schema);
   Assert.assertEquals(schema.size(), 1);

   // Outer level: a single bag.
   Schema.FieldSchema bagField = schema.getField(0);
   Assert.assertEquals(bagField.type, DataType.BAG);
   Schema bagContents = bagField.schema;
   Assert.assertEquals(bagContents.size(), 1);

   // Inside the bag: a single tuple with four columns.
   Schema.FieldSchema tupleField = bagContents.getField(0);
   Assert.assertEquals(tupleField.type, DataType.TUPLE);
   Schema tupleContents = tupleField.schema;
   Assert.assertEquals(tupleContents.size(), 4);

   // Column 0 is the item; columns 1 through 3 are all LONG.
   Assert.assertEquals(tupleContents.getField(0).type, DataType.CHARARRAY);
   for (int column = 1; column <= 3; column++) {
     Assert.assertEquals(tupleContents.getField(column).type, DataType.LONG);
   }
 }