示例#1
0
 /**
  * Builds the output schema for a sequence of alternating (bag, key name) input fields and
  * records per-bag metadata in the instance properties.
  *
  * <p>Fields at even positions of {@code input} are treated as bags; fields at odd positions are
  * skipped. For every bag, each field of its first inner field's schema is added to the output
  * as {@code <bagAlias>::<fieldAlias>}, where a missing alias is rendered as the literal string
  * {@code "null"}. The result is a schema with a single bag field named {@code joined}.
  *
  * <p>Side effects: the ordered bag names, the bag-name to join-prefix map and the bag-name to
  * inner-tuple-size map are stored in the instance properties under {@code BAG_NAMES_PROPERTY},
  * {@code BAG_NAME_TO_JOIN_PREFIX_PROPERTY} and {@code BAG_NAME_TO_SIZE_PROPERTY}.
  *
  * @param input schema of the alternating (bag, key name) arguments
  * @return a schema containing one bag field named {@code joined}
  * @throws RuntimeException if an even-position field carries no schema, or if a
  *     {@link FrontendException} is raised while reading or building schemas (kept as the cause)
  */
 @Override
 public Schema getOutputSchema(Schema input) {
   ArrayList<String> bagNames = new ArrayList<String>(input.size() / 2);
   Map<String, String> bagNameToJoinPrefix = new HashMap<String, String>(input.size() / 2);
   Map<String, Integer> bagNameToSize = new HashMap<String, Integer>(input.size() / 2);
   Schema bagSchema = new Schema();
   Schema outputSchema;
   try {
     int nextIndex = 0;
     for (FieldSchema outerField : input.getFields()) {
       // Capture the position of the current field before advancing, so that the error
       // message below reports the field that actually failed.
       int fieldIndex = nextIndex++;
       // all even fields should be bags, odd fields are key names
       if (fieldIndex % 2 == 1) {
         continue;
       }
       // Normalize before recording, so the name stored in bagNames matches the key used in
       // the two maps below even when the bag has no alias.
       String bagName = outerField.alias == null ? "null" : outerField.alias;
       bagNames.add(bagName);
       if (outerField.schema == null) {
         throw new RuntimeException(
             "Expected input format of (bag, 'field') pairs. "
                 + "Did not receive a bag at index: "
                 + fieldIndex
                 + ", alias: "
                 + bagName
                 + ". "
                 + "Instead received type: "
                 + DataType.findTypeName(outerField.type)
                 + " in schema:"
                 + input.toString());
       }
       FieldSchema tupleField = outerField.schema.getField(0);
       String tupleName = tupleField.alias;
       // The prefix is derived from the raw alias (possibly null), as before.
       bagNameToJoinPrefix.put(bagName, getPrefixedAliasName(outerField.alias, tupleName));
       if (tupleField.schema == null) {
         // Best effort: without an inner schema there are no fields to contribute and no size
         // to record, so this bag is only logged.
         log.error(
             String.format(
                 "could not get schema for inner tuple %s in bag %s", tupleName, bagName));
         continue;
       }
       bagNameToSize.put(bagName, tupleField.schema.size());
       for (FieldSchema innerField : tupleField.schema.getFields()) {
         String innerFieldName = innerField.alias == null ? "null" : innerField.alias;
         bagSchema.add(new FieldSchema(bagName + "::" + innerFieldName, innerField.type));
       }
     }
     outputSchema = new Schema(new Schema.FieldSchema("joined", bagSchema, DataType.BAG));
     log.debug("output schema: " + outputSchema.toString());
   } catch (FrontendException e) {
     // The cause is preserved in the rethrown exception; no separate stack-trace dump needed.
     throw new RuntimeException(e);
   }
   Properties properties = getInstanceProperties();
   properties.put(BAG_NAMES_PROPERTY, bagNames);
   properties.put(BAG_NAME_TO_JOIN_PREFIX_PROPERTY, bagNameToJoinPrefix);
   properties.put(BAG_NAME_TO_SIZE_PROPERTY, bagNameToSize);
   return outputSchema;
 }
  /**
   * Produces an output schema with a single bag field whose inner schema is taken unchanged from
   * the first field of {@code input}.
   *
   * <p>The output field's name is obtained from {@code getSchemaName}, which is passed this
   * class's fully qualified name in lower case together with {@code input}.
   *
   * @param input schema of the arguments; its first field must be of type {@code DataType.BAG}
   * @return a schema containing one bag field that reuses the first input field's schema
   * @throws RuntimeException if the first input field is not a bag, or if a
   *     {@link FrontendException} is raised while reading or building schemas (kept as the cause)
   */
  @Override
  public Schema outputSchema(Schema input) {
    try {
      Schema.FieldSchema inputFieldSchema = input.getField(0);

      if (inputFieldSchema.type != DataType.BAG) {
        throw new RuntimeException("Expected a BAG as input");
      }

      // Lower-case with a fixed locale so the generated name does not depend on the JVM's
      // default locale (e.g. Turkish dotless i).
      String lowerCaseClassName = this.getClass().getName().toLowerCase(java.util.Locale.ROOT);

      return new Schema(
          new Schema.FieldSchema(
              getSchemaName(lowerCaseClassName, input), inputFieldSchema.schema, DataType.BAG));
    } catch (FrontendException e) {
      // The cause is preserved in the rethrown exception; no separate stack-trace dump needed.
      throw new RuntimeException(e);
    }
  }