/**
 * Chooses the map-style PigCollection type for a two-field (key, value) tuple schema and hands it
 * to {@code setArrayTypeProperty}.
 *
 * <p>Accepted keys are int or chararray; accepted values are int or float. An unsupported key
 * type is reported before an unsupported value type.
 *
 * @param input schema whose field 0 is read as the key and field 1 as the value
 * @param prop properties object passed through to {@code setArrayTypeProperty}
 * @throws FrontendException if a field cannot be read from {@code input}
 * @throws RuntimeException if the key or value type is not one of the accepted types
 */
public void determineMapCollectionType(Schema input, Properties prop) throws FrontendException {
    final byte keyType = input.getField(0).type;
    final byte valueType = input.getField(1).type;

    // Reject unsupported keys first so the key message wins over the value message.
    final boolean intKey = keyType == DataType.INTEGER;
    if (!intKey && keyType != DataType.CHARARRAY) {
      throw new RuntimeException(
          "Recieved bag of unsupported schema. "
              + "Bags with two-element tuples must have schema {(int/chararray, int/float/float)}");
    }

    final boolean intValue = valueType == DataType.INTEGER;
    if (!intValue && valueType != DataType.FLOAT) {
      throw new RuntimeException("Value type for map-type PigCollection should be int or float");
    }

    // Both types are known to be supported from here on; pick the matching combination.
    if (intKey) {
      if (intValue) {
        setArrayTypeProperty(prop, PigCollection.INT_INT_MAP);
      } else {
        setArrayTypeProperty(prop, PigCollection.INT_FLOAT_MAP);
      }
    } else {
      if (intValue) {
        setArrayTypeProperty(prop, PigCollection.STRING_INT_MAP);
      } else {
        setArrayTypeProperty(prop, PigCollection.STRING_FLOAT_MAP);
      }
    }
  }
 /**
  * Registers a load query with a {@code null} third argument and verifies that alias "A" reports
  * an int field named "key" followed by a chararray field named "value".
  */
 @Test
 public void readWithoutSchemaTestSchema() throws IOException {
   registerLoadQuery(IntWritableConverter.class, TextConverter.class, null);
   Schema loaded = pigServer.dumpSchema("A");
   Assert.assertNotNull(loaded);

   // First column.
   Schema.FieldSchema keyField = loaded.getField(0);
   Assert.assertEquals("key", keyField.alias);
   Assert.assertEquals(DataType.INTEGER, keyField.type);

   // Second column; looked up only after the first column has been verified.
   Schema.FieldSchema valueField = loaded.getField(1);
   Assert.assertEquals("value", valueField.alias);
   Assert.assertEquals(DataType.CHARARRAY, valueField.type);
 }
Beispiel #3
0
 @Test
 // See PIG-730
 public void testMergeSchemaWithTwoLevelAccess1() throws Exception {
   // Two single-bag schemas, each with its bag schema flagged as requiring two-level access.
   Schema left = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}");
   left.getField(0).schema.setTwoLevelAccessRequired(true);

   Schema right = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}");
   right.getField(0).schema.setTwoLevelAccessRequired(true);

   // After merging, the bag schema of the result must still carry the flag.
   Schema merged = Schema.mergeSchema(left, right, true);
   boolean twoLevelAccess = merged.getField(0).schema.isTwoLevelAccessRequired();
   Assert.assertEquals(twoLevelAccess, true);
 }
  /**
   * Chooses the array-style PigCollection type for a schema whose fields all share one type and
   * hands it to {@code setArrayTypeProperty}.
   *
   * @param input schema whose fields are the array elements; field 0 sets the expected type
   * @param prop properties object passed through to {@code setArrayTypeProperty}
   * @throws FrontendException if a field cannot be read from {@code input}
   * @throws RuntimeException if the fields differ in type, or the shared type is neither int nor
   *     float
   */
  public void determineArrayCollectionType(Schema input, Properties prop) throws FrontendException {
    final byte elementType = input.getField(0).type;

    // Every remaining field has to agree with the first one.
    int idx = 1;
    while (idx < input.size()) {
      if (input.getField(idx).type != elementType) {
        throw new RuntimeException("All inputs must have the same type");
      }
      idx++;
    }

    if (elementType != DataType.INTEGER && elementType != DataType.FLOAT) {
      throw new RuntimeException("Recieved vector of unsupported schema. Should be ints or floats");
    }

    if (elementType == DataType.INTEGER) {
      setArrayTypeProperty(prop, PigCollection.INT_ARRAY);
    } else {
      setArrayTypeProperty(prop, PigCollection.FLOAT_ARRAY);
    }
  }
 /**
  * Describes the output as a single BAG field that reuses the inner schema of the first input
  * field and is named via {@code getSchemaName}.
  *
  * @param input schema of the arguments passed to this function
  * @return the one-field output schema, or {@code null} when a {@code FrontendException} is
  *     raised while building it
  */
 @Override
 public Schema outputSchema(Schema input) {
   try {
     // The name is computed before the input field is touched, as in the nested-call form.
     String fieldName = getSchemaName(this.getClass().getName().toLowerCase(), input);
     Schema innerSchema = input.getField(0).schema;
     Schema.FieldSchema bagField = new Schema.FieldSchema(fieldName, innerSchema, DataType.BAG);
     return new Schema(bagField);
   } catch (FrontendException e) {
     return null;
   }
 }
  /**
   * Chooses the set-style PigCollection type for a single-field tuple schema and hands it to
   * {@code setArrayTypeProperty}.
   *
   * <p>Only field 0 of {@code input} is inspected; it must be int or chararray.
   *
   * @param input schema of the single-element tuples held in the bag
   * @param prop properties object passed through to {@code setArrayTypeProperty}
   * @throws FrontendException if field 0 cannot be read from {@code input}
   * @throws RuntimeException if the element type is neither int nor chararray
   */
  public void determineSetCollectionType(Schema input, Properties prop) throws FrontendException {
    byte type = input.getField(0).type;

    if (type == DataType.INTEGER) {
      setArrayTypeProperty(prop, PigCollection.INT_SET);
    } else if (type == DataType.CHARARRAY) {
      setArrayTypeProperty(prop, PigCollection.STRING_SET);
    } else {
      // This is the set case, i.e. bags of single-element tuples; two-element tuples are the
      // map case and are handled by determineMapCollectionType.
      throw new RuntimeException(
          "Received bag of unsupported schema. "
              + "Bags with single-element tuples must have schema {(int/chararray)}");
    }
  }
Beispiel #7
0
  /**
   * Validates that the input is a single bag whose tuples start with (chararray, chararray,
   * double, chararray) and reports a two-field output: {@code cls:chararray, weight:double}.
   *
   * @param input schema of the arguments passed to this function
   * @return a schema with fields "cls" (chararray) and "weight" (double)
   * @throws RuntimeException if validation fails or any other exception occurs; every exception
   *     raised inside the method, including the validation failures, is wrapped in a new
   *     {@code RuntimeException}
   */
  @Override
  public Schema outputSchema(Schema input) {
    try {
      boolean singleBagInput =
          input.getFields().size() == 1 && input.getField(0).type == DataType.BAG;
      if (!singleBagInput) {
        throw new RuntimeException("expect input {bag}");
      }

      // Schema of the tuples held inside the bag.
      Schema tupleSchema = input.getField(0).schema.getField(0).schema;

      // The size test comes first so the field lookups below never run on a too-short tuple.
      boolean hasExpectedColumns =
          tupleSchema.getFields().size() >= 4
              && tupleSchema.getField(0).type == DataType.CHARARRAY
              && tupleSchema.getField(1).type == DataType.CHARARRAY
              && tupleSchema.getField(2).type == DataType.DOUBLE
              && tupleSchema.getField(3).type == DataType.CHARARRAY;
      if (!hasExpectedColumns) {
        throw new RuntimeException(
            "expect input {userid:chararray, " + "md:chararray, weight:double, cls:chararray}");
      }

      Schema output = new Schema();
      output.add(new FieldSchema("cls", DataType.CHARARRAY));
      output.add(new FieldSchema("weight", DataType.DOUBLE));
      return output;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
  /**
   * Validates that the input is exactly one BAG field and reports a single unnamed LONG output
   * field.
   *
   * @param input schema of the arguments passed to this function
   * @return a schema holding one LONG field with a {@code null} alias
   * @throws RuntimeException if the input does not have exactly one field, if that field is not a
   *     BAG, or (wrapping the cause) if a {@code FrontendException} is raised
   */
  @Override
  public Schema outputSchema(Schema input) {
    try {
      if (input.size() != 1) {
        throw new RuntimeException("Expected input to have only a single field");
      }

      byte onlyFieldType = input.getField(0).type;
      if (onlyFieldType != DataType.BAG) {
        throw new RuntimeException("Expected a BAG as input");
      }

      Schema.FieldSchema longField = new Schema.FieldSchema(null, DataType.LONG);
      return new Schema(longField);
    } catch (FrontendException e) {
      throw new RuntimeException(e);
    }
  }
  /**
   * Validates that the first input field is a BAG and reports a single BAG output field that
   * reuses the input bag's inner schema and is named via {@code getSchemaName}.
   *
   * @param input schema of the arguments passed to this function
   * @return a one-field schema describing the output bag
   * @throws RuntimeException if the first field is not a BAG, or (wrapping the cause) if a
   *     {@code FrontendException} is raised while reading or building the schema
   */
  @Override
  public Schema outputSchema(Schema input) {
    try {
      Schema.FieldSchema inputFieldSchema = input.getField(0);

      if (inputFieldSchema.type != DataType.BAG) {
        throw new RuntimeException("Expected a BAG as input");
      }

      return new Schema(
          new Schema.FieldSchema(
              getSchemaName(this.getClass().getName().toLowerCase(), input),
              inputFieldSchema.schema,
              DataType.BAG));
    } catch (FrontendException e) {
      // The cause travels with the rethrown exception, so it is not also dumped to stderr here.
      throw new RuntimeException(e);
    }
  }
  /**
   * Inspects the input schema, records the input shape and the collection type in this UDF's
   * properties, and reports a single bytearray output field.
   *
   * <p>Shapes handled:
   *
   * <ul>
   *   <li>one TUPLE field: array collection, typed from the tuple's fields;
   *   <li>one BAG field of single-element tuples: set collection;
   *   <li>one BAG field of two-element tuples: map collection;
   *   <li>any other field count: array collection, typed from the input fields themselves.
   * </ul>
   *
   * @param input schema of the arguments passed to this function
   * @return a one-field bytearray schema, named after the only input field when there is exactly
   *     one and it has an alias, otherwise "pig_collection"
   * @throws RuntimeException if a bag's tuples have neither one nor two elements, if a helper
   *     rejects the element types, or (wrapping the cause) if a {@code FrontendException} is raised
   */
  public Schema outputSchema(Schema input) {
    try {
      Properties prop = UDFContext.getUDFContext().getUDFProperties(this.getClass());
      String outputAlias = null;

      if (input.size() == 1) {
        Schema.FieldSchema onlyField = input.getField(0);
        outputAlias = onlyField.alias;
        if (onlyField.type == DataType.TUPLE) {
          // Byte.valueOf replaces the deprecated Byte constructor; the resulting string is the same.
          prop.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_TUPLE_FIELD).toString());
          determineArrayCollectionType(onlyField.schema, prop);
        } else if (onlyField.type == DataType.BAG) {
          prop.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_BAG_FIELD).toString());

          // Schema of the tuples held inside the bag.
          Schema tupleSchema = onlyField.schema.getField(0).schema;
          if (tupleSchema.size() == 1) {
            determineSetCollectionType(tupleSchema, prop);
          } else if (tupleSchema.size() == 2) {
            determineMapCollectionType(tupleSchema, prop);
          } else {
            throw new RuntimeException(
                "Bag must have either single-element tuples (set) "
                    + "or two-element tuples (key, value) to be encoded as a PigArray.");
          }
        }
        // NOTE(review): a single field that is neither TUPLE nor BAG falls through here without
        // any property being set - confirm whether that is intended.
      } else {
        prop.setProperty(INPUT_TYPE_SIGNATURE, Byte.valueOf(INPUT_SEVERAL_FIELDS).toString());
        determineArrayCollectionType(input, prop);
      }

      return new Schema(
          new Schema.FieldSchema(
              outputAlias == null ? "pig_collection" : outputAlias, DataType.BYTEARRAY));
    } catch (FrontendException e) {
      throw new RuntimeException(e);
    }
  }
 /**
  * Verifies the schema reported by {@code FrequentStringsSketchToEstimates} for a null input: a
  * bag of four-field tuples typed (chararray, long, long, long).
  */
 @Test
 public void schema() throws Exception {
   EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
   Schema schema = func.outputSchema(null);
   Assert.assertNotNull(schema);
   Assert.assertEquals(schema.size(), 1);

   // Outer level: a single bag.
   Schema.FieldSchema bagField = schema.getField(0);
   Assert.assertEquals(bagField.type, DataType.BAG);
   Schema bagSchema = bagField.schema;
   Assert.assertEquals(bagSchema.size(), 1);

   // Bag contents: a single tuple with four fields.
   Schema.FieldSchema tupleField = bagSchema.getField(0);
   Assert.assertEquals(tupleField.type, DataType.TUPLE);
   Schema tupleSchema = tupleField.schema;
   Assert.assertEquals(tupleSchema.size(), 4);

   // Tuple layout: one chararray followed by three longs.
   Assert.assertEquals(tupleSchema.getField(0).type, DataType.CHARARRAY);
   for (int i = 1; i <= 3; i++) {
     Assert.assertEquals(tupleSchema.getField(i).type, DataType.LONG);
   }
 }
  /**
   * Verifies the schema reported by {@code SketchToString}: {@code null} for a null input,
   * otherwise a tuple field wrapping a single chararray field. The schemas involved are printed
   * at the end.
   */
  @SuppressWarnings("null")
  @Test
  public void outputSchemaTest() throws IOException {
    EvalFunc<String> udf = new SketchToString();

    Schema inputSchema = new Schema(new Schema.FieldSchema("Sketch", DataType.BYTEARRAY));

    // Collect everything first, then assert, so both outputSchema calls always run.
    Schema nullOutputSchema = udf.outputSchema(null);

    Schema outputSchema = udf.outputSchema(inputSchema);
    Schema.FieldSchema outputOuterFs0 = outputSchema.getField(0);

    Schema outputInnerSchema = outputOuterFs0.schema;
    Schema.FieldSchema outputInnerFs0 = outputInnerSchema.getField(0);

    Assert.assertNull(nullOutputSchema, "Should be null");
    Assert.assertNotNull(outputOuterFs0, "outputSchema.getField(0) schema may not be null");
    Assert.assertEquals(DataType.findTypeName(outputOuterFs0.type), "tuple");

    Assert.assertNotNull(outputInnerFs0, "innerSchema.getField(0) schema may not be null");
    Assert.assertEquals(DataType.findTypeName(outputInnerFs0.type), "chararray");

    // print schemas
    // @formatter:off
    StringBuilder report = new StringBuilder();
    report.append("input schema: ").append(inputSchema).append(LS);
    report.append("output schema: ").append(outputSchema).append(LS);
    report.append("outputOuterFs: ").append(outputOuterFs0);
    report.append(", type: ").append(DataType.findTypeName(outputOuterFs0.type)).append(LS);
    report.append("outputInnerSchema: ").append(outputInnerSchema).append(LS);
    report.append("outputInnerFs0: ").append(outputInnerFs0);
    report.append(", type: ").append(DataType.findTypeName(outputInnerFs0.type)).append(LS);
    println(report.toString());
    // @formatter:on
    // end print schemas
  }