Esempio n. 1
0
  @Override
  public ResourceSchema getSchema(String location, Job job) throws IOException {

    if (!partitionKeysSet) {
      Set<String> keys = getPartitionColumns(location, job);

      if (!(keys == null || keys.size() == 0)) {

        // re-edit the pigSchema to contain the new partition keys.
        ResourceFieldSchema[] fields = pigSchema.getFields();

        LOG.debug("Schema: " + Arrays.toString(fields));

        ResourceFieldSchema[] newFields = Arrays.copyOf(fields, fields.length + keys.size());

        int index = fields.length;

        for (String key : keys) {
          newFields[index++] = new ResourceFieldSchema(new FieldSchema(key, DataType.CHARARRAY));
        }

        pigSchema.setFields(newFields);

        LOG.debug("Added partition fields: " + keys + " to loader schema");
        LOG.debug("Schema is: " + Arrays.toString(newFields));
      }

      partitionKeysSet = true;
    }

    return pigSchema;
  }
Esempio n. 2
0
  /** wrap a pig schema as tuple */
  public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema)
      throws IOException {
    ResourceSchema listSchema = new ResourceSchema();
    listSchema.setFields(new ResourceFieldSchema[] {subFieldSchema});

    ResourceFieldSchema tupleWrapper = new ResourceFieldSchema();
    tupleWrapper.setType(DataType.TUPLE);
    tupleWrapper.setName(PIG_TUPLE_WRAPPER);
    tupleWrapper.setSchema(listSchema);

    return tupleWrapper;
  }