Java FieldPartitioner Examples

Programming Language: Java

Namespace/Package Name: org.kitesdk.data.spi

Class/Type: FieldPartitioner

Examples at hotexamples.com: 5

Java FieldPartitioner - 5 examples found. These are the top-rated real-world Java examples of org.kitesdk.data.spi.FieldPartitioner extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

getName(4)

getCardinality(1)

valueFromString(1)

valueToString(1)

Example #1

Show file

 /**
  * Populates the Avro headers through the superclass and then adds one header per partition
  * field when a partition strategy is known.
  *
  * <p>On the first invocation the dataset is loaded to pick up its partition strategy (only if
  * the dataset is partitioned) and its schema URL (only if one is set). Any failure during that
  * setup is rethrown wrapped in a {@link FlumeException}; the {@code initialized} flag is set
  * regardless of the outcome, so the setup is attempted once.
  *
  * @param hdrs header map that receives the entries
  * @param schema passed through to the superclass implementation
  * @param message the entity used to compute the partition key
  */
 @Override
 @SuppressWarnings({"unchecked", "deprecation"})
 protected void populateAvroHeaders(Map<String, String> hdrs, Schema schema, Object message) {
   if (!initialized) {
     // Setup is deferred to the first call instead of activateOptions so that
     // Configuration and log4j do not run into an initialization cycle.
     try {
       Dataset dataset =
           Datasets.load(new URIBuilder(datasetRepositoryUri, datasetName).build());
       boolean partitioned = dataset.getDescriptor().isPartitioned();
       if (partitioned) {
         partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
       }
       URL schemaLocation = dataset.getDescriptor().getSchemaUrl();
       if (null != schemaLocation) {
         setAvroSchemaUrl(schemaLocation.toExternalForm());
       }
     } catch (Exception e) {
       throw new FlumeException(e);
     } finally {
       // Flag is raised on success and on failure alike.
       initialized = true;
     }
   }

   super.populateAvroHeaders(hdrs, schema, message);

   if (partitionStrategy == null) {
     // No partition strategy was picked up: nothing more to add.
     return;
   }

   // The key instance is handed back in as the last argument on every call.
   key = PartitionKey.partitionKeyForEntity(partitionStrategy, message, key);
   int position = 0;
   for (FieldPartitioner partitioner : partitionStrategy.getFieldPartitioners()) {
     // Header name is the shared prefix followed by the partition field's name.
     String headerName = PARTITION_PREFIX + partitioner.getName();
     hdrs.put(headerName, partitioner.valueToString(key.get(position)));
     position++;
   }
 }

Example #2

Show file

File: PartitionStrategy.java Project: rbrush/kite

 /**
  * Builds a partition strategy from an ordered list of field partitioners.
  *
  * <p>The list is copied into an immutable snapshot, and a name-to-partitioner lookup map is
  * built from the same partitioners.
  *
  * @param partitioners the field partitioners, in partition order
  */
 PartitionStrategy(List<FieldPartitioner> partitioners) {
   ImmutableList<FieldPartitioner> snapshot = ImmutableList.copyOf(partitioners);
   this.fieldPartitioners = snapshot;

   // Index every partitioner by the name it reports.
   ImmutableMap.Builder<String, FieldPartitioner> byName = ImmutableMap.builder();
   for (FieldPartitioner partitioner : snapshot) {
     byName.put(partitioner.getName(), partitioner);
   }
   this.partitionerMap = byName.build();
 }

Example #3

Show file

File: PartitionStrategy.java Project: rbrush/kite

 /**
  * Registers a field partitioner, first validating that its name is not already present in
  * {@code names}. After a successful check the partitioner is appended to the partitioner list
  * and its name is recorded.
  *
  * @param fp the field partitioner to register
  */
 private void add(FieldPartitioner fp) {
   final String partitionName = fp.getName();
   final boolean nameIsFree = !names.contains(partitionName);
   // NOTE(review): check(...) presumably throws ValidationException when the condition is false.
   ValidationException.check(
       nameIsFree,
       "Partition name %s conflicts with an existing field or partition name",
       partitionName);
   fieldPartitioners.add(fp);
   names.add(partitionName);
 }

Example #4

Show file

File: FileSystemDatasetRepository.java Project: stevek-ngdata/kite

  /**
   * Get a {@link org.kitesdk.data.PartitionKey} corresponding to a partition's filesystem path
   * represented as a {@link URI}. If the path is not a valid partition, then {@link
   * IllegalArgumentException} is thrown. Note that the partition does not have to exist.
   *
   * <p>The path, taken relative to the dataset directory, is read as a sequence of {@code
   * name=value} directories. Each name must match the field partitioner at the same position in
   * the dataset's partition strategy. Only the first {@code '='} in a directory name separates
   * the name from the value, so a value that itself contains {@code '='} is passed whole to the
   * field partitioner instead of being truncated.
   *
   * @param dataset the filesystem dataset
   * @param partitionPath a directory path where the partition data is stored
   * @return a partition key representing the partition at the given path
   * @throws IllegalStateException if the dataset is not partitioned
   * @throws IllegalArgumentException if the dataset is not a {@code FileSystemDataset}, the path
   *     is not under the dataset directory, there are more directories than field partitioners,
   *     a directory name does not match the expected partition name, or a value is missing
   * @since 0.4.0
   */
  @SuppressWarnings("deprecation")
  public static PartitionKey partitionKeyForPath(Dataset dataset, URI partitionPath) {
    Preconditions.checkState(
        dataset.getDescriptor().isPartitioned(),
        "Attempt to get a partition on a non-partitioned dataset (name:%s)",
        dataset.getName());

    Preconditions.checkArgument(
        dataset instanceof FileSystemDataset, "Dataset is not a FileSystemDataset");
    FileSystemDataset fsDataset = (FileSystemDataset) dataset;

    FileSystem fs = fsDataset.getFileSystem();
    URI partitionUri = fs.makeQualified(new Path(partitionPath)).toUri();
    URI directoryUri = fsDataset.getDirectory().toUri();
    URI relativizedUri = directoryUri.relativize(partitionUri);

    // URI.relativize returns its argument unchanged when it cannot be relativized, which here
    // means the partition path does not live under the dataset directory.
    if (relativizedUri.equals(partitionUri)) {
      throw new IllegalArgumentException(
          String.format(
              "Partition URI %s has different root directory to dataset (directory: %s).",
              partitionUri, directoryUri));
    }

    // Materialize the split once; the lazy Iterable would otherwise be re-walked for every
    // size computation.
    List<String> parts = Lists.newArrayList(Splitter.on('/').split(relativizedUri.getPath()));

    PartitionStrategy partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
    List<FieldPartitioner> fieldPartitioners = partitionStrategy.getFieldPartitioners();
    if (parts.size() > fieldPartitioners.size()) {
      throw new IllegalArgumentException(
          String.format(
              "Too many partition directories for %s (%s), expecting %s.",
              partitionUri, parts.size(), fieldPartitioners.size()));
    }

    // limit(2): split on the first '=' only, so the remainder (including any further '=')
    // is kept intact as the value.
    Splitter nameValueSplitter = Splitter.on('=').limit(2);

    List<Object> values = Lists.newArrayListWithCapacity(parts.size());
    for (int i = 0; i < parts.size(); i++) {
      Iterator<String> split = nameValueSplitter.split(parts.get(i)).iterator();
      String fieldName = split.next();
      FieldPartitioner fp = fieldPartitioners.get(i);
      if (!fieldName.equals(fp.getName())) {
        throw new IllegalArgumentException(
            String.format(
                "Unrecognized partition name '%s' in partition %s, expecting '%s'.",
                fieldName, partitionUri, fp.getName()));
      }
      if (!split.hasNext()) {
        throw new IllegalArgumentException(
            String.format(
                "Missing partition value for '%s' in partition %s.", fieldName, partitionUri));
      }
      values.add(fp.valueFromString(split.next()));
    }
    return org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey(values.toArray());
  }

Example #5

Show file

File: PartitionStrategy.java Project: rbrush/kite

 /**
  * Return the cardinality produced by the contained field partitioners.
  *
  * <p>This can be used to aid in calculating resource usage during certain operations. For
  * example, when writing data to a partitioned dataset, you can use this method to estimate (or
  * discover exactly, depending on the partition functions) how many leaf partitions exist.
  *
  * <p><strong>Warning:</strong> This method is allowed to lie and should be treated only as a
  * hint. Some partition functions are fixed (for example, hash modulo number of buckets), while
  * others are open-ended (for example, discrete value) and depend on the input data.
  *
  * <p>The result is the product of the individual cardinalities. If any field partitioner
  * reports {@link FieldPartitioner#UNKNOWN_CARDINALITY}, that value is returned. A product that
  * does not fit in an {@code int} is clamped to the {@code int} range rather than wrapping
  * around.
  *
  * @return The estimated (or possibly concrete) number of leaf partitions.
  */
 public int getCardinality() {
   int cardinality = 1;
   for (FieldPartitioner fieldPartitioner : fieldPartitioners) {
     // Read once per partitioner; the value is needed for both the check and the product.
     int fieldCardinality = fieldPartitioner.getCardinality();
     if (fieldCardinality == FieldPartitioner.UNKNOWN_CARDINALITY) {
       return FieldPartitioner.UNKNOWN_CARDINALITY;
     }
     // Multiply in long so an oversized product is detected instead of silently wrapping,
     // then saturate. The loop continues so a later unknown cardinality is still reported.
     long product = (long) cardinality * fieldCardinality;
     cardinality = (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, product));
   }
   return cardinality;
 }