/** * Get a {@link org.kitesdk.data.PartitionKey} corresponding to a partition's filesystem path * represented as a {@link URI}. If the path is not a valid partition, then {@link * IllegalArgumentException} is thrown. Note that the partition does not have to exist. * * @param dataset the filesystem dataset * @param partitionPath a directory path where the partition data is stored * @return a partition key representing the partition at the given path * @since 0.4.0 */ @SuppressWarnings("deprecation") public static PartitionKey partitionKeyForPath(Dataset dataset, URI partitionPath) { Preconditions.checkState( dataset.getDescriptor().isPartitioned(), "Attempt to get a partition on a non-partitioned dataset (name:%s)", dataset.getName()); Preconditions.checkArgument( dataset instanceof FileSystemDataset, "Dataset is not a FileSystemDataset"); FileSystemDataset fsDataset = (FileSystemDataset) dataset; FileSystem fs = fsDataset.getFileSystem(); URI partitionUri = fs.makeQualified(new Path(partitionPath)).toUri(); URI directoryUri = fsDataset.getDirectory().toUri(); URI relativizedUri = directoryUri.relativize(partitionUri); if (relativizedUri.equals(partitionUri)) { throw new IllegalArgumentException( String.format( "Partition URI %s has different " + "root directory to dataset (directory: %s).", partitionUri, directoryUri)); } Iterable<String> parts = Splitter.on('/').split(relativizedUri.getPath()); PartitionStrategy partitionStrategy = dataset.getDescriptor().getPartitionStrategy(); List<FieldPartitioner> fieldPartitioners = partitionStrategy.getFieldPartitioners(); if (Iterables.size(parts) > fieldPartitioners.size()) { throw new IllegalArgumentException( String.format( "Too many partition directories " + "for %s (%s), expecting %s.", partitionUri, Iterables.size(parts), fieldPartitioners.size())); } List<Object> values = Lists.newArrayList(); int i = 0; for (String part : parts) { Iterator<String> split = Splitter.on('=').split(part).iterator(); String fieldName = split.next(); FieldPartitioner fp = fieldPartitioners.get(i++); if (!fieldName.equals(fp.getName())) { throw new IllegalArgumentException( String.format( "Unrecognized partition name " + "'%s' in partition %s, expecting '%s'.", fieldName, partitionUri, fp.getName())); } if (!split.hasNext()) { throw new IllegalArgumentException( String.format( "Missing partition value for " + "'%s' in partition %s.", fieldName, partitionUri)); } String stringValue = split.next(); Object value = fp.valueFromString(stringValue); values.add(value); } return org.kitesdk.data.impl.Accessor.getDefault() .newPartitionKey(values.toArray(new Object[values.size()])); }
@Override public DatasetWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState( view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); DatasetWriter<E> writer = new FileSystemWriter<E>( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), dataset.getDescriptor()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded(dataset.getName(), partition.toString()); } writer.open(); return writer; }