@Override
public <E> Dataset<E> create(String name, DatasetDescriptor descriptor) {
  Preconditions.checkArgument(name != null, "Name can not be null");
  Preconditions.checkArgument(descriptor != null, "Descriptor can not be null");
  Preconditions.checkArgument(descriptor.getLocation() == null,
      "Descriptor location cannot be set; it is assigned by the MetadataProvider");

  DatasetDescriptor newDescriptor = metadataProvider.create(name, descriptor);
  newDescriptor = addRepositoryUri(newDescriptor);

  final URI location = newDescriptor.getLocation();
  if (location == null) {
    throw new DatasetRepositoryException(
        "[BUG] MetadataProvider did not assign a location to dataset:" + name);
  }

  ensureExists(newDescriptor, conf);

  logger.debug("Created dataset:{} schema:{} datasetPath:{}",
      new Object[] { name, newDescriptor.getSchema(), location.toString() });

  return new FileSystemDataset.Builder()
      .name(name)
      .configuration(conf)
      .descriptor(newDescriptor)
      .partitionKey(newDescriptor.isPartitioned() ?
          org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey() :
          null)
      .partitionListener(getPartitionListener())
      .build();
}
@Override
public <E> Dataset<E> load(String name) {
  Preconditions.checkArgument(name != null, "Name can not be null");

  logger.debug("Loading dataset:{}", name);

  DatasetDescriptor descriptor = metadataProvider.load(name);
  descriptor = addRepositoryUri(descriptor);

  FileSystemDataset<E> ds = new FileSystemDataset.Builder()
      .name(name)
      .configuration(conf)
      .descriptor(descriptor)
      .partitionKey(descriptor.isPartitioned() ?
          org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey() :
          null)
      .partitionListener(getPartitionListener())
      .build();

  logger.debug("Loaded dataset:{}", ds);

  return ds;
}
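// A minimal usage sketch (not part of this repository class) showing how create()
// and load() above are typically driven from client code. The repository URI,
// schema resource, and dataset name are hypothetical; the DatasetRepositories.open()
// and DatasetDescriptor.Builder calls are assumed from the public Kite API.
DatasetRepository repo = DatasetRepositories.open("repo:hdfs:/tmp/data");  // assumed URI

DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
    .schemaUri("resource:user.avsc")   // hypothetical schema resource
    .format(Formats.AVRO)
    .build();                          // note: no location -- create() has the MetadataProvider assign it

Dataset<GenericRecord> created = repo.create("users", descriptor);
Dataset<GenericRecord> loaded = repo.load("users");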
public PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();
  this.maxWriters = Math.min(10, partitionStrategy.getCardinality());
  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
}
private GetStorageKey(View<E> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  // get serializable versions of transient objects
  this.strategyString = descriptor.getPartitionStrategy()
      .toString(false /* no white space */);
  this.schemaString = descriptor.getSchema().toString(false /* no white space */);
  this.type = view.getType();
  if (view instanceof AbstractRefinableView) {
    this.constraints = ((AbstractRefinableView) view).getConstraints().toQueryMap();
  } else {
    this.constraints = null;
  }
}
/**
 * Partitions {@code collection} to be stored efficiently in {@code View}.
 * <p>
 * This restructures the parallel collection so that all of the entities that
 * will be stored in a given partition will be processed by the same writer.
 * <p>
 * If the dataset is not partitioned, then this will structure all of the
 * entities to produce a number of files equal to {@code numWriters}.
 *
 * @param collection a collection of entities
 * @param view a {@link View} of a dataset to partition the collection for
 * @param numWriters the number of writers that should be used
 * @param <E> the type of entities in the collection and underlying dataset
 * @return an equivalent collection of entities partitioned for the view
 * @see #partition(PCollection, View)
 *
 * @since 0.16.0
 */
public static <E> PCollection<E> partition(PCollection<E> collection,
                                           View<E> view, int numWriters) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  if (descriptor.isPartitioned()) {
    GetStorageKey<E> getKey = new GetStorageKey<E>(view);
    PTable<GenericData.Record, E> table = collection
        .by(getKey, Avros.generics(getKey.schema()));
    PGroupedTable<GenericData.Record, E> grouped =
        numWriters > 0 ? table.groupByKey(numWriters) : table.groupByKey();
    return grouped.ungroup().values();
  } else {
    return partition(collection, numWriters);
  }
}
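// A hedged usage sketch for partition() above: repartition a Crunch PCollection so
// that each writer only touches one dataset partition, then write it out. The
// pipeline, source, dataset URI, and writer count are hypothetical; only
// CrunchDatasets.partition() is taken from this section, and asTarget()/Datasets.load()
// are assumed from the public Kite Crunch API.
PCollection<GenericData.Record> events = pipeline.read(source);          // assumed source
View<GenericData.Record> target = Datasets.load("dataset:hive:events");  // hypothetical dataset URI

PCollection<GenericData.Record> arranged =
    CrunchDatasets.partition(events, target, 10);  // at most 10 writers

pipeline.write(arranged, CrunchDatasets.asTarget(target));
pipeline.done();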
private FileSystemWriter(FileSystem fs, Path path, DatasetDescriptor descriptor) {
  Preconditions.checkNotNull(fs, "File system is not defined");
  Preconditions.checkNotNull(path, "Destination directory is not defined");
  Preconditions.checkNotNull(descriptor, "Descriptor is not defined");

  this.fs = fs;
  this.directory = path;
  this.descriptor = descriptor;
  this.conf = new Configuration(fs.getConf());
  this.state = ReaderWriterState.NEW;

  // copy file format settings from custom properties to the Configuration
  for (String prop : descriptor.listProperties()) {
    conf.set(prop, descriptor.getProperty(prop));
  }
}
static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Format format = descriptor.getFormat();
  if (Formats.PARQUET.equals(format)) {
    // by default, Parquet is not durable
    if (DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
      return new IncrementalPartitionedDatasetWriter<E>(view);
    } else {
      return new NonDurablePartitionedDatasetWriter<E>(view);
    }
  } else if (Formats.AVRO.equals(format) || Formats.CSV.equals(format)) {
    return new IncrementalPartitionedDatasetWriter<E>(view);
  } else {
    return new NonDurablePartitionedDatasetWriter<E>(view);
  }
}
/**
 * Creates, if necessary, the given location for {@code descriptor}.
 *
 * @param descriptor A DatasetDescriptor
 * @param conf A Configuration
 */
static void ensureExists(DatasetDescriptor descriptor, Configuration conf) {
  Preconditions.checkArgument(descriptor.getLocation() != null,
      "Cannot get FileSystem for a descriptor with no location");

  final Path dataPath = new Path(descriptor.getLocation());
  final FileSystem fs = fsForPath(dataPath, conf);

  try {
    if (!fs.exists(dataPath)) {
      fs.mkdirs(dataPath);
    }
  } catch (IOException ex) {
    throw new DatasetRepositoryException("Cannot access data location", ex);
  }
}
@Override
public final void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  // ensure the directory exists
  try {
    fs.mkdirs(directory);
  } catch (IOException ex) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetIOException("Failed to create path " + directory, ex);
  }

  // initialize paths
  this.finalPath = new Path(directory, uniqueFilename(descriptor.getFormat()));
  this.tempPath = tempFilename(finalPath);
  this.appender = newAppender(tempPath);

  try {
    appender.open();
  } catch (IOException e) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetIOException("Failed to open appender " + appender, e);
  }

  this.count = 0;

  LOG.debug("Opened appender {} for {}", appender, finalPath);

  this.state = ReaderWriterState.OPEN;
}
public Builder descriptor(DatasetDescriptor descriptor) {
  Preconditions.checkArgument(descriptor.getLocation() != null,
      "Dataset location cannot be null");

  this.descriptor = descriptor;

  return this;
}
@VisibleForTesting
@SuppressWarnings("unchecked")
<E> FileAppender<E> newAppender(Path temp) {
  Format format = descriptor.getFormat();
  if (Formats.PARQUET.equals(format)) {
    // by default, Parquet is not durable
    if (DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
      return (FileAppender<E>) new DurableParquetAppender(
          fs, temp, descriptor.getSchema(), conf, descriptor.getCompressionType());
    } else {
      return (FileAppender<E>) new ParquetAppender(
          fs, temp, descriptor.getSchema(), conf, descriptor.getCompressionType());
    }
  } else if (Formats.AVRO.equals(format)) {
    return new AvroAppender<E>(
        fs, temp, descriptor.getSchema(), descriptor.getCompressionType());
  } else if (Formats.CSV.equals(format) &&
      DescriptorUtil.isEnabled(FileSystemProperties.ALLOW_CSV_PROP, descriptor)) {
    return new CSVAppender<E>(fs, temp, descriptor);
  } else {
    this.state = ReaderWriterState.ERROR;
    throw new UnknownFormatException("Unknown format " + descriptor);
  }
}
@Override
public final void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);
  ValidationException.check(isSupportedFormat(descriptor),
      "Not a supported format: %s", descriptor.getFormat());

  // ensure the directory exists
  try {
    fs.mkdirs(directory);
  } catch (RuntimeException e) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetOperationException(e, "Failed to create path %s", directory);
  } catch (IOException ex) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetIOException("Failed to create path " + directory, ex);
  }

  // initialize paths
  try {
    this.finalPath = new Path(directory, uniqueFilename(descriptor.getFormat()));
    this.tempPath = tempFilename(finalPath);
  } catch (RuntimeException e) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetOperationException(e, "Failed to initialize file paths under %s", directory);
  }

  try {
    this.appender = newAppender(tempPath);
    appender.open();
  } catch (RuntimeException e) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetOperationException(e, "Failed to open appender %s", appender);
  } catch (IOException e) {
    this.state = ReaderWriterState.ERROR;
    throw new DatasetIOException("Failed to open appender " + appender, e);
  }

  this.count = 0;

  LOG.info("Opened output appender {} for {}", appender, finalPath);

  this.state = ReaderWriterState.OPEN;
}
FileSystemDataset(FileSystem fileSystem, Path directory, String name,
                  DatasetDescriptor descriptor,
                  @Nullable PartitionListener partitionListener) {
  this.fileSystem = fileSystem;
  this.directory = directory;
  this.name = name;
  this.descriptor = descriptor;
  this.partitionStrategy =
      descriptor.isPartitioned() ? descriptor.getPartitionStrategy() : null;
  this.partitionListener = partitionListener;
  this.convert = new PathConversion();
  this.unbounded = new FileSystemView<E>(this);
  // remove this.partitionKey for 0.13.0
  this.partitionKey = null;
}
@Override
public boolean delete(String name) {
  Preconditions.checkArgument(name != null, "Name can not be null");

  logger.debug("Deleting dataset:{}", name);

  DatasetDescriptor descriptor;
  try {
    descriptor = metadataProvider.load(name);
    descriptor = addRepositoryUri(descriptor);
  } catch (DatasetNotFoundException ex) {
    return false;
  }

  boolean changed;
  try {
    // don't care about the return value here -- if it already doesn't exist
    // we still need to delete the data directory
    changed = metadataProvider.delete(name);
  } catch (MetadataProviderException ex) {
    throw new DatasetRepositoryException(
        "Failed to delete descriptor for name:" + name, ex);
  }

  final Path dataLocation = new Path(descriptor.getLocation());
  final FileSystem fs = fsForPath(dataLocation, conf);

  try {
    if (fs.exists(dataLocation)) {
      if (fs.delete(dataLocation, true)) {
        changed = true;
      } else {
        throw new DatasetRepositoryException(
            "Failed to delete dataset name:" + name + " location:" + dataLocation);
      }
    }
  } catch (IOException e) {
    throw new DatasetRepositoryException(
        "Internal failure when removing location:" + dataLocation, e);
  }

  return changed;
}
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  ValidationException.check(FileSystemWriter.isSupportedFormat(descriptor),
      "Not a supported format: %s", descriptor.getFormat());

  LOG.debug("Opening partitioned dataset writer w/strategy:{}", partitionStrategy);

  cachedWriters = CacheBuilder.newBuilder()
      .maximumSize(maxWriters)
      .removalListener(new DatasetWriterCloser<E>())
      .build(createCacheLoader());

  state = ReaderWriterState.OPEN;
}
@SuppressWarnings("unchecked") private <E> FileAppender<E> newAppender(Path temp) { Format format = descriptor.getFormat(); if (Formats.PARQUET.equals(format)) { // by default, guarantee durability with the more costly writer if (DescriptorUtil.isEnabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) { return (FileAppender<E>) new ParquetAppender(fs, temp, descriptor.getSchema(), conf, true); } else { return (FileAppender<E>) new DurableParquetAppender(fs, temp, descriptor.getSchema(), conf, true); } } else if (Formats.AVRO.equals(format)) { return new AvroAppender<E>(fs, temp, descriptor.getSchema(), true); } else if (Formats.CSV.equals(format) && DescriptorUtil.isEnabled(FileSystemProperties.ALLOW_CSV_PROP, descriptor)) { return new CSVAppender<E>(fs, temp, descriptor); } else { this.state = ReaderWriterState.ERROR; throw new DatasetWriterException("Unknown format " + descriptor); } }
private PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  int maxWriters = DEFAULT_WRITER_CACHE_SIZE;
  if (descriptor.hasProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP)) {
    try {
      maxWriters = Integer.parseInt(
          descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP));
    } catch (NumberFormatException e) {
      LOG.warn("Not an integer: " + FileSystemProperties.WRITER_CACHE_SIZE_PROP +
          "=" + descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP));
    }
  } else if (partitionStrategy.getCardinality() != FieldPartitioner.UNKNOWN_CARDINALITY) {
    maxWriters = Math.min(maxWriters, partitionStrategy.getCardinality());
  }
  this.maxWriters = maxWriters;

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
  this.accessor = view.getAccessor();
  this.provided = view.getProvidedValues();
}
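// A hedged sketch of how the writer cache size read by the constructor above can
// be tuned through a descriptor property. The property is referenced here via
// FileSystemProperties.WRITER_CACHE_SIZE_PROP; the literal key "kite.writer.cache-size",
// the schema resource, and the partition field are assumptions for illustration.
DatasetDescriptor tuned = new DatasetDescriptor.Builder()
    .schemaUri("resource:event.avsc")                 // hypothetical schema
    .partitionStrategy(new PartitionStrategy.Builder()
        .hash("user_id", 128)                         // hypothetical partition field
        .build())
    .property("kite.writer.cache-size", "20")         // assumed property key
    .build();
// With this descriptor, PartitionedDatasetWriter keeps at most 20 open
// per-partition writers instead of DEFAULT_WRITER_CACHE_SIZE.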
/**
 * Precondition-style validation that the DatasetDescriptor is compatible.
 *
 * @param descriptor a {@link DatasetDescriptor}
 */
public static void checkDescriptor(DatasetDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

  Schema schema = descriptor.getSchema();
  checkSchema(schema);

  if (descriptor.isPartitioned()) {
    // marked as [BUG] because this is checked in DatasetDescriptor
    Preconditions.checkArgument(schema.getType() == Schema.Type.RECORD,
        "[BUG] Partitioned datasets must have record schemas");

    Set<String> names = Sets.newHashSet();
    for (Schema.Field field : schema.getFields()) {
      names.add(field.name());
    }

    List<String> incompatible = Lists.newArrayList();
    List<String> duplicates = Lists.newArrayList();
    for (FieldPartitioner fp : descriptor.getPartitionStrategy().getFieldPartitioners()) {
      String name = fp.getName();
      if (!isCompatibleName(name)) {
        incompatible.add(name);
      } else if (names.contains(name)) {
        duplicates.add(name);
      } else {
        names.add(name);
      }
    }
    Preconditions.checkState(incompatible.isEmpty(),
        "Hive incompatible: partition names are not alphanumeric (plus '_'): %s",
        Joiner.on(", ").join(incompatible));
    Preconditions.checkState(duplicates.isEmpty(),
        "Hive incompatible: partition names duplicate data fields: %s",
        Joiner.on(", ").join(duplicates));
  }
}
static <E> FileSystemWriter<E> newWriter(FileSystem fs, Path path,
                                         DatasetDescriptor descriptor) {
  Format format = descriptor.getFormat();
  if (Formats.PARQUET.equals(format)) {
    // by default, Parquet is not durable
    if (DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
      return new IncrementalWriter<E>(fs, path, descriptor);
    } else {
      return new FileSystemWriter<E>(fs, path, descriptor);
    }
  } else if (Formats.AVRO.equals(format) || Formats.CSV.equals(format)) {
    return new IncrementalWriter<E>(fs, path, descriptor);
  } else {
    return new FileSystemWriter<E>(fs, path, descriptor);
  }
}
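// A hedged sketch of the descriptor property that drives the Parquet branch in
// newWriter() above: Parquet is non-durable by default, and explicitly disabling
// FileSystemProperties.NON_DURABLE_PARQUET_PROP selects the incremental (durable)
// writer. The literal key "kite.parquet.non-durable" and the schema resource are
// assumptions for illustration.
DatasetDescriptor parquet = new DatasetDescriptor.Builder()
    .schemaUri("resource:event.avsc")               // hypothetical schema
    .format(Formats.PARQUET)
    .property("kite.parquet.non-durable", "false")  // assumed key; "false" selects the durable path
    .build();
// newWriter(fs, path, parquet) would then return an IncrementalWriter<E> rather
// than the plain FileSystemWriter<E>.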
/**
 * Checks that the {@code existing} {@link DatasetDescriptor} is compatible
 * with {@code test}.
 *
 * @param existing the current {@code DatasetDescriptor} for a dataset
 * @param test a new {@code DatasetDescriptor} for the same dataset
 */
public static void checkCompatible(DatasetDescriptor existing, DatasetDescriptor test) {
  checkNotChanged("format", existing.getFormat(), test.getFormat());

  checkNotChanged("partitioning", existing.isPartitioned(), test.isPartitioned());

  if (existing.isPartitioned()) {
    checkNotChanged("partition strategy",
        existing.getPartitionStrategy(), test.getPartitionStrategy());
  }

  // check can read records written with old schema using new schema
  Schema oldSchema = existing.getSchema();
  Schema testSchema = test.getSchema();
  if (!SchemaValidationUtil.canRead(oldSchema, testSchema)) {
    throw new IncompatibleSchemaException("Schema cannot read data " +
        "written using existing schema. Schema: " + testSchema.toString(true) +
        "\nExisting schema: " + oldSchema.toString(true));
  }
}
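// A hedged sketch of a descriptor change that passes checkCompatible(): the new
// schema only adds an optional field with a default, so it can still read records
// written with the existing schema. The record and field names are hypothetical.
Schema v1 = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"User\",\"fields\":[" +
    "{\"name\":\"id\",\"type\":\"long\"}]}");
Schema v2 = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"User\",\"fields\":[" +
    "{\"name\":\"id\",\"type\":\"long\"}," +
    "{\"name\":\"email\",\"type\":[\"null\",\"string\"],\"default\":null}]}");

DatasetDescriptor existing = new DatasetDescriptor.Builder().schema(v1).build();
DatasetDescriptor test = new DatasetDescriptor.Builder().schema(v2).build();

checkCompatible(existing, test);  // passes: format, partitioning, and schema are compatible
// Removing "id" or changing its type in v2 would make SchemaValidationUtil.canRead()
// fail and checkCompatible() throw IncompatibleSchemaException.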
@Override
@Nullable
@Deprecated
public Dataset<E> getPartition(PartitionKey key, boolean allowCreate) {
  Preconditions.checkState(descriptor.isPartitioned(),
      "Attempt to get a partition on a non-partitioned dataset (name:%s)", name);

  logger.debug("Loading partition for key {}, allowCreate:{}",
      new Object[] { key, allowCreate });

  Path partitionDirectory = fileSystem.makeQualified(toDirectoryName(directory, key));

  try {
    if (!fileSystem.exists(partitionDirectory)) {
      if (allowCreate) {
        fileSystem.mkdirs(partitionDirectory);
        if (partitionListener != null) {
          partitionListener.partitionAdded(name, toRelativeDirectory(key).toString());
        }
      } else {
        return null;
      }
    }
  } catch (IOException e) {
    throw new DatasetException(
        "Unable to locate or create dataset partition directory " + partitionDirectory, e);
  }

  int partitionDepth = key.getLength();
  PartitionStrategy subpartitionStrategy =
      Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, partitionDepth);

  return new FileSystemDataset.Builder()
      .name(name)
      .fileSystem(fileSystem)
      .descriptor(new DatasetDescriptor.Builder(descriptor)
          .location(partitionDirectory)
          .partitionStrategy(subpartitionStrategy)
          .build())
      .partitionKey(key)
      .partitionListener(partitionListener)
      .build();
}
public <E> FileSystemDataset<E> build() {
  Preconditions.checkState(this.name != null, "No dataset name defined");
  Preconditions.checkState(this.descriptor != null, "No dataset descriptor defined");
  Preconditions.checkState((conf != null) || (fileSystem != null),
      "Configuration or FileSystem must be set");

  this.directory = new Path(descriptor.getLocation());

  if (fileSystem == null) {
    try {
      this.fileSystem = directory.getFileSystem(conf);
    } catch (IOException ex) {
      throw new DatasetException("Cannot access FileSystem", ex);
    }
  }

  Path absoluteDirectory = fileSystem.makeQualified(directory);

  return new FileSystemDataset<E>(
      fileSystem, absoluteDirectory, name, descriptor, partitionKey, partitionListener);
}
@Override
@Deprecated
public Iterable<Dataset<E>> getPartitions() {
  Preconditions.checkState(descriptor.isPartitioned(),
      "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);

  List<Dataset<E>> partitions = Lists.newArrayList();

  FileStatus[] fileStatuses;

  try {
    fileStatuses = fileSystem.listStatus(directory, PathFilters.notHidden());
  } catch (IOException e) {
    throw new DatasetException(
        "Unable to list partition directory for directory " + directory, e);
  }

  for (FileStatus stat : fileStatuses) {
    Path p = fileSystem.makeQualified(stat.getPath());
    PartitionKey key = fromDirectoryName(p);
    PartitionStrategy subPartitionStrategy =
        Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1);
    Builder builder = new FileSystemDataset.Builder()
        .name(name)
        .fileSystem(fileSystem)
        .descriptor(new DatasetDescriptor.Builder(descriptor)
            .location(p)
            .partitionStrategy(subPartitionStrategy)
            .build())
        .partitionKey(key)
        .partitionListener(partitionListener);
    partitions.add(builder.<E>build());
  }

  return partitions;
}
@Override
@Deprecated
public void dropPartition(PartitionKey key) {
  Preconditions.checkState(descriptor.isPartitioned(),
      "Attempt to drop a partition on a non-partitioned dataset (name:%s)", name);
  Preconditions.checkArgument(key != null, "Partition key may not be null");

  logger.debug("Dropping partition with key:{} dataset:{}", key, name);

  Path partitionDirectory = toDirectoryName(directory, key);

  try {
    if (!fileSystem.delete(partitionDirectory, true)) {
      throw new DatasetException("Partition directory " + partitionDirectory +
          " for key " + key + " does not exist");
    }
  } catch (IOException e) {
    throw new DatasetException(
        "Unable to locate or drop dataset partition directory " + partitionDirectory, e);
  }
}
/**
 * Checks that the {@code existing} {@link DatasetDescriptor} can be replaced
 * by {@code updated}.
 *
 * @param existing the current {@code DatasetDescriptor} for a dataset
 * @param updated a new {@code DatasetDescriptor} for the same dataset
 */
public static void checkUpdate(DatasetDescriptor existing, DatasetDescriptor updated) {
  checkNotChanged("location", existing.getLocation(), updated.getLocation());
  checkCompatible(existing, updated);
}
static boolean isSupportedFormat(DatasetDescriptor descriptor) {
  Format format = descriptor.getFormat();
  return (SUPPORTED_FORMATS.contains(format) ||
      (Formats.CSV.equals(format) &&
          DescriptorUtil.isEnabled(FileSystemProperties.ALLOW_CSV_PROP, descriptor)));
}
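// A hedged sketch of enabling CSV output, which isSupportedFormat() above only
// accepts when FileSystemProperties.ALLOW_CSV_PROP is enabled on the descriptor.
// The literal key "kite.allow.csv" and the schema resource are assumptions for
// illustration.
DatasetDescriptor csv = new DatasetDescriptor.Builder()
    .schemaUri("resource:event.avsc")    // hypothetical schema
    .format(Formats.CSV)
    .property("kite.allow.csv", "true")  // assumed key gating CSV writers
    .build();
// isSupportedFormat(csv) would then return true, and newAppender() would create
// a CSVAppender for this dataset.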
@Override
public <E> Dataset<E> update(String name, DatasetDescriptor descriptor) {
  Preconditions.checkArgument(name != null, "Dataset name cannot be null");
  Preconditions.checkArgument(descriptor != null, "DatasetDescriptor cannot be null");

  DatasetDescriptor oldDescriptor = metadataProvider.load(name);
  // oldDescriptor is valid if load didn't throw NoSuchDatasetException

  if (!oldDescriptor.getFormat().equals(descriptor.getFormat())) {
    throw new DatasetRepositoryException("Cannot change dataset format from " +
        oldDescriptor.getFormat() + " to " + descriptor.getFormat());
  }

  final URI oldLocation = oldDescriptor.getLocation();
  if ((oldLocation != null) && !(oldLocation.equals(descriptor.getLocation()))) {
    throw new DatasetRepositoryException("Cannot change the dataset's location");
  }

  if (oldDescriptor.isPartitioned() != descriptor.isPartitioned()) {
    throw new DatasetRepositoryException(
        "Cannot change an unpartitioned dataset to partitioned or vice versa.");
  } else if (oldDescriptor.isPartitioned() && descriptor.isPartitioned() &&
      !oldDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
    throw new DatasetRepositoryException("Cannot change partition strategy from " +
        oldDescriptor.getPartitionStrategy() + " to " + descriptor.getPartitionStrategy());
  }

  // check can read records written with old schema using new schema
  final Schema oldSchema = oldDescriptor.getSchema();
  final Schema newSchema = descriptor.getSchema();
  if (!SchemaValidationUtil.canRead(oldSchema, newSchema)) {
    throw new IncompatibleSchemaException("New schema cannot read data written " +
        "using old schema. New schema: " + newSchema.toString(true) +
        "\nOld schema: " + oldSchema.toString(true));
  }

  DatasetDescriptor updatedDescriptor = metadataProvider.update(name, descriptor);
  updatedDescriptor = addRepositoryUri(updatedDescriptor);

  logger.debug("Updated dataset:{} schema:{} datasetPath:{}",
      new Object[] { name, updatedDescriptor.getSchema(),
          updatedDescriptor.getLocation().toString() });

  return new FileSystemDataset.Builder()
      .name(name)
      .configuration(conf)
      .descriptor(updatedDescriptor)
      .partitionKey(updatedDescriptor.isPartitioned() ?
          org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey() :
          null)
      .partitionListener(getPartitionListener())
      .build();
}
@Override
public void merge(FileSystemDataset<E> update) {
  DatasetDescriptor updateDescriptor = update.getDescriptor();

  if (!updateDescriptor.getFormat().equals(descriptor.getFormat())) {
    throw new DatasetRepositoryException("Cannot merge dataset format " +
        updateDescriptor.getFormat() + " with format " + descriptor.getFormat());
  }

  if (updateDescriptor.isPartitioned() != descriptor.isPartitioned()) {
    throw new DatasetRepositoryException(
        "Cannot merge an unpartitioned dataset with a partitioned one or vice versa.");
  } else if (updateDescriptor.isPartitioned() && descriptor.isPartitioned() &&
      !updateDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
    throw new DatasetRepositoryException("Cannot merge dataset partition strategy " +
        updateDescriptor.getPartitionStrategy() + " with " + descriptor.getPartitionStrategy());
  }

  if (!updateDescriptor.getSchema().equals(descriptor.getSchema())) {
    throw new DatasetRepositoryException("Cannot merge dataset schema " +
        updateDescriptor.getSchema() + " with schema " + descriptor.getSchema());
  }

  Set<String> addedPartitions = Sets.newHashSet();
  for (Path path : update.pathIterator()) {
    URI relativePath = update.getDirectory().toUri().relativize(path.toUri());
    Path newPath = new Path(directory, new Path(relativePath));
    Path newPartitionDirectory = newPath.getParent();
    try {
      if (!fileSystem.exists(newPartitionDirectory)) {
        fileSystem.mkdirs(newPartitionDirectory);
      }
      logger.debug("Renaming {} to {}", path, newPath);
      boolean renameOk = fileSystem.rename(path, newPath);
      if (!renameOk) {
        throw new DatasetException(
            "Dataset merge failed during rename of " + path + " to " + newPath);
      }
    } catch (IOException e) {
      throw new DatasetIOException("Dataset merge failed", e);
    }
    if (descriptor.isPartitioned() && partitionListener != null) {
      String partition = newPartitionDirectory.toString();
      if (!addedPartitions.contains(partition)) {
        partitionListener.partitionAdded(name, partition);
        addedPartitions.add(partition);
      }
    }
  }
}