private PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  // Writer cache sizing: an explicit dataset property wins; otherwise start
  // from the default and cap by the strategy's cardinality when it is known.
  int writerCap = DEFAULT_WRITER_CACHE_SIZE;
  if (descriptor.hasProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP)) {
    String configured =
        descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP);
    try {
      writerCap = Integer.parseInt(configured);
    } catch (NumberFormatException e) {
      // keep the default rather than failing construction on a bad property
      LOG.warn("Not an integer: " + FileSystemProperties.WRITER_CACHE_SIZE_PROP
          + "=" + configured);
    }
  } else if (partitionStrategy.getCardinality()
      != FieldPartitioner.UNKNOWN_CARDINALITY) {
    writerCap = Math.min(writerCap, partitionStrategy.getCardinality());
  }
  this.maxWriters = writerCap;

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
  this.accessor = view.getAccessor();
  this.provided = view.getProvidedValues();
}
@Override @edu.umd.cs.findbugs.annotations.SuppressWarnings( value = "BC_UNCONFIRMED_CAST_OF_RETURN_VALUE", justification = "Writer is known to be IncrementalWriter") public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState( view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), dataset.getDescriptor()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded(dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return (FileSystemWriter.IncrementalWriter<E>) writer; }
@Override public FileSystemWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState( view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), dataset.getDescriptor()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded(dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return writer; }
public PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  // Cap the writer cache by the partition cardinality only when the
  // cardinality is known. The sentinel value UNKNOWN_CARDINALITY is
  // negative, so blindly taking Math.min(10, cardinality) would produce an
  // invalid (non-positive) cache size.
  int cardinality = partitionStrategy.getCardinality();
  if (cardinality == FieldPartitioner.UNKNOWN_CARDINALITY) {
    this.maxWriters = 10;
  } else {
    this.maxWriters = Math.min(10, cardinality);
  }

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
}
@Override
public DatasetWriter<E> load(StorageKey key) throws Exception {
  Preconditions.checkState(view.getDataset() instanceof FileSystemDataset,
      "FileSystemWriters cannot create writer for " + view.getDataset());
  FileSystemDataset dataset = (FileSystemDataset) view.getDataset();

  Path partition = convert.fromKey(key);
  Path partitionDir = new Path(dataset.getDirectory(), partition);
  DatasetWriter<E> writer = new FileSystemWriter<E>(
      dataset.getFileSystem(), partitionDir, dataset.getDescriptor());

  // Let the listener know a new partition is being written.
  PartitionListener listener = dataset.getPartitionListener();
  if (listener != null) {
    listener.partitionAdded(dataset.getName(), partition.toString());
  }

  writer.open();
  return writer;
}
static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); Format format = descriptor.getFormat(); if (Formats.PARQUET.equals(format)) { // by default, Parquet is not durable if (DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) { return new IncrementalPartitionedDatasetWriter<E>(view); } else { return new NonDurablePartitionedDatasetWriter<E>(view); } } else if (Formats.AVRO.equals(format) || Formats.CSV.equals(format)) { return new IncrementalPartitionedDatasetWriter<E>(view); } else { return new NonDurablePartitionedDatasetWriter<E>(view); } }
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  ValidationException.check(FileSystemWriter.isSupportedFormat(descriptor),
      "Not a supported format: %s", descriptor.getFormat());

  LOG.debug("Opening partitioned dataset writer w/strategy:{}", partitionStrategy);

  // Size-bounded cache of per-partition writers; the removal listener
  // handles writers that are evicted when the cache is full.
  this.cachedWriters = CacheBuilder.newBuilder()
      .maximumSize(maxWriters)
      .removalListener(new DatasetWriterCloser<E>())
      .build(createCacheLoader());

  this.state = ReaderWriterState.OPEN;
}
/**
 * Creates an input format backed by the given view's dataset, remembering
 * the view so reads can be restricted to it.
 *
 * @param view the view to read; its dataset must be a
 *     {@code FileSystemDataset} (the cast below is unchecked —
 *     NOTE(review): presumably guaranteed by the caller; confirm)
 * @param conf the Hadoop configuration to use
 */
public FileSystemViewKeyInputFormat(FileSystemView<E> view, Configuration conf) {
  // Delegate dataset-level setup to the dataset constructor first.
  this((FileSystemDataset<E>) view.getDataset(), conf);
  this.view = view;
  LOG.debug("View: {}", view);
}
public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view) { this.view = view; this.convert = new PathConversion(view.getDataset().getDescriptor().getSchema()); }