@Override
public FileSystemWriter<E> load(StorageKey key) throws Exception {
  Preconditions.checkState(view.getDataset() instanceof FileSystemDataset,
      "FileSystemWriters cannot create writer for " + view.getDataset());

  FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
  Path partition = convert.fromKey(key);
  FileSystemWriter<E> writer = FileSystemWriter.newWriter(
      dataset.getFileSystem(),
      new Path(dataset.getDirectory(), partition),
      dataset.getDescriptor());

  PartitionListener listener = dataset.getPartitionListener();
  if (listener != null) {
    listener.partitionAdded(dataset.getNamespace(), dataset.getName(),
        partition.toString());
  }

  // initialize the writer after calling the listener
  // this lets the listener decide if and how to create the
  // partition directory
  writer.initialize();

  return writer;
}
private PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  int maxWriters = DEFAULT_WRITER_CACHE_SIZE;
  if (descriptor.hasProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP)) {
    try {
      maxWriters = Integer.parseInt(
          descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP));
    } catch (NumberFormatException e) {
      LOG.warn("Not an integer: " + FileSystemProperties.WRITER_CACHE_SIZE_PROP +
          "=" + descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP));
    }
  } else if (partitionStrategy.getCardinality() != FieldPartitioner.UNKNOWN_CARDINALITY) {
    maxWriters = Math.min(maxWriters, partitionStrategy.getCardinality());
  }
  this.maxWriters = maxWriters;

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
  this.accessor = view.getAccessor();
  this.provided = view.getProvidedValues();
}
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "BC_UNCONFIRMED_CAST_OF_RETURN_VALUE",
    justification = "Writer is known to be IncrementalWriter")
public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception {
  Preconditions.checkState(view.getDataset() instanceof FileSystemDataset,
      "FileSystemWriters cannot create writer for " + view.getDataset());

  FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
  Path partition = convert.fromKey(key);
  FileSystemWriter<E> writer = FileSystemWriter.newWriter(
      dataset.getFileSystem(),
      new Path(dataset.getDirectory(), partition),
      dataset.getDescriptor());

  PartitionListener listener = dataset.getPartitionListener();
  if (listener != null) {
    listener.partitionAdded(dataset.getNamespace(), dataset.getName(),
        partition.toString());
  }

  // initialize the writer after calling the listener
  // this lets the listener decide if and how to create the
  // partition directory
  writer.initialize();

  return (FileSystemWriter.IncrementalWriter<E>) writer;
}
public PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();
  this.maxWriters = Math.min(10, partitionStrategy.getCardinality());
  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
}
@Override
public void write(E entity) {
  Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to write to a writer in state:%s", state);

  reusedKey.reuseFor(entity);

  DatasetWriter<E> writer = cachedWriters.getIfPresent(reusedKey);
  if (writer == null) {
    // avoid checking on every write whether the entity belongs in the view
    // by only checking when a new writer is created
    Preconditions.checkArgument(view.includes(entity),
        "View %s does not include entity %s", view, entity);
    // get a new key because it is stored in the cache
    StorageKey key = StorageKey.copy(reusedKey);
    try {
      writer = cachedWriters.getUnchecked(key);
    } catch (UncheckedExecutionException ex) {
      throw new IllegalArgumentException(
          "Problem creating view for entity: " + entity, ex.getCause());
    }
  }

  writer.write(entity);
}
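A minimal, self-contained sketch of the key-reuse pattern in write() above, using a hypothetical Key class and a plain HashMap in place of the Guava cache: the mutable key is re-populated for every record so the common cache-hit path allocates nothing, and a copy is made only on a miss because the map keeps a reference to the key it stores.

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

class KeyReuseSketch {
  // hypothetical key type; a real key must implement equals/hashCode,
  // as StorageKey does
  static final class Key {
    String partition;

    Key reuseFor(String record) {
      this.partition = record.substring(0, 1); // stand-in partition function
      return this;
    }

    static Key copy(Key other) {
      Key copied = new Key();
      copied.partition = other.partition;
      return copied;
    }

    @Override
    public boolean equals(Object o) {
      return o instanceof Key && Objects.equals(((Key) o).partition, partition);
    }

    @Override
    public int hashCode() {
      return Objects.hash(partition);
    }
  }

  private final Map<Key, StringBuilder> writers = new HashMap<Key, StringBuilder>();
  private final Key reusedKey = new Key();

  void write(String record) {
    // hit path: look up with the reused key, no allocation
    StringBuilder writer = writers.get(reusedKey.reuseFor(record));
    if (writer == null) {
      // miss path: copy the key because the map retains it
      writer = new StringBuilder();
      writers.put(Key.copy(reusedKey), writer);
    }
    writer.append(record).append('\n');
  }
}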
@Override
public DatasetWriter<E> load(StorageKey key) throws Exception {
  Preconditions.checkState(view.getDataset() instanceof FileSystemDataset,
      "FileSystemWriters cannot create writer for " + view.getDataset());

  FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
  Path partition = convert.fromKey(key);
  DatasetWriter<E> writer = new FileSystemWriter<E>(
      dataset.getFileSystem(),
      new Path(dataset.getDirectory(), partition),
      dataset.getDescriptor());

  PartitionListener listener = dataset.getPartitionListener();
  if (listener != null) {
    listener.partitionAdded(dataset.getName(), partition.toString());
  }

  writer.open();

  return writer;
}
@Override
public RecordReader<E, Void> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  RecordReader<E, Void> unfilteredRecordReader =
      createUnfilteredRecordReader(inputSplit, taskAttemptContext);
  if (view != null) {
    // use the constraints to filter out entities from the reader
    return new FilteredRecordReader<E>(unfilteredRecordReader,
        ((AbstractRefinableView) view).getConstraints(), view.getAccessor());
  }
  return unfilteredRecordReader;
}
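For context, a minimal sketch of the decorator idea behind FilteredRecordReader, whose source is not shown here: wrap the unfiltered reader and advance past records a predicate rejects. The Predicate interface is a hypothetical stand-in for the Constraints/EntityAccessor check the real class performs.

import java.io.IOException;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

class FilteringReaderSketch<E> extends RecordReader<E, Void> {
  // hypothetical stand-in for the constraints-based check
  interface Predicate<E> {
    boolean accept(E entity);
  }

  private final RecordReader<E, Void> unfiltered;
  private final Predicate<E> predicate;

  FilteringReaderSketch(RecordReader<E, Void> unfiltered, Predicate<E> predicate) {
    this.unfiltered = unfiltered;
    this.predicate = predicate;
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    unfiltered.initialize(split, context);
  }

  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    // skip forward until a record passes the filter or input is exhausted
    while (unfiltered.nextKeyValue()) {
      if (predicate.accept(unfiltered.getCurrentKey())) {
        return true;
      }
    }
    return false;
  }

  @Override
  public E getCurrentKey() throws IOException, InterruptedException {
    return unfiltered.getCurrentKey();
  }

  @Override
  public Void getCurrentValue() throws IOException, InterruptedException {
    return null;
  }

  @Override
  public float getProgress() throws IOException, InterruptedException {
    return unfiltered.getProgress();
  }

  @Override
  public void close() throws IOException {
    unfiltered.close();
  }
}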
@SuppressWarnings("unchecked")
private boolean setInputPaths(JobContext jobContext, Job job) throws IOException {
  List<Path> paths = Lists.newArrayList(
      (Iterator) (view == null ? dataset.pathIterator() : view.pathIterator()));
  LOG.debug("Input paths: {}", paths);

  if (paths.isEmpty()) {
    return false;
  }

  FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));

  // the following line is needed for Hadoop 1, otherwise the paths are not set
  Configuration contextConf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  Configuration jobConf = Hadoop.JobContext.getConfiguration.invoke(job);
  contextConf.set("mapred.input.dir", jobConf.get("mapred.input.dir"));

  return true;
}
static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Format format = descriptor.getFormat();
  if (Formats.PARQUET.equals(format)) {
    // by default, Parquet is not durable
    if (DescriptorUtil.isDisabled(
        FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
      return new IncrementalPartitionedDatasetWriter<E>(view);
    } else {
      return new NonDurablePartitionedDatasetWriter<E>(view);
    }
  } else if (Formats.AVRO.equals(format) || Formats.CSV.equals(format)) {
    return new IncrementalPartitionedDatasetWriter<E>(view);
  } else {
    return new NonDurablePartitionedDatasetWriter<E>(view);
  }
}
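A hedged usage sketch of this factory: the view and records variables and the MyRecord type are assumptions for illustration, and the initialize/write lifecycle follows the methods shown in this class.

// hypothetical caller; `view` and `records` are assumed to exist in scope
PartitionedDatasetWriter<MyRecord, ?> writer = PartitionedDatasetWriter.newWriter(view);
writer.initialize();
try {
  for (MyRecord record : records) {
    writer.write(record);
  }
} finally {
  writer.close(); // assumed to flush and close all cached per-partition writers
}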
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  ValidationException.check(FileSystemWriter.isSupportedFormat(descriptor),
      "Not a supported format: %s", descriptor.getFormat());

  LOG.debug("Opening partitioned dataset writer w/strategy:{}", partitionStrategy);

  cachedWriters = CacheBuilder.newBuilder()
      .maximumSize(maxWriters)
      .removalListener(new DatasetWriterCloser<E>())
      .build(createCacheLoader());

  state = ReaderWriterState.OPEN;
}
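A self-contained sketch of the bounded-cache pattern configured above, assuming a hypothetical Writer interface: the removal listener plays the role DatasetWriterCloser presumably plays here, closing each writer as it is evicted so no more than maxWriters files are held open at once.

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;

class WriterCacheSketch {
  // hypothetical writer abstraction for the sketch
  interface Writer {
    void write(String record);
    void close();
  }

  static LoadingCache<String, Writer> newWriterCache(int maxWriters,
      CacheLoader<String, Writer> loader) {
    return CacheBuilder.newBuilder()
        .maximumSize(maxWriters) // bound the number of open writers
        .removalListener(new RemovalListener<String, Writer>() {
          @Override
          public void onRemoval(RemovalNotification<String, Writer> notification) {
            // close writers as they are evicted, releasing file handles
            notification.getValue().close();
          }
        })
        .build(loader); // loader creates a writer on first access per key
  }
}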
public FileSystemViewKeyInputFormat(FileSystemView<E> view, Configuration conf) {
  this((FileSystemDataset<E>) view.getDataset(), conf);
  this.view = view;
  LOG.debug("View: {}", view);
}
public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view) {
  this.view = view;
  this.convert = new PathConversion(view.getDataset().getDescriptor().getSchema());
}
/**
 * Returns an iterator that provides all leaf-level directories in this view.
 *
 * @return leaf-directory iterator
 */
Iterator<Path> dirIterator() {
  return unbounded.dirIterator();
}
PathIterator pathIterator() {
  return unbounded.pathIterator();
}
public boolean deleteAll() {
  // no constraints, so delete is always aligned to partition boundaries
  return unbounded.deleteAllUnsafe();
}