コード例 #1
0
  /**
   * Creates a writer for a view over a partitioned dataset.
   *
   * <p>The maximum number of cached writers is resolved as follows:
   * <ul>
   *   <li>if the descriptor sets {@code FileSystemProperties.WRITER_CACHE_SIZE_PROP} to a
   *       non-negative integer, that value is used;</li>
   *   <li>if the property is set but is not an integer or is negative, a warning is logged and
   *       {@code DEFAULT_WRITER_CACHE_SIZE} is used;</li>
   *   <li>if the property is not set and the partition strategy's cardinality is known, the
   *       default is capped at that cardinality;</li>
   *   <li>otherwise {@code DEFAULT_WRITER_CACHE_SIZE} is used.</li>
   * </ul>
   *
   * @param view the view to write to; its dataset's descriptor must be partitioned, which is
   *     enforced with {@code Preconditions.checkArgument}
   */
  private PartitionedDatasetWriter(FileSystemView<E> view) {
    final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    Preconditions.checkArgument(
        descriptor.isPartitioned(), "Dataset " + view.getDataset() + " is not partitioned");

    this.view = view;
    this.partitionStrategy = descriptor.getPartitionStrategy();

    int maxWriters = DEFAULT_WRITER_CACHE_SIZE;
    if (descriptor.hasProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP)) {
      // Read the property once so the parsed value and the logged value always agree.
      final String configured = descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP);
      try {
        final int parsed = Integer.parseInt(configured);
        if (parsed < 0) {
          // A negative size is not usable as a cache bound; treat it like any other invalid
          // setting and keep the default rather than failing later when the cache is built.
          LOG.warn(
              "Negative writer cache size ignored: "
                  + FileSystemProperties.WRITER_CACHE_SIZE_PROP
                  + "="
                  + configured);
        } else {
          maxWriters = parsed;
        }
      } catch (NumberFormatException e) {
        LOG.warn(
            "Not an integer: "
                + FileSystemProperties.WRITER_CACHE_SIZE_PROP
                + "="
                + configured);
      }
    } else if (partitionStrategy.getCardinality() != FieldPartitioner.UNKNOWN_CARDINALITY) {
      // No explicit setting: there is no point caching more writers than partitions can exist.
      maxWriters = Math.min(maxWriters, partitionStrategy.getCardinality());
    }
    this.maxWriters = maxWriters;

    this.state = ReaderWriterState.NEW;
    this.reusedKey = new StorageKey(partitionStrategy);
    this.accessor = view.getAccessor();
    this.provided = view.getProvidedValues();
  }
コード例 #2
0
    /**
     * Builds and initializes the writer for the partition identified by {@code key}.
     *
     * <p>The dataset's partition listener, when present, is notified before the writer is
     * initialized.
     *
     * @param key the storage key that is converted into the partition's relative path
     * @return the newly initialized writer, cast to {@code IncrementalWriter}
     * @throws Exception declared by the overridden loader method
     */
    @Override
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(
        value = "BC_UNCONFIRMED_CAST_OF_RETURN_VALUE",
        justification = "Writer is known to be IncrementalWriter")
    public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception {
      Preconditions.checkState(
          view.getDataset() instanceof FileSystemDataset,
          "FileSystemWriters cannot create writer for " + view.getDataset());

      final FileSystemDataset fsDataset = (FileSystemDataset) view.getDataset();
      final Path relativePartition = convert.fromKey(key);
      final Path partitionDirectory = new Path(fsDataset.getDirectory(), relativePartition);

      final FileSystemWriter<E> created =
          FileSystemWriter.newWriter(
              fsDataset.getFileSystem(), partitionDirectory, fsDataset.getDescriptor());

      final PartitionListener observer = fsDataset.getPartitionListener();
      if (observer != null) {
        observer.partitionAdded(
            fsDataset.getNamespace(), fsDataset.getName(), relativePartition.toString());
      }

      // Initialization is deliberately deferred until the listener has been
      // called, so the listener can decide if and how the partition directory
      // gets created.
      created.initialize();

      return (FileSystemWriter.IncrementalWriter<E>) created;
    }
コード例 #3
0
    /**
     * Builds and initializes the writer for the partition identified by {@code key}.
     *
     * <p>The dataset's partition listener, when present, is notified before the writer is
     * initialized.
     *
     * @param key the storage key that is converted into the partition's relative path
     * @return the newly initialized writer
     * @throws Exception declared by the overridden loader method
     */
    @Override
    public FileSystemWriter<E> load(StorageKey key) throws Exception {
      Preconditions.checkState(
          view.getDataset() instanceof FileSystemDataset,
          "FileSystemWriters cannot create writer for " + view.getDataset());

      final FileSystemDataset fsDataset = (FileSystemDataset) view.getDataset();
      final Path relativePartition = convert.fromKey(key);
      final Path partitionDirectory = new Path(fsDataset.getDirectory(), relativePartition);

      final FileSystemWriter<E> created =
          FileSystemWriter.newWriter(
              fsDataset.getFileSystem(), partitionDirectory, fsDataset.getDescriptor());

      final PartitionListener observer = fsDataset.getPartitionListener();
      if (observer != null) {
        observer.partitionAdded(
            fsDataset.getNamespace(), fsDataset.getName(), relativePartition.toString());
      }

      // Initialization is deliberately deferred until the listener has been
      // called, so the listener can decide if and how the partition directory
      // gets created.
      created.initialize();

      return created;
    }
コード例 #4
0
  /**
   * Creates a writer for a view over a partitioned dataset.
   *
   * <p>The writer limit is the partition strategy's cardinality capped at 10. A negative
   * cardinality (presumably the "unknown" marker; confirm against the partitioner contract)
   * falls back to 10 so that the limit is never negative.
   *
   * @param view the view to write to; its dataset's descriptor must be partitioned, which is
   *     enforced with {@code Preconditions.checkArgument}
   */
  public PartitionedDatasetWriter(FileSystemView<E> view) {
    final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    Preconditions.checkArgument(
        descriptor.isPartitioned(), "Dataset " + view.getDataset() + " is not partitioned");

    this.view = view;
    this.partitionStrategy = descriptor.getPartitionStrategy();

    // Math.min alone would pass a negative cardinality straight through as the limit.
    final int cardinality = partitionStrategy.getCardinality();
    this.maxWriters = cardinality < 0 ? 10 : Math.min(10, cardinality);

    this.state = ReaderWriterState.NEW;
    this.reusedKey = new StorageKey(partitionStrategy);
  }
コード例 #5
0
    /**
     * Builds and opens the writer for the partition identified by {@code key}.
     *
     * <p>The dataset's partition listener, when present, is notified before the writer is
     * opened.
     *
     * @param key the storage key that is converted into the partition's relative path
     * @return the newly opened writer
     * @throws Exception declared by the overridden loader method
     */
    @Override
    public DatasetWriter<E> load(StorageKey key) throws Exception {
      Preconditions.checkState(
          view.getDataset() instanceof FileSystemDataset,
          "FileSystemWriters cannot create writer for " + view.getDataset());

      final FileSystemDataset fsDataset = (FileSystemDataset) view.getDataset();
      final Path relativePartition = convert.fromKey(key);
      final Path partitionDirectory = new Path(fsDataset.getDirectory(), relativePartition);

      final DatasetWriter<E> created =
          new FileSystemWriter<E>(
              fsDataset.getFileSystem(), partitionDirectory, fsDataset.getDescriptor());

      final PartitionListener observer = fsDataset.getPartitionListener();
      if (observer != null) {
        observer.partitionAdded(fsDataset.getName(), relativePartition.toString());
      }

      // Open only after the listener has been told about the partition.
      created.open();

      return created;
    }
コード例 #6
0
 /**
  * Chooses the partitioned writer implementation for the view's dataset format.
  *
  * <p>Parquet gets the incremental writer only when
  * {@code FileSystemProperties.NON_DURABLE_PARQUET_PROP} is disabled on the descriptor. Avro and
  * CSV always get the incremental writer. Every other format gets the non-durable writer.
  *
  * @param view the view the writer will write to
  * @return an incremental or non-durable partitioned writer for {@code view}
  */
 static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) {
   final DatasetDescriptor datasetDescriptor = view.getDataset().getDescriptor();
   final Format datasetFormat = datasetDescriptor.getFormat();

   final boolean useIncremental;
   if (Formats.PARQUET.equals(datasetFormat)) {
     // Parquet is non-durable by default; incremental only when that is switched off.
     useIncremental =
         DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, datasetDescriptor);
   } else {
     useIncremental = Formats.AVRO.equals(datasetFormat) || Formats.CSV.equals(datasetFormat);
   }

   if (useIncremental) {
     return new IncrementalPartitionedDatasetWriter<E>(view);
   }
   return new NonDurablePartitionedDatasetWriter<E>(view);
 }
コード例 #7
0
  /**
   * Moves this writer from {@code NEW} to {@code OPEN} and builds the per-partition writer cache.
   *
   * <p>The call is rejected unless the current state is {@code NEW} and the dataset's format
   * passes {@code FileSystemWriter.isSupportedFormat}. The cache holds at most
   * {@code maxWriters} entries and loads them through {@link #createCacheLoader()}.
   */
  @Override
  public void initialize() {
    Preconditions.checkState(
        state.equals(ReaderWriterState.NEW), "Unable to open a writer from state:%s", state);

    final DatasetDescriptor datasetDescriptor = view.getDataset().getDescriptor();
    ValidationException.check(
        FileSystemWriter.isSupportedFormat(datasetDescriptor),
        "Not a supported format: %s",
        datasetDescriptor.getFormat());

    LOG.debug("Opening partitioned dataset writer w/strategy:{}", partitionStrategy);

    // The removal listener is handed every entry the cache drops
    // (presumably to close the evicted writer; see DatasetWriterCloser).
    cachedWriters =
        CacheBuilder.newBuilder()
            .maximumSize(maxWriters)
            .removalListener(new DatasetWriterCloser<E>())
            .build(createCacheLoader());

    state = ReaderWriterState.OPEN;
  }
コード例 #8
0
 /**
  * Creates an input format restricted to the given view.
  *
  * <p>Delegates to the dataset-based constructor with the view's dataset cast to
  * {@code FileSystemDataset<E>}, then records the view itself.
  *
  * @param view the view to read; its dataset must be a {@code FileSystemDataset}
  * @param conf the configuration passed on to the dataset-based constructor
  */
 public FileSystemViewKeyInputFormat(FileSystemView<E> view, Configuration conf) {
   this((FileSystemDataset<E>) view.getDataset(), conf);
   LOG.debug("View: {}", view);
   this.view = view;
 }
コード例 #9
0
 /**
  * Creates a cache loader bound to the given view.
  *
  * <p>The path conversion helper is built from the schema of the view's dataset descriptor.
  *
  * @param view the view whose dataset the loaded writers will target
  */
 public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view) {
   this.convert =
       new PathConversion(
           view.getDataset()
               .getDescriptor()
               .getSchema());
   this.view = view;
 }