Example 1
0
    /**
     * Creates and initializes a {@link FileSystemWriter} for the partition
     * identified by {@code key}.
     *
     * <p>The dataset's partition listener, if any, is notified before the
     * writer is initialized so it can decide whether and how the partition
     * directory is created.
     *
     * @param key the storage key naming the partition
     * @return an initialized writer for that partition
     * @throws IllegalStateException if the view's dataset is not a
     *     {@code FileSystemDataset}
     */
    @Override
    public FileSystemWriter<E> load(StorageKey key) throws Exception {
      Preconditions.checkState(
          view.getDataset() instanceof FileSystemDataset,
          "FileSystemWriters cannot create writer for " + view.getDataset());

      final FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
      final Path partitionDir = convert.fromKey(key);

      FileSystemWriter<E> partitionWriter =
          FileSystemWriter.newWriter(
              dataset.getFileSystem(),
              new Path(dataset.getDirectory(), partitionDir),
              dataset.getDescriptor());

      // notify the listener before initialize() so it can control partition
      // directory creation
      PartitionListener listener = dataset.getPartitionListener();
      if (listener != null) {
        listener.partitionAdded(
            dataset.getNamespace(), dataset.getName(), partitionDir.toString());
      }

      partitionWriter.initialize();

      return partitionWriter;
    }
Example 2
0
  /**
   * Creates a writer for the partitioned dataset behind {@code view}.
   *
   * <p>The size of the per-partition writer cache is taken from the dataset
   * property {@code WRITER_CACHE_SIZE_PROP} when set (falling back to the
   * default on a malformed value); otherwise it defaults to
   * {@code DEFAULT_WRITER_CACHE_SIZE}, capped at the partition strategy's
   * cardinality when that cardinality is known.
   *
   * @param view the view to write to; its dataset must be partitioned
   * @throws IllegalArgumentException if the dataset is not partitioned
   */
  private PartitionedDatasetWriter(FileSystemView<E> view) {
    final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    Preconditions.checkArgument(
        descriptor.isPartitioned(), "Dataset " + view.getDataset() + " is not partitioned");

    this.view = view;
    this.partitionStrategy = descriptor.getPartitionStrategy();

    int cacheSize = DEFAULT_WRITER_CACHE_SIZE;
    final String sizeProp = FileSystemProperties.WRITER_CACHE_SIZE_PROP;
    if (descriptor.hasProperty(sizeProp)) {
      final String configured = descriptor.getProperty(sizeProp);
      try {
        cacheSize = Integer.parseInt(configured);
      } catch (NumberFormatException e) {
        // keep the default rather than failing construction on a bad setting
        LOG.warn("Not an integer: " + sizeProp + "=" + configured);
      }
    } else if (partitionStrategy.getCardinality() != FieldPartitioner.UNKNOWN_CARDINALITY) {
      // no explicit setting: never cache more writers than there are partitions
      cacheSize = Math.min(cacheSize, partitionStrategy.getCardinality());
    }
    this.maxWriters = cacheSize;

    this.state = ReaderWriterState.NEW;
    this.reusedKey = new StorageKey(partitionStrategy);
    this.accessor = view.getAccessor();
    this.provided = view.getProvidedValues();
  }
Example 3
0
    /**
     * Creates and initializes an incremental writer for the partition
     * identified by {@code key}.
     *
     * <p>The dataset's partition listener, if any, is notified before the
     * writer is initialized so it can decide whether and how the partition
     * directory is created.
     *
     * @param key the storage key naming the partition
     * @return an initialized incremental writer for that partition
     * @throws IllegalStateException if the view's dataset is not a
     *     {@code FileSystemDataset}
     */
    @Override
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(
        value = "BC_UNCONFIRMED_CAST_OF_RETURN_VALUE",
        justification = "Writer is known to be IncrementalWriter")
    public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception {
      Preconditions.checkState(
          view.getDataset() instanceof FileSystemDataset,
          "FileSystemWriters cannot create writer for " + view.getDataset());

      final FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
      final Path partitionDir = convert.fromKey(key);

      FileSystemWriter<E> partitionWriter =
          FileSystemWriter.newWriter(
              dataset.getFileSystem(),
              new Path(dataset.getDirectory(), partitionDir),
              dataset.getDescriptor());

      // notify the listener before initialize() so it can control partition
      // directory creation
      PartitionListener listener = dataset.getPartitionListener();
      if (listener != null) {
        listener.partitionAdded(
            dataset.getNamespace(), dataset.getName(), partitionDir.toString());
      }

      partitionWriter.initialize();

      return (FileSystemWriter.IncrementalWriter<E>) partitionWriter;
    }
  /**
   * Creates a writer for the partitioned dataset behind {@code view}.
   *
   * <p>The per-partition writer cache holds at most 10 writers, capped at the
   * partition strategy's cardinality when that cardinality is known.
   *
   * @param view the view to write to; its dataset must be partitioned
   * @throws IllegalArgumentException if the dataset is not partitioned
   */
  public PartitionedDatasetWriter(FileSystemView<E> view) {
    final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    Preconditions.checkArgument(
        descriptor.isPartitioned(), "Dataset " + view.getDataset() + " is not partitioned");

    this.view = view;
    this.partitionStrategy = descriptor.getPartitionStrategy();

    // Only cap the cache size by the cardinality when it is known: taking
    // Math.min against the UNKNOWN_CARDINALITY sentinel would otherwise
    // produce an invalid (non-positive) cache size.
    int maxWriters = 10;
    if (partitionStrategy.getCardinality() != FieldPartitioner.UNKNOWN_CARDINALITY) {
      maxWriters = Math.min(maxWriters, partitionStrategy.getCardinality());
    }
    this.maxWriters = maxWriters;

    this.state = ReaderWriterState.NEW;
    this.reusedKey = new StorageKey(partitionStrategy);
  }
  /**
   * Writes {@code entity} to the partition it belongs to, creating and
   * caching a per-partition writer on first use.
   *
   * @param entity the entity to write
   * @throws IllegalStateException if this writer is not open
   * @throws IllegalArgumentException if the entity is outside this view, or
   *     if a partition writer cannot be created for it
   */
  @Override
  public void write(E entity) {
    Preconditions.checkState(
        state.equals(ReaderWriterState.OPEN), "Attempt to write to a writer in state:%s", state);

    reusedKey.reuseFor(entity);

    DatasetWriter<E> partitionWriter = cachedWriters.getIfPresent(reusedKey);
    if (partitionWriter == null) {
      // only validate view membership when a new writer must be created,
      // which avoids re-checking on every single write
      Preconditions.checkArgument(
          view.includes(entity), "View %s does not include entity %s", view, entity);
      // copy the key: the cache retains its keys, so the mutable reusedKey
      // instance cannot be stored directly
      StorageKey cacheKey = StorageKey.copy(reusedKey);
      try {
        partitionWriter = cachedWriters.getUnchecked(cacheKey);
      } catch (UncheckedExecutionException ex) {
        throw new IllegalArgumentException(
            "Problem creating view for entity: " + entity, ex.getCause());
      }
    }

    partitionWriter.write(entity);
  }
    /**
     * Creates and opens a writer for the partition identified by
     * {@code key}.
     *
     * <p>The dataset's partition listener, if any, is notified before the
     * writer is opened.
     *
     * @param key the storage key naming the partition
     * @return an open writer for that partition
     * @throws IllegalStateException if the view's dataset is not a
     *     {@code FileSystemDataset}
     */
    @Override
    public DatasetWriter<E> load(StorageKey key) throws Exception {
      Preconditions.checkState(
          view.getDataset() instanceof FileSystemDataset,
          "FileSystemWriters cannot create writer for " + view.getDataset());

      final FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
      final Path partitionDir = convert.fromKey(key);

      DatasetWriter<E> partitionWriter =
          new FileSystemWriter<E>(
              dataset.getFileSystem(),
              new Path(dataset.getDirectory(), partitionDir),
              dataset.getDescriptor());

      // notify the listener before opening so it can react to the new partition
      PartitionListener listener = dataset.getPartitionListener();
      if (listener != null) {
        listener.partitionAdded(dataset.getName(), partitionDir.toString());
      }

      partitionWriter.open();

      return partitionWriter;
    }
 /**
  * Returns a record reader for the given split. When a view is set, the
  * reader is wrapped so that entities outside the view's constraints are
  * filtered out; otherwise the unfiltered reader is returned as-is.
  */
 @Override
 public RecordReader<E, Void> createRecordReader(
     InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
     throws IOException, InterruptedException {
   RecordReader<E, Void> reader =
       createUnfilteredRecordReader(inputSplit, taskAttemptContext);
   if (view == null) {
     return reader;
   }
   // apply the view's constraints by filtering entities as they are read
   return new FilteredRecordReader<E>(
       reader, ((AbstractRefinableView) view).getConstraints(), view.getAccessor());
 }
 /**
  * Collects the data file paths for this input (from the view when set,
  * otherwise from the whole dataset) and registers them as the job's input
  * paths.
  *
  * @param jobContext the context whose configuration must also receive the
  *     input paths (Hadoop 1 compatibility)
  * @param job the job to configure
  * @return {@code true} if at least one input path was set; {@code false}
  *     when there is no data to read
  * @throws IOException if listing or setting the paths fails
  */
 @SuppressWarnings("unchecked")
 private boolean setInputPaths(JobContext jobContext, Job job) throws IOException {
   // raw Iterator cast: both pathIterator() variants yield Path elements
   List<Path> paths =
       Lists.newArrayList(
           (Iterator) (view == null ? dataset.pathIterator() : view.pathIterator()));
   LOG.debug("Input paths: {}", paths);
   if (paths.isEmpty()) {
     return false;
   }
   FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
   // the following line is needed for Hadoop 1, otherwise the paths are not set
   // Hadoop.JobContext.getConfiguration is invoked reflectively to bridge the
   // Hadoop 1/2 API difference
   Configuration contextConf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
   Configuration jobConf = Hadoop.JobContext.getConfiguration.invoke(job);
   contextConf.set("mapred.input.dir", jobConf.get("mapred.input.dir"));
   return true;
 }
Example 9
0
 /**
  * Picks a writer implementation based on the dataset's storage format:
  * formats with durable (incremental) write support get an incremental
  * writer, others fall back to the non-durable variant. Parquet is treated
  * as non-durable unless the non-durable behavior is explicitly disabled.
  *
  * @param view the view to create a writer for
  * @return a partitioned writer appropriate for the dataset's format
  */
 static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) {
   DatasetDescriptor descriptor = view.getDataset().getDescriptor();
   Format format = descriptor.getFormat();
   final boolean incremental;
   if (Formats.PARQUET.equals(format)) {
     // by default, Parquet is not durable
     incremental =
         DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor);
   } else {
     incremental = Formats.AVRO.equals(format) || Formats.CSV.equals(format);
   }
   if (incremental) {
     return new IncrementalPartitionedDatasetWriter<E>(view);
   } else {
     return new NonDurablePartitionedDatasetWriter<E>(view);
   }
 }
Example 10
0
  /**
   * Transitions this writer from NEW to OPEN, building the cache that holds
   * one writer per partition.
   *
   * @throws IllegalStateException if this writer is not in the NEW state
   * @throws ValidationException if the dataset's format is not writable
   */
  @Override
  public void initialize() {
    Preconditions.checkState(
        state.equals(ReaderWriterState.NEW), "Unable to open a writer from state:%s", state);

    DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    ValidationException.check(
        FileSystemWriter.isSupportedFormat(descriptor),
        "Not a supported format: %s",
        descriptor.getFormat());

    LOG.debug("Opening partitioned dataset writer w/strategy:{}", partitionStrategy);

    // writers evicted from the cache are closed by the removal listener
    this.cachedWriters =
        CacheBuilder.newBuilder()
            .maximumSize(maxWriters)
            .removalListener(new DatasetWriterCloser<E>())
            .build(createCacheLoader());

    this.state = ReaderWriterState.OPEN;
  }
Example 11
0
 /**
  * Creates an input format that reads only the data covered by {@code view}.
  *
  * <p>Delegates to the dataset-based constructor, then records the view so
  * record readers can filter entities by its constraints.
  *
  * @param view the view to read
  * @param conf the Hadoop configuration to use
  */
 public FileSystemViewKeyInputFormat(FileSystemView<E> view, Configuration conf) {
   this((FileSystemDataset<E>) view.getDataset(), conf);
   this.view = view;
   LOG.debug("View: {}", view);
 }
Example 12
0
 /**
  * Creates a cache loader that builds incremental writers for partitions of
  * {@code view}'s dataset.
  *
  * @param view the view whose dataset the loaded writers will write to
  */
 public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view) {
   this.view = view;
   // converts storage keys to partition paths using the dataset's schema
   this.convert = new PathConversion(view.getDataset().getDescriptor().getSchema());
 }
Example 13
0
 /**
  * Returns an iterator that provides all leaf-level directories in this view.
  *
  * <p>Delegates to the unbounded view, so no constraints are applied.
  *
  * @return leaf-directory iterator
  */
 Iterator<Path> dirIterator() {
   return unbounded.dirIterator();
 }
Example 14
0
 /**
  * Returns an iterator over the data file paths in this view.
  *
  * <p>Delegates to the unbounded view, so no constraints are applied.
  *
  * @return iterator over this view's file paths
  */
 PathIterator pathIterator() {
   return unbounded.pathIterator();
 }
Example 15
0
 /**
  * Deletes all data in this view.
  *
  * @return {@code true} if any data was deleted, {@code false} otherwise
  */
 public boolean deleteAll() {
   // no constraints, so delete is always aligned to partition boundaries
   return unbounded.deleteAllUnsafe();
 }