Java DatasetDescriptor.getSchema примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.kitesdk.data

Класс/Тип: DatasetDescriptor

Метод/Функция: getSchema

Примеров на hotexamples.com: 8

Java DatasetDescriptor.getSchema — найдено 8 примеров. Это лучшие примеры Java-кода для org.kitesdk.data.DatasetDescriptor.getSchema, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

isPartitioned(13)

getFormat(11)

getPartitionStrategy(8)

getSchema(8)

getLocation(7)

getProperty(2)

getCompressionType(1)

hasProperty(1)

listProperties(1)

Пример #1

Показать файл

Файл: FileSystemDataset.java Проект: laserson/kite

  /**
   * Merges the files of {@code update} into this dataset.
   *
   * <p>The update's descriptor must match this dataset's descriptor in format, in whether it is
   * partitioned, in partition strategy (when partitioned) and in schema. Each path returned by
   * {@code update.pathIterator()} is then renamed to the same relative location under this
   * dataset's directory; the destination parent directory is created when it does not exist. For
   * a partitioned dataset with a non-null partition listener, the listener is notified once per
   * distinct destination directory.
   *
   * @param update the dataset whose files are moved into this dataset
   * @throws DatasetRepositoryException if the format, partitioning, partition strategy or schema
   *     of {@code update} differs from this dataset's
   * @throws DatasetException if a rename returns {@code false}
   * @throws DatasetIOException if the file system raises an {@link IOException}
   */
  @Override
  public void merge(FileSystemDataset<E> update) {
    DatasetDescriptor updateDescriptor = update.getDescriptor();

    if (!updateDescriptor.getFormat().equals(descriptor.getFormat())) {
      throw new DatasetRepositoryException(
          "Cannot merge dataset format "
              + updateDescriptor.getFormat()
              + " with format "
              + descriptor.getFormat());
    }

    if (updateDescriptor.isPartitioned() != descriptor.isPartitioned()) {
      throw new DatasetRepositoryException(
          "Cannot merge an unpartitioned dataset with a " + " partitioned one or vice versa.");
    } else if (updateDescriptor.isPartitioned()
        && descriptor.isPartitioned()
        && !updateDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
      throw new DatasetRepositoryException(
          "Cannot merge dataset partition strategy "
              + updateDescriptor.getPartitionStrategy()
              + " with "
              + descriptor.getPartitionStrategy());
    }

    if (!updateDescriptor.getSchema().equals(descriptor.getSchema())) {
      // Report the schemas that actually differ; the formats were already verified equal above.
      throw new DatasetRepositoryException(
          "Cannot merge dataset schema "
              + updateDescriptor.getSchema()
              + " with schema "
              + descriptor.getSchema());
    }

    // Destination directories already reported to the listener during this merge.
    Set<String> addedPartitions = Sets.newHashSet();
    for (Path path : update.pathIterator()) {
      // Re-root the file: keep its path relative to the update's directory, under our directory.
      URI relativePath = update.getDirectory().toUri().relativize(path.toUri());
      Path newPath = new Path(directory, new Path(relativePath));
      Path newPartitionDirectory = newPath.getParent();
      try {
        if (!fileSystem.exists(newPartitionDirectory)) {
          fileSystem.mkdirs(newPartitionDirectory);
        }
        logger.debug("Renaming {} to {}", path, newPath);
        boolean renameOk = fileSystem.rename(path, newPath);
        if (!renameOk) {
          throw new DatasetException(
              "Dataset merge failed during rename of " + path + " to " + newPath);
        }
      } catch (IOException e) {
        throw new DatasetIOException("Dataset merge failed", e);
      }
      if (descriptor.isPartitioned() && partitionListener != null) {
        String partition = newPartitionDirectory.toString();
        if (!addedPartitions.contains(partition)) {
          partitionListener.partitionAdded(name, partition);
          addedPartitions.add(partition);
        }
      }
    }
  }

Пример #2

Показать файл

Файл: FileSystemWriter.java Проект: prazanna/kite

 /**
  * Creates the appender that matches the descriptor's format and writes to {@code temp}.
  *
  * <p>If no supported format matches, the writer state is set to {@code ERROR} before the
  * exception is thrown.
  *
  * @param temp the path handed to the new appender
  * @return a Parquet, Avro or CSV appender, depending on the descriptor
  * @throws UnknownFormatException if the format is not Parquet or Avro, and is not CSV with the
  *     allow-CSV property enabled
  */
 @VisibleForTesting
 @SuppressWarnings("unchecked")
 <E> FileAppender<E> newAppender(Path temp) {
   Format fmt = descriptor.getFormat();

   if (Formats.PARQUET.equals(fmt)) {
     // The durable Parquet appender is chosen only when the non-durable property is reported
     // as disabled for this descriptor; otherwise the plain Parquet appender is used.
     if (!DescriptorUtil.isDisabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
       return (FileAppender<E>)
           new ParquetAppender(
               fs, temp, descriptor.getSchema(), conf, descriptor.getCompressionType());
     }
     return (FileAppender<E>)
         new DurableParquetAppender(
             fs, temp, descriptor.getSchema(), conf, descriptor.getCompressionType());
   }

   if (Formats.AVRO.equals(fmt)) {
     return new AvroAppender<E>(fs, temp, descriptor.getSchema(), descriptor.getCompressionType());
   }

   // CSV is only accepted when explicitly allowed through the descriptor's properties.
   if (Formats.CSV.equals(fmt)
       && DescriptorUtil.isEnabled(FileSystemProperties.ALLOW_CSV_PROP, descriptor)) {
     return new CSVAppender<E>(fs, temp, descriptor);
   }

   this.state = ReaderWriterState.ERROR;
   throw new UnknownFormatException("Unknown format " + descriptor);
 }

Пример #3

Показать файл

Файл: FileSystemDatasetRepository.java Проект: stevek-ngdata/kite

  /**
   * Creates a dataset named {@code name} from {@code descriptor}.
   *
   * <p>The descriptor is registered through the metadata provider and passed through
   * {@code addRepositoryUri}; {@code ensureExists} is then called for the resulting descriptor
   * before the dataset object is built.
   *
   * @param name the dataset name; must not be null
   * @param descriptor the descriptor to create the dataset from; must not be null and must not
   *     carry a location
   * @return a {@code FileSystemDataset} built from the created descriptor
   * @throws IllegalArgumentException if an argument check fails
   * @throws DatasetRepositoryException if the created descriptor has no location
   */
  @Override
  public <E> Dataset<E> create(String name, DatasetDescriptor descriptor) {
    Preconditions.checkArgument(name != null, "Name can not be null");
    Preconditions.checkArgument(descriptor != null, "Descriptor can not be null");
    Preconditions.checkArgument(
        descriptor.getLocation() == null,
        "Descriptor location cannot be set; it is assigned by the MetadataProvider");

    DatasetDescriptor created = addRepositoryUri(metadataProvider.create(name, descriptor));

    final URI datasetLocation = created.getLocation();
    if (datasetLocation == null) {
      throw new DatasetRepositoryException(
          "[BUG] MetadataProvider did not assign a location to dataset:" + name);
    }

    ensureExists(created, conf);

    logger.debug(
        "Created dataset:{} schema:{} datasetPath:{}",
        new Object[] {name, created.getSchema(), datasetLocation.toString()});

    // A partition key is only supplied to the builder for partitioned datasets.
    return new FileSystemDataset.Builder()
        .name(name)
        .configuration(conf)
        .descriptor(created)
        .partitionKey(
            created.isPartitioned()
                ? org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey()
                : null)
        .partitionListener(getPartitionListener())
        .build();
  }

Пример #4

Показать файл

Файл: CrunchDatasets.java Проект: rbrush/kite

 /**
  * Captures what this object needs from {@code view}: the partition strategy and schema of its
  * dataset's descriptor as strings, the view's type, and its constraints when available.
  *
  * @param view the view whose dataset descriptor, type and constraints are read
  */
 private GetStorageKey(View<E> view) {
   DatasetDescriptor viewDescriptor = view.getDataset().getDescriptor();

   // get serializable versions of transient objects
   this.strategyString =
       viewDescriptor.getPartitionStrategy().toString(false /* no white space */);
   this.schemaString = viewDescriptor.getSchema().toString(false /* no white space */);
   this.type = view.getType();

   // Constraints are only read from AbstractRefinableView instances; otherwise they stay null.
   this.constraints =
       (view instanceof AbstractRefinableView)
           ? ((AbstractRefinableView) view).getConstraints().toQueryMap()
           : null;
 }

Пример #5

Показать файл

 /**
  * Creates the appender that matches the descriptor's format and writes to {@code temp}.
  *
  * <p>If no supported format matches, the writer state is set to {@code ERROR} before the
  * exception is thrown.
  *
  * @param temp the path handed to the new appender
  * @return a Parquet, Avro or CSV appender, depending on the descriptor
  * @throws DatasetWriterException if the format is not Parquet or Avro, and is not CSV with the
  *     allow-CSV property enabled
  */
 @SuppressWarnings("unchecked")
 private <E> FileAppender<E> newAppender(Path temp) {
   Format fmt = descriptor.getFormat();

   if (Formats.PARQUET.equals(fmt)) {
     // The durable (more costly) appender is used unless the non-durable property is reported
     // as enabled for this descriptor.
     if (!DescriptorUtil.isEnabled(FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
       return (FileAppender<E>)
           new DurableParquetAppender(fs, temp, descriptor.getSchema(), conf, true);
     }
     return (FileAppender<E>) new ParquetAppender(fs, temp, descriptor.getSchema(), conf, true);
   }

   if (Formats.AVRO.equals(fmt)) {
     return new AvroAppender<E>(fs, temp, descriptor.getSchema(), true);
   }

   // CSV is only accepted when explicitly allowed through the descriptor's properties.
   if (Formats.CSV.equals(fmt)
       && DescriptorUtil.isEnabled(FileSystemProperties.ALLOW_CSV_PROP, descriptor)) {
     return new CSVAppender<E>(fs, temp, descriptor);
   }

   this.state = ReaderWriterState.ERROR;
   throw new DatasetWriterException("Unknown format " + descriptor);
 }

Пример #6

Показать файл

Файл: Compatibility.java Проект: madhukard/kite

  /**
   * Verifies that {@code test} is a compatible replacement for the {@code existing}
   * {@link DatasetDescriptor}.
   *
   * <p>The format and the partitioned flag are passed to {@code checkNotChanged}, as is the
   * partition strategy when {@code existing} is partitioned. Finally the schema of {@code test}
   * must be able to read data written with the schema of {@code existing}.
   *
   * @param existing the current {@code DatasetDescriptor} for a dataset
   * @param test a new {@code DatasetDescriptor} for the same dataset
   * @throws IncompatibleSchemaException if the schema check fails
   */
  public static void checkCompatible(DatasetDescriptor existing, DatasetDescriptor test) {
    checkNotChanged("format", existing.getFormat(), test.getFormat());
    checkNotChanged("partitioning", existing.isPartitioned(), test.isPartitioned());
    if (existing.isPartitioned()) {
      checkNotChanged(
          "partition strategy", existing.getPartitionStrategy(), test.getPartitionStrategy());
    }

    // check can read records written with old schema using new schema
    Schema existingSchema = existing.getSchema();
    Schema candidateSchema = test.getSchema();
    if (SchemaValidationUtil.canRead(existingSchema, candidateSchema)) {
      return;
    }

    throw new IncompatibleSchemaException(
        "Schema cannot read data written using existing schema. Schema: "
            + candidateSchema.toString(true)
            + "\nExisting schema: "
            + existingSchema.toString(true));
  }

Пример #7

Показать файл

Файл: Compatibility.java Проект: madhukard/kite

  /**
   * Precondition-style validation of a {@link DatasetDescriptor}.
   *
   * <p>The descriptor's schema is passed to {@code checkSchema}. For a partitioned descriptor the
   * schema must be a record, every partition name must satisfy {@code isCompatibleName}, and no
   * partition name may repeat a schema field name or an earlier partition name.
   *
   * @param descriptor a {@link DatasetDescriptor}; must not be null
   * @throws IllegalArgumentException if a partitioned descriptor has a non-record schema
   * @throws IllegalStateException if any partition name is incompatible or duplicated
   */
  public static void checkDescriptor(DatasetDescriptor descriptor) {
    Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

    Schema schema = descriptor.getSchema();
    checkSchema(schema);

    if (!descriptor.isPartitioned()) {
      return;
    }

    // marked as [BUG] because this is checked in DatasetDescriptor
    Preconditions.checkArgument(
        schema.getType() == Schema.Type.RECORD,
        "[BUG] Partitioned datasets must have record schemas");

    // Names already taken: starts with the schema's field names, grows with partition names.
    Set<String> takenNames = Sets.newHashSet();
    for (Schema.Field field : schema.getFields()) {
      takenNames.add(field.name());
    }

    List<String> incompatible = Lists.newArrayList();
    List<String> duplicates = Lists.newArrayList();
    for (FieldPartitioner partitioner : descriptor.getPartitionStrategy().getFieldPartitioners()) {
      String partitionName = partitioner.getName();
      if (!isCompatibleName(partitionName)) {
        incompatible.add(partitionName);
        continue;
      }
      // Set.add returns false when the name was already present, i.e. it is a duplicate.
      if (!takenNames.add(partitionName)) {
        duplicates.add(partitionName);
      }
    }

    Preconditions.checkState(
        incompatible.isEmpty(),
        "Hive incompatible: partition names are not alphanumeric (plus '_'): %s",
        Joiner.on(", ").join(incompatible));
    Preconditions.checkState(
        duplicates.isEmpty(),
        "Hive incompatible: partition names duplicate data fields: %s",
        Joiner.on(", ").join(duplicates));
  }

Пример #8

Показать файл

Файл: FileSystemDatasetRepository.java Проект: stevek-ngdata/kite

  /**
   * Replaces the descriptor of the dataset named {@code name} with {@code descriptor}.
   *
   * <p>The stored descriptor is loaded from the metadata provider and compared with the new one:
   * the format must be equal, the location must be equal when the stored location is non-null,
   * the partitioned flag and (when partitioned) the partition strategy must be equal, and the
   * new schema must be able to read data written with the old schema. The descriptor is then
   * updated through the metadata provider and passed through {@code addRepositoryUri}.
   *
   * @param name the dataset name; must not be null
   * @param descriptor the new descriptor; must not be null
   * @return a {@code FileSystemDataset} built from the updated descriptor
   * @throws IllegalArgumentException if {@code name} or {@code descriptor} is null
   * @throws DatasetRepositoryException if the format, location, partitioning or partition
   *     strategy would change
   * @throws IncompatibleSchemaException if the new schema cannot read data written with the old
   *     schema
   */
  @Override
  public <E> Dataset<E> update(String name, DatasetDescriptor descriptor) {
    Preconditions.checkArgument(name != null, "Dataset name cannot be null");
    Preconditions.checkArgument(descriptor != null, "DatasetDescriptor cannot be null");

    DatasetDescriptor oldDescriptor = metadataProvider.load(name);

    // oldDescriptor is valid if load didn't throw NoSuchDatasetException

    if (!oldDescriptor.getFormat().equals(descriptor.getFormat())) {
      throw new DatasetRepositoryException(
          "Cannot change dataset format from "
              + oldDescriptor.getFormat()
              + " to "
              + descriptor.getFormat());
    }

    final URI oldLocation = oldDescriptor.getLocation();
    if ((oldLocation != null) && !(oldLocation.equals(descriptor.getLocation()))) {
      throw new DatasetRepositoryException("Cannot change the dataset's location");
    }

    if (oldDescriptor.isPartitioned() != descriptor.isPartitioned()) {
      throw new DatasetRepositoryException(
          "Cannot change an unpartitioned dataset to " + " partitioned or vice versa.");
    } else if (oldDescriptor.isPartitioned()
        && !oldDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
      // Both descriptors have the same partitioned flag here, so testing the old one suffices.
      throw new DatasetRepositoryException(
          "Cannot change partition strategy from "
              + oldDescriptor.getPartitionStrategy()
              + " to "
              + descriptor.getPartitionStrategy());
    }

    // check can read records written with old schema using new schema
    final Schema oldSchema = oldDescriptor.getSchema();
    final Schema newSchema = descriptor.getSchema();
    if (!SchemaValidationUtil.canRead(oldSchema, newSchema)) {
      throw new IncompatibleSchemaException(
          "New schema cannot read data "
              + "written using "
              + "old schema. New schema: "
              + newSchema.toString(true)
              + "\nOld schema: "
              + oldSchema.toString(true));
    }

    DatasetDescriptor updatedDescriptor = metadataProvider.update(name, descriptor);
    updatedDescriptor = addRepositoryUri(updatedDescriptor);

    // Pass the location itself rather than calling toString() on it: nothing above guarantees
    // it is non-null, and a logging statement must not be able to fail the update.
    logger.debug(
        "Updated dataset:{} schema:{} datasetPath:{}",
        new Object[] {name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation()});

    return new FileSystemDataset.Builder()
        .name(name)
        .configuration(conf)
        .descriptor(updatedDescriptor)
        .partitionKey(
            updatedDescriptor.isPartitioned()
                ? org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey()
                : null)
        .partitionListener(getPartitionListener())
        .build();
  }