Exemplo n.º 1
0
  @Test
  public void test() throws Exception {
    final PartitionStrategy p =
        new PartitionStrategy.Builder().identity("month", 12).hash("userId", 7).get();

    List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners();
    Assert.assertEquals(2, fieldPartitioners.size());

    FieldPartitioner fp0 = fieldPartitioners.get(0);
    assertEquals("month", fp0.getName());
    assertEquals(12, fp0.getCardinality());

    FieldPartitioner fp1 = fieldPartitioners.get(1);
    assertEquals("userId", fp1.getName());
    assertEquals(7, fp1.getCardinality());

    Entity e = new Entity();
    e.setMonth(2);
    e.setUserId(10);

    logger.debug("partitionStrategy:{}", p);

    assertEquals(12 * 7, p.getCardinality()); // useful for writers
  }
Exemplo n.º 2
0
  /**
   * Precondition-style validation that the DatasetDescriptor is compatible.
   *
   * @param descriptor a {@link DatasetDescriptor}
   */
  public static void checkDescriptor(DatasetDescriptor descriptor) {
    Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

    Schema schema = descriptor.getSchema();
    checkSchema(schema);

    if (descriptor.isPartitioned()) {
      // marked as [BUG] because this is checked in DatasetDescriptor
      Preconditions.checkArgument(
          schema.getType() == Schema.Type.RECORD,
          "[BUG] Partitioned datasets must have record schemas");

      Set<String> names = Sets.newHashSet();
      for (Schema.Field field : schema.getFields()) {
        names.add(field.name());
      }

      List<String> incompatible = Lists.newArrayList();
      List<String> duplicates = Lists.newArrayList();
      for (FieldPartitioner fp : descriptor.getPartitionStrategy().getFieldPartitioners()) {
        String name = fp.getName();
        if (!isCompatibleName(name)) {
          incompatible.add(name);
        } else if (names.contains(name)) {
          duplicates.add(name);
        } else {
          names.add(name);
        }
      }
      Preconditions.checkState(
          incompatible.isEmpty(),
          "Hive incompatible: partition names are not alphanumeric (plus '_'): %s",
          Joiner.on(", ").join(incompatible));
      Preconditions.checkState(
          duplicates.isEmpty(),
          "Hive incompatible: partition names duplicate data fields: %s",
          Joiner.on(", ").join(duplicates));
    }
  }