Exemple #1
0
 /**
  * Tests whether every partition selected by {@code test} is also selected by {@code container}.
  *
  * <p>Every {@link FieldPartitioner} of the {@link PartitionStrategy} is examined. A field for
  * which {@code container} has no value ({@link Marker#valueFor(FieldPartitioner)} returns {@code
  * null}) acts as a wildcard and always matches. A field that {@code container} does constrain
  * must also have a value in {@code test}, and the two values must compare as equal under that
  * field's partitioner.
  *
  * @param container a {@code Marker} that defines a set of partitions
  * @param test a {@code Marker} that may be a subset of {@code container}
  * @return {@code true} if the partitions in {@code test} are a subset of {@code container},
  *     {@code false} otherwise
  */
 @SuppressWarnings("unchecked")
 public boolean contains(Marker container, Marker test) {
   for (FieldPartitioner partitioner : strategy.getFieldPartitioners()) {
     Object expected = container.valueFor(partitioner);
     if (expected == null) {
       // A null container value is a wildcard: it matches anything, so keep going and check the
       // remaining fields instead of returning early.
       continue;
     }

     Object actual = test.valueFor(partitioner);
     if (actual == null) {
       // test is broader than container for this field, so it cannot be a subset
       return false;
     }
     if (partitioner.compare(expected, actual) != 0) {
       return false;
     }
   }
   return true;
 }
Exemple #2
0
 /**
  * Compares two {@link Marker} objects under the {@link PartitionStrategy}, ordering a containing
  * {@code Marker} before the {@code Marker} it contains.
  *
  * <p>All comparisons are with respect to the partition ordering defined by this comparator's
  * {@code PartitionStrategy}. Under a {@code PartitionStrategy}, a {@code Marker} contains a set
  * of one or more partitions. A {@code Marker} is strictly less than another if all of the
  * partitions it contains are less than the partitions of the other. Similarly, if all partitions
  * are greater than the partitions of the other, then the {@code Marker} is greater. Two {@code
  * Markers} are equal if they contain the same set of partitions.
  *
  * <p>This method implements left-inclusive comparison: if one {@code Marker} contains the other,
  * then the containing {@code Marker} is considered lesser. This reflects that there is at least
  * one partition in the containing {@code Marker} that is less than all of the partitions in the
  * contained {@code Marker}. This behavior is for checking an inclusive lower bound for a range.
  *
  * <p>For example:
  *
  * <pre>
  * m1 = [ 2013, Oct, * ]
  * m2 = [ 2013, Oct, 12 ]
  * leftCompare(m1, m2) returns -1
  * leftCompare(m2, m1) returns 1
  * </pre>
  *
  * <p>The comparison method {@link #rightCompare(Marker, Marker)} implements right-inclusive
  * comparison.
  *
  * <p>Note: Because {@code Marker} objects are hierarchical, they are either completely disjoint
  * or one marker contains the other. If one contains the other and the two are not equal, this
  * method considers the containing marker to be less than the other.
  *
  * <p>TODO: catch wildcard to concrete comparisons and throw an Exception
  *
  * @param m1 a {@code Marker}
  * @param m2 a {@code Marker}
  * @return a negative value if all partitions in m1 are less than the partitions in m2 or m1
  *     contains m2 (-1 in the containment case); 0 if m1 and m2 contain the same set of
  *     partitions; a positive value if all partitions of m1 are greater than the partitions in
  *     m2 or m2 contains m1 (1 in the containment case)
  * @see MarkerComparator#compare(Marker, Marker)
  * @see MarkerComparator#rightCompare(Marker, Marker)
  * @since 0.9.0
  */
 @SuppressWarnings("unchecked")
 public int leftCompare(Marker m1, Marker m2) {
   for (FieldPartitioner partitioner : strategy.getFieldPartitioners()) {
     Object left = m1.valueFor(partitioner);
     Object right = m2.valueFor(partitioner);
     boolean leftIsWildcard = (left == null);
     boolean rightIsWildcard = (right == null);

     if (leftIsWildcard && rightIsWildcard) {
       // neither marker constrains this field; move on to the next one
       continue;
     }
     if (leftIsWildcard) {
       // m1 contains m2
       return -1;
     }
     if (rightIsWildcard) {
       // m2 contains m1
       return 1;
     }

     int result = partitioner.compare(left, right);
     if (result != 0) {
       return result;
     }
   }
   return 0;
 }
Exemple #3
0
  /**
   * Precondition-style validation that a {@link DatasetDescriptor} is compatible.
   *
   * <p>The descriptor's schema is always passed to {@code checkSchema}. When the descriptor is
   * partitioned, the name of every field partitioner is additionally required to pass {@code
   * isCompatibleName} and to be distinct from every data field name in the schema and from every
   * previously accepted partition name.
   *
   * @param descriptor a {@link DatasetDescriptor}
   * @throws NullPointerException if {@code descriptor} is null
   * @throws IllegalArgumentException if the descriptor is partitioned but its schema is not a
   *     record
   * @throws IllegalStateException if any partition name is incompatible or duplicates another
   *     name
   */
  public static void checkDescriptor(DatasetDescriptor descriptor) {
    Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

    Schema schema = descriptor.getSchema();
    checkSchema(schema);

    if (!descriptor.isPartitioned()) {
      // nothing else to validate without a partition strategy
      return;
    }

    // marked as [BUG] because this is checked in DatasetDescriptor
    Preconditions.checkArgument(
        schema.getType() == Schema.Type.RECORD,
        "[BUG] Partitioned datasets must have record schemas");

    // Start with the data field names; accepted partition names are added as they are seen.
    Set<String> seenNames = Sets.newHashSet();
    for (Schema.Field dataField : schema.getFields()) {
      seenNames.add(dataField.name());
    }

    List<String> badNames = Lists.newArrayList();
    List<String> clashingNames = Lists.newArrayList();
    for (FieldPartitioner part : descriptor.getPartitionStrategy().getFieldPartitioners()) {
      String partitionName = part.getName();
      if (!isCompatibleName(partitionName)) {
        badNames.add(partitionName);
      } else if (!seenNames.add(partitionName)) {
        // add() returned false, so the name was already taken
        clashingNames.add(partitionName);
      }
    }

    Preconditions.checkState(
        badNames.isEmpty(),
        "Hive incompatible: partition names are not alphanumeric (plus '_'): %s",
        Joiner.on(", ").join(badNames));
    Preconditions.checkState(
        clashingNames.isEmpty(),
        "Hive incompatible: partition names duplicate data fields: %s",
        Joiner.on(", ").join(clashingNames));
  }
Exemple #4
0
 /**
  * Compares two {@link Marker} objects under the {@link PartitionStrategy}, refusing to order a
  * {@code Marker} against one that contains it.
  *
  * <p>All comparisons are with respect to the partition ordering defined by this comparator's
  * {@code PartitionStrategy}. Under a {@code PartitionStrategy}, a {@code Marker} contains a set
  * of one or more partitions. A {@code Marker} is strictly less than another if all of the
  * partitions it contains are less than the partitions of the other. Similarly, if all partitions
  * are greater than the partitions of the other, then the {@code Marker} is greater. Two {@code
  * Markers} are equal if they contain the same set of partitions.
  *
  * <p>This method implements strictly exclusive comparison: if either {@code Marker} contains the
  * other, then this throws {@code IllegalStateException}. This is because there is at least one
  * partition in the containing {@code Marker} that is less than or equal to all partitions in the
  * contained {@code Marker} and at least one partition that is greater than or equal to all
  * partitions in the contained {@code Marker}.
  *
  * <p>Alternatively, the comparison methods {@link #leftCompare(Marker, Marker)} and {@link
  * #rightCompare(Marker, Marker)} consider contained {@code Marker} objects to be greater-than and
  * less-than respectively.
  *
  * <p>Note: Because {@code Marker} objects are hierarchical, they are either completely disjoint
  * or one marker contains the other. Fields are checked one at a time and the first field whose
  * values differ decides the result, so a wildcard mismatch is only detected if all earlier
  * fields are equal.
  *
  * @param m1 a {@code Marker}
  * @param m2 a {@code Marker}
  * @return a negative value if all partitions in m1 are less than the partitions in m2; 0 if m1
  *     and m2 contain the same set of partitions; a positive value if all partitions of m1 are
  *     greater than the partitions in m2
  * @throws IllegalStateException If either {@code Marker} is a proper subset of the other
  * @see MarkerComparator#leftCompare(Marker, Marker)
  * @see MarkerComparator#rightCompare(Marker, Marker)
  * @since 0.9.0
  */
 @Override
 @SuppressWarnings("unchecked")
 public int compare(Marker m1, Marker m2) {
   for (FieldPartitioner partitioner : strategy.getFieldPartitioners()) {
     Object left = m1.valueFor(partitioner);
     Object right = m2.valueFor(partitioner);
     boolean leftIsWildcard = (left == null);
     boolean rightIsWildcard = (right == null);

     if (leftIsWildcard != rightIsWildcard) {
       // exactly one side is a wildcard, so one marker contains the other
       throw new IllegalStateException("Incomparable");
     }
     if (leftIsWildcard) {
       // both are wildcards for this field; nothing to compare
       continue;
     }

     int result = partitioner.compare(left, right);
     if (result != 0) {
       return result;
     }
   }
   return 0;
 }
  /**
   * Verifies that a strategy built from an identity partitioner ("month", 12) and a hash
   * partitioner ("userId", 7) exposes both field partitioners, in that order, with the configured
   * names and cardinalities, and that the strategy's own cardinality is their product.
   */
  @Test
  public void test() throws Exception {
    final PartitionStrategy p =
        new PartitionStrategy.Builder().identity("month", 12).hash("userId", 7).get();

    List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners();
    assertEquals(2, fieldPartitioners.size());

    FieldPartitioner fp0 = fieldPartitioners.get(0);
    assertEquals("month", fp0.getName());
    assertEquals(12, fp0.getCardinality());

    FieldPartitioner fp1 = fieldPartitioners.get(1);
    assertEquals("userId", fp1.getName());
    assertEquals(7, fp1.getCardinality());

    logger.debug("partitionStrategy:{}", p);

    assertEquals(12 * 7, p.getCardinality()); // useful for writers
  }