/**
 * Tests whether every partition described by {@code test} is also described by
 * {@code container}.
 *
 * <p>Each {@link FieldPartitioner} of the {@link PartitionStrategy} is examined in turn. A field
 * for which {@code container} has no value ({@link Marker#valueFor(FieldPartitioner)} returns
 * {@code null}) is treated as a wildcard and matches anything in {@code test}. A field that
 * {@code container} does constrain must also have a value in {@code test}, and the two values
 * must compare as equal under that field's partitioner.
 *
 * @param container a {@code Marker} that defines a set of partitions
 * @param test a {@code Marker} that may be a subset of {@code container}
 * @return {@code true} if the partitions in {@code test} are a subset of {@code container},
 *     {@code false} otherwise
 */
@SuppressWarnings("unchecked")
public boolean contains(Marker container, Marker test) {
  for (FieldPartitioner partitioner : strategy.getFieldPartitioners()) {
    Object expected = container.valueFor(partitioner);
    if (expected == null) {
      // Wildcard in the container: it matches anything, but this is not a reason to
      // return true yet -- the remaining non-null fields still have to be checked.
      continue;
    }

    Object actual = test.valueFor(partitioner);
    if (actual == null) {
      // test is broader than container for this field, so it cannot be a subset
      return false;
    }
    if (partitioner.compare(expected, actual) != 0) {
      return false;
    }
  }
  return true;
}
/** * Compare two {@link Marker} objects under the {@link PartitionStrategy}. * * <p>All comparisons are with respect to the partition ordering defined by this comparator's * {@code PartitionStrategy}. Under a {@code PartitionStrategy}, a {@code Marker} contains a set * of one or more partitions. A {@code Marker} is strictly less than another if all of the * partitions it contains are less than the partitions of the other. Similarly, if all partitions * are greater than the partitions of the other, then the {@code Marker} is greater. Two {@code * Markers} are equal if they contain the same set of partitions. * * <p>This method implements left-inclusive comparison: if either {@code Marker} contains the * other, then it is considered lesser. This means that there is at least one partition in the * containing {@code Marker} that is less than all of the partitions in the contained {@code * Marker}. This behavior is for checking an inclusive lower bound for a range. * * <p>m1 = [ 2013, Oct, * ] m2 = [ 2013, Oct, 12 ] leftCompare(m1, m2) returns 1 leftCompare(m2, * m1) returns -1 * * <p>The comparison method {@link #rightCompare(Marker, Marker)} implements right-inclusive * comparison. * * <p>Note: Because {@code Marker} objects are hierarchical, they are either completely disjoint * or one marker contains the other. If one contains the other and the two are not equal, this * method considers it to be less than than the other. 
* * <p>TODO: catch wildcard to concrete comparisons and throw an Exception * * @param m1 a {@code Marker} * @param m2 a {@code Marker} * @return -1 If all partitions in m1 are less than the partitions in m2 0 If m1 and m2 contain * the same set of partitions 1 If all partitions of m1 are greater than the partitions in m2 * @see MarkerComparator#compare(Marker, Marker) * @see MarkerComparator#rightCompare(Marker, Marker) * @since 0.9.0 */ @SuppressWarnings("unchecked") public int leftCompare(Marker m1, Marker m2) { for (FieldPartitioner field : strategy.getFieldPartitioners()) { Object m1Value = m1.valueFor(field); Object m2Value = m2.valueFor(field); if (m1Value == null) { if (m2Value != null) { // m1 contains m2 return -1; } } else if (m2Value == null) { // m2 contains m1 return 1; } else { int cmp = field.compare(m1Value, m2Value); if (cmp != 0) { return cmp; } } } return 0; }
/** * Precondition-style validation that the DatasetDescriptor is compatible. * * @param descriptor a {@link DatasetDescriptor} */ public static void checkDescriptor(DatasetDescriptor descriptor) { Preconditions.checkNotNull(descriptor, "Descriptor cannot be null"); Schema schema = descriptor.getSchema(); checkSchema(schema); if (descriptor.isPartitioned()) { // marked as [BUG] because this is checked in DatasetDescriptor Preconditions.checkArgument( schema.getType() == Schema.Type.RECORD, "[BUG] Partitioned datasets must have record schemas"); Set<String> names = Sets.newHashSet(); for (Schema.Field field : schema.getFields()) { names.add(field.name()); } List<String> incompatible = Lists.newArrayList(); List<String> duplicates = Lists.newArrayList(); for (FieldPartitioner fp : descriptor.getPartitionStrategy().getFieldPartitioners()) { String name = fp.getName(); if (!isCompatibleName(name)) { incompatible.add(name); } else if (names.contains(name)) { duplicates.add(name); } else { names.add(name); } } Preconditions.checkState( incompatible.isEmpty(), "Hive incompatible: partition names are not alphanumeric (plus '_'): %s", Joiner.on(", ").join(incompatible)); Preconditions.checkState( duplicates.isEmpty(), "Hive incompatible: partition names duplicate data fields: %s", Joiner.on(", ").join(duplicates)); } }
/** * Compare two {@link Marker} objects under the {@link PartitionStrategy}. * * <p>All comparisons are with respect to the partition ordering defined by this comparator's * {@code PartitionStrategy}. Under a {@code PartitionStrategy}, a {@code Marker} contains a set * of one or more partitions. A {@code Marker} is strictly less than another if all of the * partitions it contains are less than the partitions of the other. Similarly, if all partitions * are greater than the partitions of the other, then the {@code Marker} is greater. Two {@code * Markers} are equal if they contain the same set of partitions. * * <p>This method implements strictly exclusive comparison: if either {@code Marker} contains the * other, then this throws {@code IllegalStateException}. This is because there is at least one * partition in the containing {@code Marker} that is less than or equal to all partitions in the * contained {@code Marker} and at least one partition that is greater than or equal to all * partitions in the contained {@code Marker}. * * <p>Alternatively, the comparison methods {@link #leftCompare(Marker, Marker)} and {@link * #rightCompare(Marker, Marker)} consider contained {@code Marker} objects to be greater-than and * less-than respectively. * * <p>Note: Because {@code Marker} objects are hierarchical, they are either completely disjoint * or one marker contains the other. If one contains the other and the two are not equal, this * method throws {@code IllegalStateException}. 
* * <p>TODO: catch wildcard to concrete comparisons and throw an Exception * * @param m1 a {@code Marker} * @param m2 a {@code Marker} * @return -1 If all partitions in m1 are less than the partitions in m2 0 If m1 and m2 contain * the same set of partitions 1 If all partitions of m1 are greater than the partitions in m2 * @throws IllegalStateException If either {@code Marker} is a proper subset of the other * @see MarkerComparator#leftCompare(Marker, Marker) * @see MarkerComparator#rightCompare(Marker, Marker) * @since 0.9.0 */ @Override @SuppressWarnings("unchecked") public int compare(Marker m1, Marker m2) { for (FieldPartitioner field : strategy.getFieldPartitioners()) { Object m1Value = m1.valueFor(field); Object m2Value = m2.valueFor(field); // if either is null, but not both, then they are Incomparable if (m1Value == null) { if (m2Value != null) { // m1 contains m2 throw new IllegalStateException("Incomparable"); } } else if (m2Value == null) { // m2 contains m1 throw new IllegalStateException("Incomparable"); } else { int cmp = field.compare(m1Value, m2Value); if (cmp != 0) { return cmp; } } } return 0; }
@Test public void test() throws Exception { final PartitionStrategy p = new PartitionStrategy.Builder().identity("month", 12).hash("userId", 7).get(); List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners(); Assert.assertEquals(2, fieldPartitioners.size()); FieldPartitioner fp0 = fieldPartitioners.get(0); assertEquals("month", fp0.getName()); assertEquals(12, fp0.getCardinality()); FieldPartitioner fp1 = fieldPartitioners.get(1); assertEquals("userId", fp1.getName()); assertEquals(7, fp1.getCardinality()); Entity e = new Entity(); e.setMonth(2); e.setUserId(10); logger.debug("partitionStrategy:{}", p); assertEquals(12 * 7, p.getCardinality()); // useful for writers }