/** * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a * sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes * to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}. */ @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = job.getConfiguration(); final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = sampler.getSample(inf, job); RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf)); FileSystem fs = dst.getFileSystem(conf); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for (int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
/**
 * Walks the fields of two tuples in order and reports the index of the first field in which
 * they differ. A field that has a custom {@link RawComparator} registered is compared through
 * that comparator's "compare" method; every other field is compared with {@code equals}.
 *
 * <p>Important: the contract of this method is that the tuples always differ somewhere between
 * {@code minFieldIndex} and {@code maxFieldIndex} (both inclusive). If every field in that range
 * compares the same, a {@link RuntimeException} is thrown.
 */
private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
  int firstSchemaId = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
  int secondSchemaId = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
  // Each tuple may belong to a different schema, so each one needs its own translation from
  // the group-level field index to the position inside the tuple.
  int[] firstTranslation = serInfo.getGroupSchemaIndexTranslation(firstSchemaId);
  int[] secondTranslation = serInfo.getGroupSchemaIndexTranslation(secondSchemaId);

  for (int field = minFieldIndex; field <= maxFieldIndex; field++) {
    Object left = tuple1.get(firstTranslation[field]);
    Object right = tuple2.get(secondTranslation[field]);
    @SuppressWarnings("unchecked")
    RawComparator<Object> custom = (RawComparator<Object>) customComparators[field];

    boolean same = (custom == null) ? left.equals(right) : custom.compare(left, right) == 0;
    if (!same) {
      return field;
    }
  }

  String message =
      "Illegal state.The tuples "
          + tuple1
          + " and "
          + tuple2
          + " compare the same between indexes "
          + minFieldIndex
          + " and "
          + maxFieldIndex;
  throw new RuntimeException(message);
}
/**
 * Wraps both integers in {@link IntWritable}s, serializes them with {@code serialize} and prints
 * the result of comparing the two full byte arrays with {@code comparator} to standard output.
 *
 * @param i the first value
 * @param j the second value
 * @throws IOException propagated from serialization
 */
public static void compare2(Integer i, Integer j) throws IOException {
  final IntWritable first = new IntWritable(i);
  final IntWritable second = new IntWritable(j);

  final byte[] firstBytes = serialize(first);
  final byte[] secondBytes = serialize(second);

  // Compare the complete serialized form of each value: offset 0, full length.
  System.out.println(
      comparator.compare(firstBytes, 0, firstBytes.length, secondBytes, 0, secondBytes.length));
}
/**
 * Distributes every custom comparator found in the given criteria and records where it went.
 *
 * <p>For each sort element that carries a custom comparator, the comparator is handed to
 * {@link InstancesDistributor#distribute} under a freshly generated unique file name. The
 * reference {@code prefix + "|" + elementName} is appended to {@code comparatorRefs} and the file
 * name to {@code comparatorInstanceFiles}, so both lists stay aligned by position.
 *
 * @param criteria the sort criteria to scan; may be {@code null}, in which case nothing is done
 * @param conf the configuration passed on to the distributor
 * @param comparatorRefs output list receiving one reference per distributed comparator
 * @param comparatorInstanceFiles output list receiving the matching instance file names
 * @param prefix prefix used when building each reference
 * @return the names of the instance files created (empty when {@code criteria} is {@code null})
 * @throws TupleMRException if a comparator does not implement {@link Serializable} or its
 *     distribution fails
 */
static Set<String> serializeComparators(
    Criteria criteria,
    Configuration conf,
    List<String> comparatorRefs,
    List<String> comparatorInstanceFiles,
    String prefix)
    throws TupleMRException {
  Set<String> created = new HashSet<String>();
  if (criteria == null) {
    return created;
  }

  for (SortElement sortElement : criteria.getElements()) {
    RawComparator<?> custom = sortElement.getCustomComparator();
    if (custom == null) {
      // Nothing to distribute for elements that use the default ordering.
      continue;
    }

    String comparatorClass = custom.getClass().getName();
    if (!(custom instanceof Serializable)) {
      throw new TupleMRException(
          "The class '"
              + comparatorClass
              + "' is not Serializable."
              + " The customs comparators must implement Serializable.");
    }

    String reference = prefix + "|" + sortElement.getName();
    // A random UUID keeps file names from colliding between comparators.
    String fileName = UUID.randomUUID().toString() + '.' + "comparator.dat";
    try {
      InstancesDistributor.distribute(custom, fileName, conf);
      created.add(fileName);
    } catch (Exception e) {
      throw new TupleMRException("The class " + comparatorClass + " can't be serialized", e);
    }

    comparatorRefs.add(reference);
    comparatorInstanceFiles.add(fileName);
  }
  return created;
}
/**
 * Tells whether the key of segment {@code a} sorts strictly before the key of segment {@code b}.
 *
 * <p>Both arguments are cast to {@code Segment<K, V>}. For each key buffer, the bytes handed to
 * {@code comparator} start at the buffer's current position and extend up to its length.
 *
 * @param a the first segment
 * @param b the second segment
 * @return {@code true} if the comparator result for the two keys is negative
 */
@SuppressWarnings("unchecked")
protected boolean lessThan(Object a, Object b) {
  DataInputBuffer leftKey = ((Segment<K, V>) a).getKey();
  DataInputBuffer rightKey = ((Segment<K, V>) b).getKey();

  // Only the unread part of each buffer (position .. length) belongs to the key.
  int leftStart = leftKey.getPosition();
  int leftLength = leftKey.getLength() - leftStart;
  int rightStart = rightKey.getPosition();
  int rightLength = rightKey.getLength() - rightStart;

  return comparator.compare(
          leftKey.getData(), leftStart, leftLength, rightKey.getData(), rightStart, rightLength)
      < 0;
}
/** * Compare logical range, st i, j MOD offset capacity. Compare by partition, then by key. * * @see IndexedSortable#compare */ public int compare(int i, int j) { final int ii = kvoffsets[i % kvoffsets.length]; final int ij = kvoffsets[j % kvoffsets.length]; // sort by partition if (kvindices[ii + PARTITION] != kvindices[ij + PARTITION]) { return kvindices[ii + PARTITION] - kvindices[ij + PARTITION]; } // sort by key return comparator.compare( kvbuffer, kvindices[ii + KEYSTART], kvindices[ii + VALSTART] - kvindices[ii + KEYSTART], kvbuffer, kvindices[ij + KEYSTART], kvindices[ij + VALSTART] - kvindices[ij + KEYSTART]); }
/**
 * Wraps both integers in {@link IntWritable}s and prints the result of comparing the two objects
 * with {@code comparator} to standard output.
 *
 * @param i the first value
 * @param j the second value
 */
public static void compare(Integer i, Integer j) {
  final IntWritable first = new IntWritable(i);
  final IntWritable second = new IntWritable(j);

  // Object-level comparison; no serialization is involved here.
  System.out.println(comparator.compare(first, second));
}