Beispiel #1
0
 /**
  * Writes the partition file for the given job, using the supplied Sampler.
  *
  * <p>Asks the sampler for a sample key set, sorts it with the job's sort comparator, picks one
  * split key per reducer boundary ({@code numReduceTasks - 1} keys in total) and appends each key,
  * paired with a {@link NullWritable} value, to the file named by
  * {@link TotalOrderPartitioner#getPartitionFile}. An existing file at that path is deleted first.
  *
  * <p>The writer is always closed, including when selecting or appending a key fails.
  *
  * @param job job whose configuration, input format, reducer count, sort comparator and map output
  *     key class are used
  * @param sampler source of the sample keys
  * @throws IOException if one of the underlying sampling, file-system or writer calls reports it
  * @throws ClassNotFoundException propagated from the job / sampler calls
  * @throws InterruptedException propagated from the job / sampler calls
  */
 @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
 public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
     throws IOException, ClassNotFoundException, InterruptedException {
   Configuration conf = job.getConfiguration();
   final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
   int numPartitions = job.getNumReduceTasks();
   K[] samples = sampler.getSample(inf, job);
   RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
   Arrays.sort(samples, comparator);
   Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
   FileSystem fs = dst.getFileSystem(conf);
   if (fs.exists(dst)) {
     fs.delete(dst, false);
   }
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
   try {
     NullWritable nullValue = NullWritable.get();
     // Distance, in samples, between two consecutive split keys.
     float stepSize = samples.length / (float) numPartitions;
     int last = -1;
     for (int i = 1; i < numPartitions; ++i) {
       int k = Math.round(stepSize * i);
       // Move forward while the candidate compares equal to the previously written key, so the
       // same split key is not emitted twice in a row.
       while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
         ++k;
       }
       writer.append(samples[k], nullValue);
       last = k;
     }
   } finally {
     // Release the underlying stream even when append or key selection throws.
     writer.close();
   }
 }
  /**
   * Walks the fields of two tuples in order, from {@code minFieldIndex} to {@code maxFieldIndex}
   * (both inclusive), and returns the index of the first field on which they differ.
   *
   * <p>When a custom comparator is registered for a field index it decides equality through its
   * object-level {@code compare}; otherwise {@code equals} is used.
   *
   * <p>Important: callers must guarantee that the tuples differ somewhere inside the given range.
   * If every field in the range compares equal, a {@link RuntimeException} is thrown.
   */
  private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
    int firstSchemaId = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
    int secondSchemaId = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
    // Per-schema lookup tables: group field index -> position inside the concrete tuple.
    int[] firstPositions = serInfo.getGroupSchemaIndexTranslation(firstSchemaId);
    int[] secondPositions = serInfo.getGroupSchemaIndexTranslation(secondSchemaId);

    for (int field = minFieldIndex; field <= maxFieldIndex; field++) {
      Object left = tuple1.get(firstPositions[field]);
      Object right = tuple2.get(secondPositions[field]);
      @SuppressWarnings("unchecked")
      RawComparator<Object> custom = (RawComparator<Object>) customComparators[field];

      boolean sameValue =
          (custom == null) ? left.equals(right) : custom.compare(left, right) == 0;
      if (!sameValue) {
        return field;
      }
    }

    throw new RuntimeException(
        "Illegal state.The tuples "
            + tuple1
            + " and "
            + tuple2
            + " compare the same between indexes "
            + minFieldIndex
            + " and "
            + maxFieldIndex);
  }
 /**
  * Wraps both integers in {@link IntWritable}s, serializes them, and prints the result of
  * comparing the two serialized byte arrays with the {@code comparator} in scope.
  *
  * @throws IOException if serialization reports it
  */
 public static void compare2(Integer i, Integer j) throws IOException {
   IntWritable first = new IntWritable(i);
   IntWritable second = new IntWritable(j);
   byte[] firstBytes = serialize(first);
   byte[] secondBytes = serialize(second);
   // Byte-level comparison over the full length of each serialized value.
   int outcome =
       comparator.compare(firstBytes, 0, firstBytes.length, secondBytes, 0, secondBytes.length);
   System.out.println(outcome);
 }
Beispiel #4
0
  /**
   * Distributes every custom comparator found in {@code criteria} as an instance file and records
   * the reference / file-name pairs in the supplied lists.
   *
   * <p>For each sort element that has a custom comparator, a reference of the form
   * {@code prefix|elementName} is appended to {@code comparatorRefs} and the generated file name to
   * {@code comparatorInstanceFiles}, at matching positions.
   *
   * @return the names of the instance files created; empty when {@code criteria} is null
   * @throws TupleMRException if a comparator is not {@link Serializable} or cannot be distributed
   */
  static Set<String> serializeComparators(
      Criteria criteria,
      Configuration conf,
      List<String> comparatorRefs,
      List<String> comparatorInstanceFiles,
      String prefix)
      throws TupleMRException {

    Set<String> createdFiles = new HashSet<String>();

    if (criteria != null) {
      for (SortElement element : criteria.getElements()) {
        if (element.getCustomComparator() == null) {
          // Elements without a custom comparator need no instance file.
          continue;
        }
        RawComparator<?> custom = element.getCustomComparator();

        boolean serializable = custom instanceof Serializable;
        if (!serializable) {
          throw new TupleMRException(
              "The class '"
                  + custom.getClass().getName()
                  + "' is not Serializable."
                  + " The customs comparators must implement Serializable.");
        }

        String reference = prefix + "|" + element.getName();
        // Random file name so that several comparators never collide.
        String fileName = UUID.randomUUID() + ".comparator.dat";
        try {
          InstancesDistributor.distribute(custom, fileName, conf);
          createdFiles.add(fileName);
        } catch (Exception e) {
          throw new TupleMRException(
              "The class " + custom.getClass().getName() + " can't be serialized", e);
        }

        comparatorRefs.add(reference);
        comparatorInstanceFiles.add(fileName);
      }
    }

    return createdFiles;
  }
Beispiel #5
0
    /**
     * Orders two segments by their current keys: returns true when the key of {@code a} compares
     * strictly lower than the key of {@code b}.
     *
     * <p>Each key is compared on the bytes from the buffer's current position up to its length.
     */
    @SuppressWarnings("unchecked")
    protected boolean lessThan(Object a, Object b) {
      DataInputBuffer leftKey = ((Segment<K, V>) a).getKey();
      DataInputBuffer rightKey = ((Segment<K, V>) b).getKey();

      int leftStart = leftKey.getPosition();
      int leftLength = leftKey.getLength() - leftStart;
      int rightStart = rightKey.getPosition();
      int rightLength = rightKey.getLength() - rightStart;

      int order =
          comparator.compare(
              leftKey.getData(), leftStart, leftLength, rightKey.getData(), rightStart, rightLength);
      return order < 0;
    }
Beispiel #6
0
 /**
  * Compares the records at logical positions {@code i} and {@code j}; each position is taken
  * modulo the length of the offsets array. Records are ordered by partition first and, within
  * the same partition, by key.
  *
  * @see IndexedSortable#compare
  */
 public int compare(int i, int j) {
   final int recordI = kvoffsets[i % kvoffsets.length];
   final int recordJ = kvoffsets[j % kvoffsets.length];

   // Different partitions: the partition number alone decides the order.
   final int partitionI = kvindices[recordI + PARTITION];
   final int partitionJ = kvindices[recordJ + PARTITION];
   if (partitionI != partitionJ) {
     return partitionI - partitionJ;
   }

   // Same partition: compare the key bytes, which run from KEYSTART up to VALSTART.
   final int keyStartI = kvindices[recordI + KEYSTART];
   final int keyLengthI = kvindices[recordI + VALSTART] - keyStartI;
   final int keyStartJ = kvindices[recordJ + KEYSTART];
   final int keyLengthJ = kvindices[recordJ + VALSTART] - keyStartJ;
   return comparator.compare(kvbuffer, keyStartI, keyLengthI, kvbuffer, keyStartJ, keyLengthJ);
 }
 /**
  * Wraps both integers in {@link IntWritable}s and prints the result of comparing them as
  * objects with the {@code comparator} in scope.
  */
 public static void compare(Integer i, Integer j) {
   IntWritable first = new IntWritable(i);
   IntWritable second = new IntWritable(j);
   int outcome = comparator.compare(first, second);
   System.out.println(outcome);
 }