@Test public void testMoreThanEightProps() { List<Field> fields = new ArrayList<Field>(30); for (int i = 0; i < 30; i++) { fields.add(Field.create("field" + i, Type.STRING)); } Schema schema = new Schema("bigmammaschema", fields); ITuple tuple = new Tuple(schema); for (int i = 0; i < 30; i++) { // Fill in with some default values tuple.set("field" + i, "defaultValue" + i); } NullableTuple nullableTuple = new NullableTuple(tuple); for (int i = 0; i < 30; i++) { // Assert the default values assertEquals("defaultValue" + i, nullableTuple.get("field" + i)); } // Set fields to null one by one and assert that things go well for (int i = 0; i < 30; i++) { nullableTuple.set("field" + i, null); assertEquals(null, nullableTuple.getNullable("field" + i)); nullableTuple.set("field" + i, "defaultValue" + i); assertEquals("defaultValue" + i, nullableTuple.getNullable("field" + i)); } }
/**
 * Compares the group fields of two tuples in order and returns the index of the first field in
 * which they differ.
 *
 * <p>For each index, the custom comparator stored in {@code customComparators} is used when one
 * is present; otherwise the two values are compared with {@code equals}. In the {@code equals}
 * path two {@code null} values count as equal and a {@code null} against a non-null value counts
 * as a difference, so a null field no longer causes a {@link NullPointerException}.
 *
 * <p>Important: the contract of this method is that the tuples always differ somewhere between
 * {@code minFieldIndex} and {@code maxFieldIndex} (both inclusive). If they do not, an exception
 * is thrown.
 *
 * @param tuple1 first tuple to compare
 * @param tuple2 second tuple to compare
 * @param minFieldIndex first group-field index to compare (inclusive)
 * @param maxFieldIndex last group-field index to compare (inclusive)
 * @return the lowest index in the range at which the tuples differ
 * @throws RuntimeException if the tuples compare equal on every index in the range
 */
private int indexMismatch(ITuple tuple1, ITuple tuple2, int minFieldIndex, int maxFieldIndex) {
  int schemaId1 = tupleMRConfig.getSchemaIdByName(tuple1.getSchema().getName());
  int schemaId2 = tupleMRConfig.getSchemaIdByName(tuple2.getSchema().getName());
  // Each tuple may come from a different schema, so group-field indexes are translated
  // separately for each of them.
  int[] translationTuple1 = serInfo.getGroupSchemaIndexTranslation(schemaId1);
  int[] translationTuple2 = serInfo.getGroupSchemaIndexTranslation(schemaId2);

  for (int i = minFieldIndex; i <= maxFieldIndex; i++) {
    Object obj1 = tuple1.get(translationTuple1[i]);
    Object obj2 = tuple2.get(translationTuple2[i]);
    @SuppressWarnings("unchecked")
    RawComparator<Object> customComparator = (RawComparator<Object>) customComparators[i];

    boolean same;
    if (customComparator != null) {
      same = customComparator.compare(obj1, obj2) == 0;
    } else if (obj1 == null) {
      // Null-safe fallback: only another null equals null.
      same = obj2 == null;
    } else {
      same = obj1.equals(obj2);
    }

    if (!same) {
      return i;
    }
  }

  throw new RuntimeException(
      "Illegal state.The tuples "
          + tuple1
          + " and "
          + tuple2
          + " compare the same between indexes "
          + minFieldIndex
          + " and "
          + maxFieldIndex);
}
/**
 * Adds up the integer stored at position 1 of every tuple in the group and writes a single
 * tuple carrying that total at position 1.
 *
 * <p>The last tuple returned by the iteration is reused as the output tuple, so the group must
 * contain at least one tuple.
 */
@Override
public void reduce(
    ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
    throws IOException, InterruptedException, TupleMRException {
  ITuple lastSeen = null;
  int total = 0;
  for (ITuple current : tuples) {
    lastSeen = current;
    Integer partial = (Integer) current.get(1);
    total += partial;
  }

  // Overwrite the partial count of the reused tuple with the aggregated one.
  lastSeen.set(1, total);
  collector.write(lastSeen, NullWritable.get());
}
@Override public final void reduce(DatumWrapper<ITuple> key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { try { Iterator<NullWritable> iterator = values.iterator(); tupleIterator.setIterator(iterator); ITuple currentTuple = key.datum(); ITuple previousKey = key.previousDatum(); int indexMismatch; if (firstRun) { indexMismatch = minDepth; firstRun = false; } else { indexMismatch = indexMismatch(previousKey, currentTuple, 0, maxDepth); if (indexMismatch < minDepth) { indexMismatch = minDepth; } for (int i = maxDepth; i >= indexMismatch; i--) { handler.onCloseGroup( i, groupSchema.getField(i).getName(), previousKey, this.context, collector); } } for (int i = indexMismatch; i <= maxDepth; i++) { handler.onOpenGroup( i, groupSchema.getField(i).getName(), currentTuple, this.context, collector); } // We set a view over the group fields to the method. if (isMultipleSources) { int schemaId = tupleMRConfig.getSchemaIdByName(currentTuple.getSchema().getName()); int[] indexTranslation = serInfo.getGroupSchemaIndexTranslation(schemaId); groupTuple.setContained(currentTuple, indexTranslation); } else { groupTuple.setContained(currentTuple); } handler.reduce(groupTuple, tupleIterator, this.context, collector); // This loop consumes the remaining elements that reduce didn't consume // The goal of this is to correctly set the last element in the next // onCloseGroup() call while (iterator.hasNext()) { iterator.next(); } } catch (TupleMRException e) { throw new RuntimeException(e); } }
/**
 * Adds up the integer stored at position 1 of every tuple in the group and writes the pair
 * (field 0 of the group, total). Field 0 of the group is cast to {@code Text}.
 */
@Override
public void reduce(
    ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
    throws IOException, InterruptedException, TupleMRException {
  // The output writable is created on first use and reused on later calls.
  if (outputCount == null) {
    outputCount = new IntWritable();
  }

  int total = 0;
  for (ITuple current : tuples) {
    Integer partial = (Integer) current.get(1);
    total += partial;
  }
  outputCount.set(total);

  Text groupKey = (Text) group.get(0);
  collector.write(groupKey, outputCount);
}
/**
 * Checks that a {@link NullableTuple} reflects the null and non-null values of the tuple it
 * wraps, and that fields can afterwards be switched between null and non-null through the
 * wrapper.
 */
@Test
public void testModifying() {
  ITuple wrapped =
      new Tuple(
          new Schema(
              "testSchema",
              Fields.parse("a:string, b:int, c:double, d:float, e:boolean, f:long")));

  // Initial state: fields 0, 2 and 4 hold values, fields 1, 3 and 5 are null.
  Object[] initialValues = {"foo", null, 20d, null, false, null};
  for (int pos = 0; pos < initialValues.length; pos++) {
    wrapped.set(pos, initialValues[pos]);
  }

  NullableTuple nullableTuple = new NullableTuple(wrapped);
  assertEquals("foo", nullableTuple.getNullable(0).toString());
  assertEquals(null, nullableTuple.getNullable(1));
  assertEquals(20.0d, nullableTuple.getNullable(2));
  assertEquals(null, nullableTuple.getNullable(3));
  assertEquals(false, nullableTuple.getNullable(4));
  assertEquals(null, nullableTuple.getNullable(5));

  // Flip every field: the ones holding values become null ...
  nullableTuple.set(2, null);
  nullableTuple.set(4, null);
  nullableTuple.set(0, null);
  // ... and the null ones receive a value.
  nullableTuple.set(1, 10);
  nullableTuple.set(3, 20.0f);
  nullableTuple.set(5, 30L);

  assertEquals(null, nullableTuple.getNullable(0));
  assertEquals(10, nullableTuple.getNullable(1));
  assertEquals(null, nullableTuple.getNullable(2));
  assertEquals(20.0f, nullableTuple.getNullable(3));
  assertEquals(null, nullableTuple.getNullable(4));
  assertEquals(30L, nullableTuple.getNullable(5));
}
/**
 * Stores a freshly created instance of the field's object class at the given position.
 *
 * <p>When the instance is an {@code A}, its id and url are set to random numeric strings if
 * {@code isRandom} is true, or to empty strings otherwise.
 */
protected static void fillObject(boolean isRandom, ITuple tuple, Field field, int index) {
  Object created = ReflectionUtils.newInstance(field.getObjectClass(), null);
  if (created instanceof A) {
    A target = (A) created;

    String id = "";
    if (isRandom) {
      id = String.valueOf(random.nextInt());
    }
    target.setId(id);

    String url = "";
    if (isRandom) {
      url = String.valueOf(random.nextLong());
    }
    target.setUrl(url);
  }
  tuple.set(index, created);
}
/**
 * Stores a string-like value at the given position.
 *
 * <p>When {@code isRandom} is false the value is the empty string. Otherwise one of four
 * variants is picked at random: the empty string, or a random long rendered as a
 * {@code String}, a {@code Utf8} or a {@code Text}.
 */
protected static void fillString(boolean isRandom, ITuple tuple, int index) {
  if (!isRandom) {
    tuple.set(index, "");
    return;
  }

  int variant = random.nextInt(4);
  if (variant == 0) {
    tuple.set(index, "");
  } else if (variant == 1) {
    tuple.set(index, random.nextLong() + "");
  } else if (variant == 2) {
    tuple.set(index, new Utf8(random.nextLong() + ""));
  } else if (variant == 3) {
    tuple.set(index, new Text(random.nextLong() + ""));
  }
}
/**
 * Records the string form of every tuple under the string form of the group's first field, and
 * checks that the "ES" group is seen before the "FR" group.
 *
 * @throws AssertionError if the "FR" group arrives while "ES" has not been seen yet
 */
@Override
public void reduce(
    ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
    throws IOException, InterruptedException, TupleMRException {
  String groupString = group.get(0).toString();

  boolean isFrance = groupString.equals("FR");
  if (isFrance) {
    FR_PRESENT = true;
    if (!ES_PRESENT) {
      throw new AssertionError("ES should have come before FR");
    }
  } else {
    if (groupString.equals("ES")) {
      ES_PRESENT = true;
    }
  }

  for (Object current : tuples) {
    // The per-group list is created lazily, only once a tuple actually shows up.
    List<String> seenForGroup = records.get(groupString);
    if (seenForGroup == null) {
      seenForGroup = new ArrayList<String>();
      records.put(groupString, seenForGroup);
    }
    seenForGroup.add(current.toString());
  }
}
/**
 * Fills the fields in the index range [minIndex, maxIndex] (both inclusive) according to each
 * field's type: with random data when {@code isRandom} is true, otherwise with a fixed default
 * (zero, {@code false}, or whatever the string/enum/object helpers choose).
 *
 * @throws RuntimeException wrapping any exception raised while filling, including the
 *     {@link IllegalArgumentException} used for unsupported field types
 */
protected static void fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
  try {
    for (int pos = minIndex; pos <= maxIndex; pos++) {
      Field current = tuple.getSchema().getField(pos);
      switch (current.getType()) {
        case INT: {
          int value = isRandom ? random.nextInt() : 0;
          tuple.set(pos, value);
          break;
        }
        case LONG: {
          // Kept as a long so that the default is stored as a Long, like the random value.
          long value = isRandom ? random.nextLong() : 0L;
          tuple.set(pos, value);
          break;
        }
        case BOOLEAN: {
          boolean value = isRandom && random.nextBoolean();
          tuple.set(pos, value);
          break;
        }
        case DOUBLE: {
          double value = isRandom ? random.nextDouble() : 0.0;
          tuple.set(pos, value);
          break;
        }
        case FLOAT: {
          float value = isRandom ? random.nextFloat() : 0f;
          tuple.set(pos, value);
          break;
        }
        case STRING:
          fillString(isRandom, tuple, pos);
          break;
        case ENUM:
          fillEnum(isRandom, current, tuple, pos);
          break;
        case OBJECT:
          fillObject(isRandom, tuple, current, pos);
          break;
        default:
          throw new IllegalArgumentException("Not supported type " + current.getType());
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
/**
 * Fills every field of the tuple, from index 0 to the last field of its schema, with random
 * data when {@code random} is true or with defaults otherwise.
 */
protected static void fillTuple(boolean random, ITuple tuple) {
  int lastIndex = tuple.getSchema().getFields().size() - 1;
  fillTuple(random, tuple, 0, lastIndex);
}
/**
 * Stores one constant of the field's enum class at the given position: a randomly chosen one
 * when {@code isRandom} is true, otherwise the first one.
 *
 * <p>The constants are obtained by reflectively invoking the class's static {@code values()}
 * method.
 *
 * @throws Exception if {@code values()} cannot be found or invoked on the field's object class
 */
protected static void fillEnum(boolean isRandom, Field field, ITuple tuple, int index)
    throws Exception {
  Class<?> enumClass = field.getObjectClass();
  Method valuesMethod = enumClass.getMethod("values", (Class<?>[]) null);
  Enum<?>[] constants = (Enum<?>[]) valuesMethod.invoke(null);

  int chosen = isRandom ? random.nextInt(constants.length) : 0;
  tuple.set(index, constants[chosen]);
}
public void createInput(String input, Configuration conf) throws IOException, InterruptedException { Path inPath = new Path(input); FileSystem fs = FileSystem.get(inPath.toUri(), conf); TupleFile.Writer writer = new TupleFile.Writer(fs, conf, inPath, TopicalWordCount.getSchema()); // Topic 1, words: { a, 10 } { b, 1 } , { c, 5 } // Top 2 words = a(10), c(5) ITuple tuple = new Tuple(TopicalWordCount.getSchema()); tuple.set("word", "a"); tuple.set("topic", 1); tuple.set("count", 10); writer.append(tuple); tuple.set("word", "b"); tuple.set("topic", 1); tuple.set("count", 1); writer.append(tuple); tuple.set("word", "c"); tuple.set("topic", 1); tuple.set("count", 5); writer.append(tuple); // Topic 2, words: { a, 10 } { b, 9 } , { c, 5 } // Top 2 words = a(10), b(9) tuple.set("word", "a"); tuple.set("topic", 2); tuple.set("count", 10); writer.append(tuple); tuple.set("word", "b"); tuple.set("topic", 2); tuple.set("count", 9); writer.append(tuple); tuple.set("word", "c"); tuple.set("topic", 2); tuple.set("count", 5); writer.append(tuple); writer.close(); }