@Override
public void map(ImmutableHexWritable key, FsEntry value, Context context)
    throws IOException, InterruptedException {
  if (Extensions.contains(value.extension())) {
    FullPath.set(value.fullPath());
    Ext.set(value.extension());
    encodeHex(Sha, value, "sha1");
    encodeHex(Md5, value, "md5");
    if (value.isContentHDFS()) {
      Vid.setSize(0);
      HdfsPath.set(value.getContentHdfsPath());
    } else {
      final byte[] buf = value.getContentBuffer();
      if (buf == null) {
        LOG.warn(value.fullPath() + " didn't have a content buffer, skipping.");
        return;
      }
      Vid.set(buf, 0, buf.length);
      HdfsPath.set("");
    }
    byte[] keybytes = key.get();
    OutKey.set(keybytes, 0, keybytes.length);
    context.write(OutKey, Fields);
  }
}
@Override
public boolean next(BytesWritable k, BytesWritable v) throws IOException {
  if (nextKeyValue()) {
    k.set(new BytesWritable(getCurrentKey().getBytes()));
    v.set(new BytesWritable(getCurrentValue()));
    return true;
  }
  return false;
}
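The adapter above allocates a temporary BytesWritable per record just to copy bytes. A minimal sketch (assuming the same getCurrentKey()/getCurrentValue() accessors that return byte arrays) that sets the caller's writables directly:

// Sketch: copy into the caller's writables without a per-record temporary.
@Override
public boolean next(BytesWritable k, BytesWritable v) throws IOException {
  if (nextKeyValue()) {
    byte[] keyBytes = getCurrentKey().getBytes();
    k.set(keyBytes, 0, keyBytes.length); // set(byte[], int, int) grows k's buffer as needed
    byte[] valueBytes = getCurrentValue();
    v.set(valueBytes, 0, valueBytes.length);
    return true;
  }
  return false;
}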
@SuppressWarnings("unchecked") protected void setValue(long length) throws IOException { ByteArrayOutputStream baos; if (length > 0) { baos = new ByteArrayOutputStream((int) length); } else { baos = new ByteArrayOutputStream(); } int size; while ((size = zipIn.read(buf, 0, buf.length)) != -1) { baos.write(buf, 0, size); } if (value instanceof Text) { ((Text) value).set(baos.toString(encoding)); } else if (value instanceof BytesWritable) { if (batchSize > 1) { // Copy data since XCC won't do it when Content is created. value = (VALUEIN) new BytesWritable(); } ((BytesWritable) value).set(baos.toByteArray(), 0, baos.size()); } else { String error = "Unsupported input value class: " + value.getClass(); LOG.error(error, new UnsupportedOperationException(error)); key = null; } baos.close(); }
@Override
public void write(Writable w) throws IOException {
  // Get input data
  byte[] input;
  int inputLength;
  if (w instanceof Text) {
    input = ((Text) w).getBytes();
    inputLength = ((Text) w).getLength();
  } else {
    assert (w instanceof BytesWritable);
    input = ((BytesWritable) w).get();
    inputLength = ((BytesWritable) w).getSize();
  }

  // Add signature
  byte[] wrapped = new byte[signature.length + inputLength];
  for (int i = 0; i < signature.length; i++) {
    wrapped[i] = signature[i];
  }
  for (int i = 0; i < inputLength; i++) {
    wrapped[i + signature.length] = input[i];
  }

  // Encode
  byte[] output = base64.encode(wrapped);
  bytesWritable.set(output, 0, output.length);
  writer.write(bytesWritable);
}
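The two copy loops above are correct but verbose; System.arraycopy is the idiomatic, behavior-identical form:

// Idiomatic equivalent of the two copy loops above:
byte[] wrapped = new byte[signature.length + inputLength];
System.arraycopy(signature, 0, wrapped, 0, signature.length);
System.arraycopy(input, 0, wrapped, signature.length, inputLength);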
public BytesWritable toBytes() // throws IOException
{
  sbuffer[0] = (byte) (m_isRC ? 1 : 0);

  sbuffer[1] = (byte) ((m_refID & 0xFF000000) >> 24);
  sbuffer[2] = (byte) ((m_refID & 0x00FF0000) >> 16);
  sbuffer[3] = (byte) ((m_refID & 0x0000FF00) >> 8);
  sbuffer[4] = (byte) ((m_refID & 0x000000FF));

  sbuffer[5] = (byte) ((m_refStart & 0xFF000000) >> 24);
  sbuffer[6] = (byte) ((m_refStart & 0x00FF0000) >> 16);
  sbuffer[7] = (byte) ((m_refStart & 0x0000FF00) >> 8);
  sbuffer[8] = (byte) ((m_refStart & 0x000000FF));

  sbuffer[9] = (byte) ((m_refEnd & 0xFF000000) >> 24);
  sbuffer[10] = (byte) ((m_refEnd & 0x00FF0000) >> 16);
  sbuffer[11] = (byte) ((m_refEnd & 0x0000FF00) >> 8);
  sbuffer[12] = (byte) ((m_refEnd & 0x000000FF));

  sbuffer[13] = (byte) ((m_differences & 0xFF000000) >> 24);
  sbuffer[14] = (byte) ((m_differences & 0x00FF0000) >> 16);
  sbuffer[15] = (byte) ((m_differences & 0x0000FF00) >> 8);
  sbuffer[16] = (byte) ((m_differences & 0x000000FF));

  bytes.set(sbuffer, 0, 17);
  return bytes;
}
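The shift-and-mask packing writes each int big-endian, which is java.nio.ByteBuffer's default byte order; an equivalent sketch (same m_* fields and 17-byte sbuffer assumed):

// Equivalent big-endian packing via ByteBuffer.
public BytesWritable toBytes() {
  ByteBuffer bb = ByteBuffer.wrap(sbuffer);
  bb.put((byte) (m_isRC ? 1 : 0)); // 1 flag byte
  bb.putInt(m_refID);              // 4 bytes each, MSB first, matching the shifts above
  bb.putInt(m_refStart);
  bb.putInt(m_refEnd);
  bb.putInt(m_differences);
  bytes.set(sbuffer, 0, 17);
  return bytes;
}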
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  FileSystem fileSystem = FileSystem.get(configuration);
  if (fileSystem.isDirectory(split.getPath())) {
    return false;
  }
  if (fileProcessed) {
    return false;
  }
  int fileLength = (int) split.getLength();
  byte[] result = new byte[fileLength];
  FSDataInputStream inputStream = null;
  try {
    inputStream = fileSystem.open(split.getPath());
    IOUtils.readFully(inputStream, result, 0, fileLength);
    currentValue.set(result, 0, fileLength);
  } finally {
    IOUtils.closeStream(inputStream);
  }
  fileProcessed = true;
  return true;
}
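One caveat with the whole-file read above: casting split.getLength() to int silently truncates for files over 2 GB. A defensive variant of those two lines (an assumption about the desired behavior, not part of the original reader):

// Hypothetical guard against the silent (int) truncation above.
long length = split.getLength();
if (length > Integer.MAX_VALUE) {
  throw new IOException("File too large to read as a single record: " + length + " bytes");
}
byte[] result = new byte[(int) length];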
@Override
public Writable create(Object value, TypeConverter typeConverter, Holder<Integer> size) {
  BytesWritable writable = new BytesWritable();
  ByteBuffer bb = (ByteBuffer) value;
  writable.set(bb.array(), 0, bb.array().length);
  size.value = bb.array().length;
  return writable;
}
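bb.array() assumes a heap (array-backed) buffer and ignores position/limit; a direct buffer throws UnsupportedOperationException. A hedged alternative sketch that copies the readable window of any ByteBuffer:

// Sketch: handle direct buffers too, and honor position/limit rather than the raw array.
ByteBuffer bb = (ByteBuffer) value;
byte[] data;
if (bb.hasArray() && bb.arrayOffset() == 0 && bb.position() == 0
    && bb.limit() == bb.array().length) {
  data = bb.array();              // fast path: buffer covers its backing array exactly
} else {
  data = new byte[bb.remaining()];
  bb.duplicate().get(data);       // duplicate() leaves bb's position untouched
}
writable.set(data, 0, data.length);
size.value = data.length;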
public BytesWritable readBytes(BytesWritable bw) throws IOException {
  byte[] bytes = in.readBytes();
  if (bw == null) {
    bw = new BytesWritable(bytes);
  } else {
    bw.set(bytes, 0, bytes.length);
  }
  return bw;
}
@Override
public void map(LongWritable key, Text value, OutputCollector<BytesWritable, Tuple> output,
    Reporter reporter) throws IOException {
  // value should contain "word count"
  String[] wdct = value.toString().split(" ");
  if (wdct.length != 2) {
    // LOG the error
    return;
  }
  byte[] word = wdct[0].getBytes();
  bytesKey.set(word, 0, word.length);
  System.out.println("word: " + new String(word));
  tupleRow.set(0, new String(word));
  tupleRow.set(1, Integer.parseInt(wdct[1]));
  System.out.println("count: " + Integer.parseInt(wdct[1]));

  // This key has to be created by user
  /*
   * Tuple userKey = new DefaultTuple(); userKey.append(new String(word));
   * userKey.append(Integer.parseInt(wdct[1]));
   */
  System.out.println("in map, sortkey: " + sortKey);
  Tuple userKey = new ZebraTuple();
  if (sortKey.equalsIgnoreCase("word,count")) {
    userKey.append(new String(word));
    userKey.append(Integer.parseInt(wdct[1]));
  }
  if (sortKey.equalsIgnoreCase("count")) {
    userKey.append(Integer.parseInt(wdct[1]));
  }
  if (sortKey.equalsIgnoreCase("word")) {
    userKey.append(new String(word));
  }

  try {
    /* New M/R Interface */
    /* Converts user key to zebra BytesWritable key */
    /* using sort key expr tree */
    /* Returns a java base object */
    /* Done for each user key */
    bytesKey = BasicTableOutputFormat.getSortKey(javaObj, userKey);
  } catch (Exception e) {
    // Note: swallowing this exception leaves bytesKey holding the previous record's key.
  }
  output.collect(bytesKey, tupleRow);
}
@JRubyMethod(name = "ruby=", required = 1) public IRubyObject ruby_set(final ThreadContext ctx, IRubyObject arg) { RubyString string; try { string = arg.convertToString(); } catch (RaiseException re) { throw newTypeError(ctx.runtime, arg.getMetaClass(), "String"); } ByteList bytes = string.getByteList(); value.set(bytes.getUnsafeBytes(), bytes.getBegin(), bytes.getRealSize()); return arg; }
@Override
public void reduce(BytesWritable topkRollupKey, Iterable<BytesWritable> timeSeriesIterable,
    Context context) throws IOException, InterruptedException {
  TopKRollupPhaseOneMapOutputKey wrapper =
      TopKRollupPhaseOneMapOutputKey.fromBytes(topkRollupKey.getBytes());
  LOGGER.info("DimensionName {} DimensionValue {}",
      wrapper.getDimensionName(), wrapper.getDimensionValue());

  MetricTimeSeries aggregateSeries = new MetricTimeSeries(metricSchema);
  for (BytesWritable writable : timeSeriesIterable) {
    MetricTimeSeries series = MetricTimeSeries.fromBytes(writable.copyBytes(), metricSchema);
    aggregateSeries.aggregate(series);
  }

  Map<String, Long> metricValues = new HashMap<String, Long>();
  for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
    metricValues.put(metricSpec.getName(), 0L);
  }
  for (Long time : aggregateSeries.getTimeWindowSet()) {
    for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
      String metricName = metricSpec.getName();
      long metricValue = aggregateSeries.get(time, metricName).longValue();
      metricValues.put(metricName, metricValues.get(metricName) + metricValue);
    }
  }

  boolean aboveThreshold = true;
  for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
    String metricName = metricSpec.getName();
    long metricValue = metricValues.get(metricName);
    long metricSum = metricSums.get(metricName);
    double metricThreshold = metricThresholds.get(metricName);
    LOGGER.info("metricValue : {} metricSum : {}", metricValue, metricSum);
    if (metricValue < (metricThreshold / 100) * metricSum) {
      aboveThreshold = false;
      break;
    }
  }

  if (aboveThreshold) {
    LOGGER.info("Passed threshold");
    // Serialize once instead of calling toBytes() twice.
    byte[] seriesBytes = aggregateSeries.toBytes();
    valWritable.set(seriesBytes, 0, seriesBytes.length);
    context.write(topkRollupKey, valWritable);
  }
}
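The fromBytes(getBytes()) calls here and in the mapper below rely on the deserializer ignoring trailing bytes, because BytesWritable.getBytes() exposes the whole backing array rather than exactly getLength() bytes; copyBytes() is the exact-size variant. A small standalone illustration:

// getBytes() exposes the (possibly oversized) backing array; copyBytes() is exact.
BytesWritable bw = new BytesWritable();
bw.set(new byte[] {1, 2, 3, 4}, 0, 4);
bw.set(new byte[] {9}, 0, 1);               // capacity from the first set() is kept
System.out.println(bw.getLength());          // 1
System.out.println(bw.getBytes().length);    // > 1 (stale capacity, stale bytes after index 0)
System.out.println(bw.copyBytes().length);   // exactly 1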
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  outputByteBuffer.reset();
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (int i = 0; i < columnNames.size(); i++) {
    serialize(outputByteBuffer, soi.getStructFieldData(obj, fields.get(i)),
        fields.get(i).getFieldObjectInspector(), columnSortOrderIsDesc[i]);
  }
  serializeBytesWritable.set(outputByteBuffer.getData(), 0, outputByteBuffer.getLength());
  return serializeBytesWritable;
}
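Because this encoding is binary-sortable, two serialized rows compare in row order with a plain byte comparison; a sketch, where a and b stand for two BytesWritable results of serialize() (names hypothetical):

// WritableComparator.compareBytes is Hadoop's lexicographic byte comparator.
int cmp = WritableComparator.compareBytes(
    a.getBytes(), 0, a.getLength(),
    b.getBytes(), 0, b.getLength());
// cmp < 0 exactly when row a orders before row b under the configured sort directions.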
@Override
public Writable create(Object value, TypeConverter typeConverter, Holder<Integer> size) {
  InputStream is = null;
  try {
    is = typeConverter.convertTo(InputStream.class, value);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    IOUtils.copyBytes(is, bos, HdfsConstants.DEFAULT_BUFFERSIZE, false);
    // toByteArray() copies the buffer, so call it once rather than three times.
    byte[] payload = bos.toByteArray();
    BytesWritable writable = new BytesWritable();
    writable.set(payload, 0, payload.length);
    size.value = payload.length;
    return writable;
  } catch (IOException ex) {
    throw new RuntimeCamelException(ex);
  } finally {
    IOHelper.close(is);
  }
}
protected boolean get(KafkaETLKey key, BytesWritable value) throws IOException {
  if (_messageIt != null && _messageIt.hasNext()) {
    Message msg = _messageIt.next();

    ByteBuffer buf = msg.payload();
    int origSize = buf.remaining();
    byte[] bytes = new byte[origSize];
    // ByteBuffer.get(byte[], off, len)'s 'off' is the *destination* offset;
    // passing buf.position() here would overflow 'bytes' whenever position > 0.
    buf.get(bytes, 0, origSize);
    value.set(bytes, 0, origSize);

    key.set(_index, _offset, msg.checksum());

    _offset += MessageSet.entrySize(msg); // increase offset
    _count++; // increase count

    return true;
  } else {
    return false;
  }
}
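If the payload buffer may be handed out to other readers as well, a sketch that copies the readable window without mutating the original buffer (same Message/payload API as above assumed):

// Sketch: copy position..limit via a duplicate, leaving the original buffer untouched.
ByteBuffer buf = msg.payload();
byte[] bytes = new byte[buf.remaining()];
buf.duplicate().get(bytes, 0, bytes.length); // 0 = destination offset
value.set(bytes, 0, bytes.length);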
@Override
public void map(BytesWritable dimensionKeyBytes, BytesWritable aggSeries, Context context)
    throws IOException, InterruptedException {
  DimensionKey dimensionKey = DimensionKey.fromBytes(dimensionKeyBytes.getBytes());
  String[] dimensionValues = dimensionKey.getDimensionValues();

  for (String dimensionName : config.getDimensionNames()) {
    String dimensionValue = dimensionValues[dimensionNameToIndexMapping.get(dimensionName)];
    TopKRollupPhaseOneMapOutputKey keyWrapper;
    keyWrapper = new TopKRollupPhaseOneMapOutputKey(dimensionName, dimensionValue);
    byte[] keyBytes = keyWrapper.toBytes();
    keyWritable.set(keyBytes, 0, keyBytes.length);
    context.write(keyWritable, aggSeries);
  }
}
@Override
protected void map(AvroKey<DocumentMetadata> avro, NullWritable ignore, Context context)
    throws IOException, InterruptedException {
  String docId = null;
  try {
    // TODO MiconCodeReview: I would extract a method
    // 'private static String getDocId(AvroKey<DocumentMetadata> fromAvro)'
    docId = new DocEntityId(avro.datum().getId().toString()).toString();
    docIdWritable.set(docId);
    // TODO MiconCodeReview: I would extract a method
    // 'private static MatchableEntity getMatchableEntity(AvroKey<DocumentMetadata> fromAvro)'
    MatchableEntity entity = MatchableEntity.fromBasicMetadata(
        docId, Util.avroBasicMetadataToProtoBuf(avro.datum().getBasicMetadata()));
    byte[] metaBytes = entity.data().toByteArray();
    docMetaWritable.set(metaBytes, 0, metaBytes.length);
    context.write(docIdWritable, docMetaWritable);
  } catch (Exception e) {
    log.error("Error" + (docId != null ? " while processing document " + docId : ""), e);
  }
}
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
    throws IOException {
  // Is this field a null?
  byte isNull = buffer.read(invert);
  if (isNull == 0) {
    return null;
  }
  assert (isNull == 1);
  switch (type.getCategory()) {
  case PRIMITIVE: {
    PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
    switch (ptype.getPrimitiveCategory()) {
    case VOID: {
      return null;
    }
    case BOOLEAN: {
      BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
      byte b = buffer.read(invert);
      assert (b == 1 || b == 2);
      r.set(b == 2);
      return r;
    }
    case BYTE: {
      ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
      r.set((byte) (buffer.read(invert) ^ 0x80));
      return r;
    }
    case SHORT: {
      ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
      int v = buffer.read(invert) ^ 0x80;
      v = (v << 8) + (buffer.read(invert) & 0xff);
      r.set((short) v);
      return r;
    }
    case INT: {
      IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
      int v = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 3; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      r.set(v);
      return r;
    }
    case LONG: {
      LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
      long v = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 7; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      r.set(v);
      return r;
    }
    case FLOAT: {
      FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
      int v = 0;
      for (int i = 0; i < 4; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1 << 31)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1 << 31);
      }
      r.set(Float.intBitsToFloat(v));
      return r;
    }
    case DOUBLE: {
      DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
      long v = 0;
      for (int i = 0; i < 8; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1L << 63)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1L << 63);
      }
      r.set(Double.longBitsToDouble(v));
      return r;
    }
    case STRING: {
      Text r = reuse == null ? new Text() : (Text) reuse;
      // Get the actual length first
      int start = buffer.tell();
      int length = 0;
      do {
        byte b = buffer.read(invert);
        if (b == 0) {
          // end of string
          break;
        }
        if (b == 1) {
          // this is an escape char; read and skip the actual char
          buffer.read(invert);
        }
        length++;
      } while (true);

      if (length == buffer.tell() - start) {
        // No escaping happened, so we are already done.
        r.set(buffer.getData(), start, length);
      } else {
        // Escaping happened, we need to copy byte-by-byte.
        // 1. Set the length first.
        r.set(buffer.getData(), start, length);
        // 2. Reset the pointer.
        buffer.seek(start);
        // 3. Copy the data.
        byte[] rdata = r.getBytes();
        for (int i = 0; i < length; i++) {
          byte b = buffer.read(invert);
          if (b == 1) {
            // This is an escape char; read the actual char.
            // The serialization format escapes \0 to \1, and \1 to \2,
            // to make sure the string is null-terminated.
            b = (byte) (buffer.read(invert) - 1);
          }
          rdata[i] = b;
        }
        // 4. Read the null terminator.
        byte b = buffer.read(invert);
        assert (b == 0);
      }
      return r;
    }
    case BINARY: {
      BytesWritable bw = new BytesWritable();
      // Get the actual length first
      int start = buffer.tell();
      int length = 0;
      do {
        byte b = buffer.read(invert);
        if (b == 0) {
          // end of string
          break;
        }
        if (b == 1) {
          // this is an escape char; read and skip the actual char
          buffer.read(invert);
        }
        length++;
      } while (true);

      if (length == buffer.tell() - start) {
        // No escaping happened, so we are already done.
        bw.set(buffer.getData(), start, length);
      } else {
        // Escaping happened, we need to copy byte-by-byte.
        // 1. Set the length first.
        bw.set(buffer.getData(), start, length);
        // 2. Reset the pointer.
        buffer.seek(start);
        // 3. Copy the data.
        byte[] rdata = bw.getBytes();
        for (int i = 0; i < length; i++) {
          byte b = buffer.read(invert);
          if (b == 1) {
            // This is an escape char; read the actual char.
            // The serialization format escapes \0 to \1, and \1 to \2,
            // to make sure the string is null-terminated.
            b = (byte) (buffer.read(invert) - 1);
          }
          rdata[i] = b;
        }
        // 4. Read the null terminator.
        byte b = buffer.read(invert);
        assert (b == 0);
      }
      return bw;
    }
    case DATE: {
      DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
      long v = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 7; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      d.set(DateWritable.timeToDate(v));
      return d;
    }
    case TIMESTAMP:
      TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
      byte[] bytes = new byte[8];
      for (int i = 0; i < bytes.length; i++) {
        bytes[i] = buffer.read(invert);
      }
      t.setBinarySortable(bytes, 0);
      return t;
    default: {
      throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
    }
    }
  }
  case LIST: {
    ListTypeInfo ltype = (ListTypeInfo) type;
    TypeInfo etype = ltype.getListElementTypeInfo();

    // Create the list if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;

    // Read the list
    int size = 0;
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each element
      assert (more == 1);
      if (size == r.size()) {
        r.add(null);
      }
      r.set(size, deserialize(buffer, etype, invert, r.get(size)));
      size++;
    }
    // Remove additional elements if the list is reused
    while (r.size() > size) {
      r.remove(r.size() - 1);
    }
    return r;
  }
  case MAP: {
    MapTypeInfo mtype = (MapTypeInfo) type;
    TypeInfo ktype = mtype.getMapKeyTypeInfo();
    TypeInfo vtype = mtype.getMapValueTypeInfo();

    // Create the map if needed
    Map<Object, Object> r;
    if (reuse == null) {
      r = new HashMap<Object, Object>();
    } else {
      r = (HashMap<Object, Object>) reuse;
      r.clear();
    }

    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each key and then each value
      assert (more == 1);
      Object k = deserialize(buffer, ktype, invert, null);
      Object v = deserialize(buffer, vtype, invert, null);
      r.put(k, v);
    }
    return r;
  }
  case STRUCT: {
    StructTypeInfo stype = (StructTypeInfo) type;
    List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
    int size = fieldTypes.size();
    // Create the struct if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
    assert (r.size() <= size);
    // Set the size of the struct
    while (r.size() < size) {
      r.add(null);
    }
    // Read one field by one field
    for (int eid = 0; eid < size; eid++) {
      r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
    }
    return r;
  }
  case UNION: {
    UnionTypeInfo utype = (UnionTypeInfo) type;
    StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
    // Read the tag
    byte tag = buffer.read(invert);
    r.setTag(tag);
    r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
    return r;
  }
  default: {
    throw new RuntimeException("Unrecognized type: " + type.getCategory());
  }
  }
}
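The STRING and BINARY branches above undo an escaping scheme that frees \0 to act as a terminator: \0 is written as \1\1 and \1 as \1\2. A minimal standalone encoder (hypothetical helper name) showing the scheme those branches decode with (next - 1):

// Hypothetical helper: escape a raw byte[] the way the decode loops above expect.
static byte[] escapeBinarySortable(byte[] raw) {
  java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
  for (byte b : raw) {
    if (b == 0 || b == 1) {
      out.write(1);     // escape marker
      out.write(b + 1); // shifted value: 0 -> 1, 1 -> 2
    } else {
      out.write(b);
    }
  }
  out.write(0); // terminator can no longer collide with data bytes
  return out.toByteArray();
}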
private void testBinarySortableFast(
    SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc,
    byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde,
    StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI,
    boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns,
    boolean doWriteFewerColumns, Random r) throws Throwable {

  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;

  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }

  int writeColumnCount = columnCount;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
  }

  BinarySortableSerializeWrite binarySortableSerializeWrite =
      new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

  // Try to serialize
  // One Writable per row.
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];

  int[][] perFieldWriteLengthsArray = new int[rowCount][];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    binarySortableSerializeWrite.set(output);

    int[] perFieldWriteLengths = new int[columnCount];
    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
      perFieldWriteLengths[index] = output.getLength();
    }
    perFieldWriteLengthsArray[i] = perFieldWriteLengths;

    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
    if (i > 0) {
      int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
      if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
        System.out.println("Test failed in " + (ascending ? "ascending" : "descending")
            + " order with " + (i - 1) + " and " + i);
        System.out.println("serialized data [" + (i - 1) + "] = "
            + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
        System.out.println("serialized data [" + i + "] = "
            + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
        fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
      }
    }
  }

  // Try to deserialize, using DeserializeRead, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    BinarySortableDeserializeRead binarySortableDeserializeRead =
        new BinarySortableDeserializeRead(primitiveTypeInfos,
            /* useExternalBuffer */ false, columnSortOrderIsDesc);

    BytesWritable bytesWritable = serializeWriteBytes[i];
    binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        binarySortableDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back a null.
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
    }

    /*
     * Clip off one byte and expect to get an EOFException on the write field.
     */
    BinarySortableDeserializeRead binarySortableDeserializeRead2 =
        new BinarySortableDeserializeRead(primitiveTypeInfos,
            /* useExternalBuffer */ false, columnSortOrderIsDesc);

    binarySortableDeserializeRead2.set(
        bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte.

    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      if (index == writeColumnCount - 1) {
        boolean threw = false;
        try {
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
        } catch (EOFException e) {
          // debugDetailedReadPositionString =
          //     binarySortableDeserializeRead2.getDetailedReadPositionString();
          // debugStackTrace = e.getStackTrace();
          threw = true;
        }
        TestCase.assertTrue(threw);
      } else {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead2.skipNextField();
        } else {
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
        }
      }
    }
  }

  // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];

    // Note that regular SerDe doesn't tolerate fewer columns.
    List<Object> deserializedRow;
    if (doWriteFewerColumns) {
      deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
    } else {
      deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
    }

    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      Object expected = row[index];
      Object object = deserializedRow.get(index);
      if (expected == null || object == null) {
        if (expected != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(expected)) {
          fail("SerDe deserialized value does not match (expected "
              + expected.getClass().getName() + " " + expected.toString()
              + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
        }
      }
    }
  }

  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];

  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];

    // Since SerDe reuses memory, we will need to make a copy.
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(row, rowOI);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(serialized);
    byte[] serDeOutput =
        Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());

    byte[] serializeWriteExpected = Arrays.copyOfRange(
        serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());

    if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
      int mismatchPos = -1;
      if (serDeOutput.length != serializeWriteExpected.length) {
        for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
          if (serDeOutput[b] != serializeWriteExpected[b]) {
            mismatchPos = b;
            break;
          }
        }
        fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length
            + ", serializeWriteExpected.length " + serializeWriteExpected.length
            + " mismatchPos " + mismatchPos
            + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
      }
      List<Integer> differentPositions = new ArrayList<Integer>();
      for (int b = 0; b < serDeOutput.length; b++) {
        if (serDeOutput[b] != serializeWriteExpected[b]) {
          differentPositions.add(b);
        }
      }
      if (differentPositions.size() > 0) {
        List<String> serializeWriteExpectedFields = new ArrayList<String>();
        List<String> serDeFields = new ArrayList<String>();
        int f = 0;
        int lastBegin = 0;
        for (int b = 0; b < serDeOutput.length; b++) {
          int writeLength = perFieldWriteLengthsArray[i][f];
          if (b + 1 == writeLength) {
            serializeWriteExpectedFields.add(
                displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
            serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
            f++;
            lastBegin = b + 1;
          }
        }
        fail("SerializeWrite and SerDe serialization does not match at positions "
            + differentPositions.toString()
            + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString()
            + "\nSerDe: " + serDeFields.toString()
            + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i])
            + "\nprimitiveTypeInfos " + Arrays.toString(primitiveTypeInfos)
            + "\nrow " + Arrays.toString(row));
      }
    }
    serdeBytes[i] = bytesWritable;
  }

  // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    BinarySortableDeserializeRead binarySortableDeserializeRead =
        new BinarySortableDeserializeRead(primitiveTypeInfos,
            /* useExternalBuffer */ false, columnSortOrderIsDesc);

    BytesWritable bytesWritable = serdeBytes[i];
    binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        binarySortableDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back a null.
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
    }
  }
}
@Override
public <E> void processRow(Object key, Iterator<E> values) throws IOException {
  if (reducer.getDone()) {
    return;
  }
  try {
    BytesWritable keyWritable = (BytesWritable) key;
    byte tag = 0;
    if (isTagged) {
      // remove the tag from key coming out of reducer
      // and store it in separate variable.
      int size = keyWritable.getSize() - 1;
      tag = keyWritable.get()[size];
      keyWritable.setSize(size);
    }
    if (!keyWritable.equals(groupKey)) {
      // If an operator wants to do some work at the beginning of a group
      if (groupKey == null) {
        // the first group
        groupKey = new BytesWritable();
      } else {
        // If an operator wants to do some work at the end of a group
        LOG.trace("End Group");
        reducer.endGroup();
      }
      try {
        keyObject = inputKeyDeserializer.deserialize(keyWritable);
      } catch (Exception e) {
        throw new HiveException(
            "Hive Runtime Error: Unable to deserialize reduce input key from "
                + Utilities.formatBinaryString(keyWritable.get(), 0, keyWritable.getSize())
                + " with properties " + keyTableDesc.getProperties(),
            e);
      }
      groupKey.set(keyWritable.get(), 0, keyWritable.getSize());
      LOG.trace("Start Group");
      reducer.setGroupKeyObject(keyObject);
      reducer.startGroup();
    }
    /* this.keyObject passed via reference */
    if (vectorized) {
      processVectors(values, tag);
    } else {
      processKeyValues(values, tag);
    }
  } catch (Throwable e) {
    abort = true;
    Utilities.setReduceWork(jc, null);
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      String msg = "Fatal error: " + e;
      LOG.fatal(msg, e);
      throw new RuntimeException(e);
    }
  }
}
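For context, the tag stripped above rides as the final key byte, appended on the write side; a hedged sketch of that mirror operation (a hypothetical helper, not the actual ReduceSink code):

// Grow the key by one byte and place the tag last.
static void appendTag(BytesWritable keyWritable, byte tag) {
  int size = keyWritable.getLength();
  keyWritable.setSize(size + 1);      // grows the backing array if needed
  keyWritable.getBytes()[size] = tag; // tag becomes the final key byte
}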
public void map(NullWritable key, Triple value, Context context)
    throws IOException, InterruptedException {
  pred.set(value.getBuffer(), value.getPredicateOffset(), value.getPredicateLength());
  context.write(pred, one);
}
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);

  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);

  FileOutputFormat.setOutputPath(job, outdir);

  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();

  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);

  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        iformat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
        new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.nextKeyValue()) {
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        iwritable = reader.getCurrentKey();
        dwritable = reader.getCurrentValue();
        assertEquals(
            "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue(
            "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}