@Override
public void serialize(Object o, ImmutableBytesWritable bytesWritable) throws IOException {
  byte[] bytesToWriteIn = bytesWritable.get();
  int offset = bytesWritable.getOffset();
  if (o == null) {
    if (fixedPrefixLength > 0)
      throw new IllegalStateException(
          "expected at least " + fixedPrefixLength + " bytes to write");
    else if (terminate()) {
      // write one (masked) null byte
      bytesToWriteIn[offset] = mask(NULL);
      RowKeyUtils.seek(bytesWritable, 1);
    }
  } else {
    final BytesWritable input = (BytesWritable) o;
    if (fixedPrefixLength > input.getLength())
      throw new IllegalStateException(
          "expected at least " + fixedPrefixLength + " bytes to write");
    else {
      encodeFixedPrefix(input.getBytes(), bytesWritable);
      encodedCustomizedReversedPackedBcd(
          toStringRepresentation(
              input.getBytes(), fixedPrefixLength, input.getLength() - fixedPrefixLength),
          bytesWritable);
    }
  }
}
public KeyAndPartitionWritable(Domain domain, BytesWritable key) {
  this.key = key;
  int partition =
      domain
          .getPartitioner()
          .partition(ByteBuffer.wrap(key.getBytes(), 0, key.getLength()), domain.getNumParts());
  this.partition = new IntWritable(partition);
}
public void map(BytesWritable key, BytesWritable value, Context context)
    throws IOException, InterruptedException {
  // The 16-byte input key holds two longs: [start][end].
  long start = NumberUtils.decodeLong(key.getBytes(), 0);
  long end = NumberUtils.decodeLong(key.getBytes(), 8);

  // Emit the record twice, once keyed by each endpoint. Each output value is
  // [1 flag byte][8-byte other endpoint][original payload], i.e. a 9-byte header.
  oKey.set(start);
  oValue.setSize(value.getLength() + 9);
  oValue.getBytes()[0] = 0;
  NumberUtils.encodeLong(oValue.getBytes(), 1, end);
  System.arraycopy(value.getBytes(), 0, oValue.getBytes(), 9, value.getLength());
  context.write(oKey, oValue);

  oKey.set(end);
  oValue.getBytes()[0] = 1;
  NumberUtils.encodeLong(oValue.getBytes(), 1, start);
  System.arraycopy(value.getBytes(), 0, oValue.getBytes(), 9, value.getLength());
  context.write(oKey, oValue);
}
@Override
public int getSerializedLength(Object o) throws IOException {
  if (o == null) return terminate() ? fixedPrefixLength + 1 : fixedPrefixLength;

  final BytesWritable input = (BytesWritable) o;
  return fixedPrefixLength
      + getSerializedLength(
          toStringRepresentation(
              input.getBytes(), fixedPrefixLength, input.getLength() - fixedPrefixLength));
}
@Override
public void rollback() throws IOException {
  uncommittedLength = target.getLength();
  if (uncommittedLength != 0) {
    if (compressGaps) {
      writeGap(gamma + 1);
    } else {
      writeGapUncompressed(gamma + 1);
    }
  }
}
/** Convert bytes to SHA-1. */
public Text evaluate(BytesWritable b) {
  if (b == null) {
    return null;
  }
  digest.reset();
  digest.update(b.getBytes(), 0, b.getLength());
  byte[] shaBytes = digest.digest();
  String shaHex = Hex.encodeHexString(shaBytes);
  result.set(shaHex);
  return result;
}
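A minimal sanity check for the UDF above (a hypothetical sketch, not from the original source): computing the same digest directly with java.security.MessageDigest and Commons Codec's Hex should match what evaluate() returns, shown here with the well-known SHA-1 test vector for "abc".

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import org.apache.commons.codec.binary.Hex;

public class Sha1Check {
  public static void main(String[] args) throws Exception {
    MessageDigest md = MessageDigest.getInstance("SHA-1");
    byte[] sha = md.digest("abc".getBytes(StandardCharsets.UTF_8));
    // prints a9993e364706816aba3e25717850c26c9cd0d89d
    System.out.println(Hex.encodeHexString(sha));
  }
}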
private Object deserializeValue(BytesWritable valueWritable, byte tag) throws HiveException {
  try {
    return inputValueDeserializer[tag].deserialize(valueWritable);
  } catch (SerDeException e) {
    throw new HiveException(
        "Error: Unable to deserialize reduce input value (tag="
            + tag
            + ") from "
            + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength())
            + " with properties "
            + valueTableDesc[tag].getProperties(),
        e);
  }
}
public BytesWritable evaluate(BytesWritable geometryref1, DoubleWritable distance) {
  if (geometryref1 == null || geometryref1.getLength() == 0 || distance == null) {
    return null;
  }
  OGCGeometry ogcGeometry = GeometryUtils.geometryFromEsriShape(geometryref1);
  if (ogcGeometry == null) {
    LogUtils.Log_ArgumentsNull(LOG);
    return null;
  }
  OGCGeometry bufferedGeometry = ogcGeometry.buffer(distance.get());
  // TODO persist type information (polygon vs multipolygon)
  return GeometryUtils.geometryToEsriShapeBytesWritable(bufferedGeometry);
}
/** Given an output filename, write a bunch of random records to it. */
public void map(
    WritableComparable key,
    Writable value,
    OutputCollector<BytesWritable, BytesWritable> output,
    Reporter reporter)
    throws IOException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    int keyLength = minKeySize + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
    randomKey.setSize(keyLength);
    randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
    int valueLength = minValueSize + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
    randomValue.setSize(valueLength);
    randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
    output.collect(randomKey, randomValue);
    numBytesToWrite -= keyLength + valueLength;
    reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
    reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
    if (++itemCount % 200 == 0) {
      reporter.setStatus("wrote record " + itemCount + ". " + numBytesToWrite + " bytes left.");
    }
  }
  reporter.setStatus("done with " + itemCount + " records.");
}
private void processHtmlContent(MapWritable value, Metadata allMetadata, String uniqueId)
    throws IOException {
  BytesWritable htmlBytesWritable =
      (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML_NAME));
  if (htmlBytesWritable != null) {
    String htmlNativeEntryName =
        ParameterProcessing.HTML_FOLDER
            + "/"
            + uniqueId
            + "_"
            + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName()
            + ".html";
    zipFileWriter.addBinaryFile(
        htmlNativeEntryName, htmlBytesWritable.getBytes(), htmlBytesWritable.getLength());
    logger.trace("Processing file: {}", htmlNativeEntryName);
  }

  // get the list of other files that are part of the html output
  Text htmlFiles = (Text) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML));
  if (htmlFiles != null) {
    String fileNames = htmlFiles.toString();
    String[] fileNamesArr = fileNames.split(",");
    for (String fileName : fileNamesArr) {
      String entry = ParameterProcessing.HTML_FOLDER + "/" + fileName;
      BytesWritable imageBytesWritable =
          (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML + "_" + fileName));
      if (imageBytesWritable != null) {
        zipFileWriter.addBinaryFile(
            entry, imageBytesWritable.getBytes(), imageBytesWritable.getLength());
        logger.trace("Processing file: {}", entry);
      }
    }
  }
}
private static byte[] getBytes(BytesWritable val) {
  byte[] buffer = val.getBytes();
  /*
   * FIXME: remove the following part once the below jira is fixed
   * https://issues.apache.org/jira/browse/HADOOP-6298
   */
  long len = val.getLength();
  byte[] bytes = buffer;
  if (len < buffer.length) {
    bytes = new byte[(int) len];
    System.arraycopy(buffer, 0, bytes, 0, (int) len);
  }
  return bytes;
}
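A minimal sketch (hypothetical demo, not from the original source) of why the copy above is needed: BytesWritable.getBytes() exposes the entire backing buffer, which can be longer than the logical length reported by getLength() (the padding issue tracked in HADOOP-6298).

import org.apache.hadoop.io.BytesWritable;

public class PaddingDemo {
  public static void main(String[] args) {
    BytesWritable w = new BytesWritable(new byte[] {1, 2, 3}); // backing array of length 3
    w.setSize(2); // shrink the logical length; the backing array keeps its capacity
    System.out.println(w.getLength());       // 2
    System.out.println(w.getBytes().length); // 3: the padded backing buffer
  }
}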
public DoubleWritable evaluate(BytesWritable geomref) {
  if (geomref == null || geomref.getLength() == 0) {
    LogUtils.Log_ArgumentsNull(LOG);
    return null;
  }
  OGCGeometry ogcGeometry = GeometryUtils.geometryFromEsriShape(geomref);
  if (ogcGeometry == null) {
    LogUtils.Log_ArgumentsNull(LOG);
    return null;
  }
  Envelope envBound = new Envelope();
  ogcGeometry.getEsriGeometry().queryEnvelope(envBound);
  resultDouble.set(envBound.getXMin());
  return resultDouble;
}
@Override
public Object deserialize(Writable wr) throws SerDeException {
  LOG.debug("Entry SdbSerDe::deserialize");
  if (!(wr instanceof BytesWritable)) {
    throw new SerDeException("Expected BytesWritable, received " + wr.getClass().getName());
  }
  BytesWritable record = (BytesWritable) wr;
  ByteArrayRef bytes = new ByteArrayRef();
  bytes.setData(record.getBytes());
  row.init(bytes, 0, record.getLength());
  return row;
}
@Override
public Object deserialize(Writable blob) throws SerDeException {
  BytesWritable data = (BytesWritable) blob;
  inputByteBuffer.reset(data.getBytes(), 0, data.getLength());
  try {
    for (int i = 0; i < columnNames.size(); i++) {
      row.set(
          i,
          deserialize(inputByteBuffer, columnTypes.get(i), columnSortOrderIsDesc[i], row.get(i)));
    }
  } catch (IOException e) {
    throw new SerDeException(e);
  }
  return row;
}
public static void main(String[] args) throws IOException {
  Configuration conf = new Configuration();
  conf.set("fs.default.name", "hdfs://boa-njt/");
  FileSystem fileSystem = FileSystem.get(conf);
  String base = conf.get("fs.default.name", "");

  HashMap<String, String> sources = new HashMap<String, String>();
  HashSet<String> marks = new HashSet<String>();
  FileStatus[] files = fileSystem.listStatus(new Path(base + "tmprepcache/2015-07"));

  // First pass: decide, for each project key, which sequence file's copy to keep:
  // the first file in which a copy with code repositories and revisions appears
  // (otherwise the last scanned file that contains the key).
  for (int i = 0; i < files.length; i++) {
    FileStatus file = files[i];
    String name = file.getPath().getName();
    if (name.startsWith("projects-") && name.endsWith(".seq")) {
      System.out.println("Reading file " + i + " in " + files.length + ": " + name);
      SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
      final Text key = new Text();
      final BytesWritable value = new BytesWritable();
      try {
        while (r.next(key, value)) {
          String s = key.toString();
          if (marks.contains(s)) continue;
          Project p =
              Project.parseFrom(
                  CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()));
          if (p.getCodeRepositoriesCount() > 0 && p.getCodeRepositories(0).getRevisionsCount() > 0)
            marks.add(s);
          sources.put(s, name);
        }
      } catch (Exception e) {
        System.err.println(name);
        e.printStackTrace();
      }
      r.close();
    }
  }

  // Second pass: copy each project from its chosen source file into the merged output.
  SequenceFile.Writer w =
      SequenceFile.createWriter(
          fileSystem,
          conf,
          new Path(base + "repcache/2015-07/projects.seq"),
          Text.class,
          BytesWritable.class);
  for (int i = 0; i < files.length; i++) {
    FileStatus file = files[i];
    String name = file.getPath().getName();
    if (name.startsWith("projects-") && name.endsWith(".seq")) {
      System.out.println("Reading file " + i + " in " + files.length + ": " + name);
      SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
      final Text key = new Text();
      final BytesWritable value = new BytesWritable();
      try {
        while (r.next(key, value)) {
          String s = key.toString();
          if (sources.get(s).equals(name)) w.append(key, value);
        }
      } catch (Exception e) {
        System.err.println(name);
        e.printStackTrace();
      }
      r.close();
    }
  }
  w.close();
  fileSystem.close();
}
public void set(int index, BytesWritable value) {
  // Copy exactly getLength() bytes so padding in the backing buffer is dropped.
  values[index] = Arrays.copyOf(value.getBytes(), value.getLength());
  usage += keys[index].length + values[index].length;
}
protected void processMap(MapWritable value) throws IOException, InterruptedException {
  columnMetadata.reinit();
  ++outputFileCount;
  DocumentMetadata allMetadata = getAllMetadata(value);
  Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount);
  columnMetadata.addMetadata(standardMetadata);
  columnMetadata.addMetadata(allMetadata);

  // documents other than the first one in this loop are either duplicates or attachments
  if (first) {
    masterOutputFileCount = outputFileCount;
  } else {
    if (allMetadata.hasParent()) {
      columnMetadata.addMetadataValue(
          DocumentMetadataKeys.ATTACHMENT_PARENT, UPIFormat.format(masterOutputFileCount));
    } else {
      columnMetadata.addMetadataValue(
          DocumentMetadataKeys.MASTER_DUPLICATE, UPIFormat.format(masterOutputFileCount));
    }
  }

  // String uniqueId = allMetadata.getUniqueId();
  String originalFileName =
      new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();

  // add the text to the text folder
  String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
  String textEntryName =
      ParameterProcessing.TEXT + "/" + UPIFormat.format(outputFileCount) + "_" + originalFileName
          + ".txt";
  if (textEntryName != null) {
    zipFileWriter.addTextFile(textEntryName, documentText);
  }
  columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName);

  // add the native file to the native folder
  String nativeEntryName =
      ParameterProcessing.NATIVE + "/" + UPIFormat.format(outputFileCount) + "_" + originalFileName;
  BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
  if (bytesWritable != null) { // some large exception files are not passed
    zipFileWriter.addBinaryFile(
        nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
    logger.trace("Processing file: {}", nativeEntryName);
  }
  columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);

  // add the pdf made from native to the PDF folder
  String pdfNativeEntryName =
      ParameterProcessing.PDF_FOLDER
          + "/"
          + UPIFormat.format(outputFileCount)
          + "_"
          + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName()
          + ".pdf";
  BytesWritable pdfBytesWritable =
      (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
  if (pdfBytesWritable != null) {
    zipFileWriter.addBinaryFile(
        pdfNativeEntryName, pdfBytesWritable.getBytes(), pdfBytesWritable.getLength());
    logger.trace("Processing file: {}", pdfNativeEntryName);
  }

  processHtmlContent(value, allMetadata, UPIFormat.format(outputFileCount));

  // add exception to the exception folder
  String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
  if (exception != null) {
    String exceptionEntryName =
        "exception/"
            + UPIFormat.format(outputFileCount)
            + "_"
            + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
    if (bytesWritable != null) {
      zipFileWriter.addBinaryFile(
          exceptionEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
  }

  // write this all to the reduce map
  // context.write(new Text(outputKey), new Text(columnMetadata.delimiterSeparatedValues()));
  // drop the key altogether, because it messes up the format - but put it in the value
  // TODO use NullWritable
  if (OsUtil.isNix()) {
    context.write(null, new Text(columnMetadata.delimiterSeparatedValues()));
  }

  // prepare for the next file with the same key, if there is any
  first = false;
}
static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) {
  // Is this field a null?
  if (o == null) {
    buffer.write((byte) 0, invert);
    return;
  }
  // This field is not a null.
  buffer.write((byte) 1, invert);

  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      switch (poi.getPrimitiveCategory()) {
        case VOID: {
          return;
        }
        case BOOLEAN: {
          boolean v = ((BooleanObjectInspector) poi).get(o);
          buffer.write((byte) (v ? 2 : 1), invert);
          return;
        }
        case BYTE: {
          ByteObjectInspector boi = (ByteObjectInspector) poi;
          byte v = boi.get(o);
          buffer.write((byte) (v ^ 0x80), invert);
          return;
        }
        case SHORT: {
          ShortObjectInspector spoi = (ShortObjectInspector) poi;
          short v = spoi.get(o);
          buffer.write((byte) ((v >> 8) ^ 0x80), invert);
          buffer.write((byte) v, invert);
          return;
        }
        case INT: {
          IntObjectInspector ioi = (IntObjectInspector) poi;
          int v = ioi.get(o);
          buffer.write((byte) ((v >> 24) ^ 0x80), invert);
          buffer.write((byte) (v >> 16), invert);
          buffer.write((byte) (v >> 8), invert);
          buffer.write((byte) v, invert);
          return;
        }
        case LONG: {
          LongObjectInspector loi = (LongObjectInspector) poi;
          long v = loi.get(o);
          buffer.write((byte) ((v >> 56) ^ 0x80), invert);
          buffer.write((byte) (v >> 48), invert);
          buffer.write((byte) (v >> 40), invert);
          buffer.write((byte) (v >> 32), invert);
          buffer.write((byte) (v >> 24), invert);
          buffer.write((byte) (v >> 16), invert);
          buffer.write((byte) (v >> 8), invert);
          buffer.write((byte) v, invert);
          return;
        }
        case FLOAT: {
          FloatObjectInspector foi = (FloatObjectInspector) poi;
          int v = Float.floatToIntBits(foi.get(o));
          if ((v & (1 << 31)) != 0) {
            // negative number, flip all bits
            v = ~v;
          } else {
            // positive number, flip the first bit
            v = v ^ (1 << 31);
          }
          buffer.write((byte) (v >> 24), invert);
          buffer.write((byte) (v >> 16), invert);
          buffer.write((byte) (v >> 8), invert);
          buffer.write((byte) v, invert);
          return;
        }
        case DOUBLE: {
          DoubleObjectInspector doi = (DoubleObjectInspector) poi;
          long v = Double.doubleToLongBits(doi.get(o));
          if ((v & (1L << 63)) != 0) {
            // negative number, flip all bits
            v = ~v;
          } else {
            // positive number, flip the first bit
            v = v ^ (1L << 63);
          }
          buffer.write((byte) (v >> 56), invert);
          buffer.write((byte) (v >> 48), invert);
          buffer.write((byte) (v >> 40), invert);
          buffer.write((byte) (v >> 32), invert);
          buffer.write((byte) (v >> 24), invert);
          buffer.write((byte) (v >> 16), invert);
          buffer.write((byte) (v >> 8), invert);
          buffer.write((byte) v, invert);
          return;
        }
        case STRING: {
          StringObjectInspector soi = (StringObjectInspector) poi;
          Text t = soi.getPrimitiveWritableObject(o);
          serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
          return;
        }
        case BINARY: {
          BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
          BytesWritable ba = baoi.getPrimitiveWritableObject(o);
          byte[] toSer = new byte[ba.getLength()];
          System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
          serializeBytes(buffer, toSer, ba.getLength(), invert);
          return;
        }
        case DATE: {
          DateObjectInspector doi = (DateObjectInspector) poi;
          long v = doi.getPrimitiveWritableObject(o).getTimeInSeconds();
          buffer.write((byte) ((v >> 56) ^ 0x80), invert);
          buffer.write((byte) (v >> 48), invert);
          buffer.write((byte) (v >> 40), invert);
          buffer.write((byte) (v >> 32), invert);
          buffer.write((byte) (v >> 24), invert);
          buffer.write((byte) (v >> 16), invert);
          buffer.write((byte) (v >> 8), invert);
          buffer.write((byte) v, invert);
          return;
        }
        case TIMESTAMP: {
          TimestampObjectInspector toi = (TimestampObjectInspector) poi;
          TimestampWritable t = toi.getPrimitiveWritableObject(o);
          byte[] data = t.getBinarySortable();
          for (int i = 0; i < data.length; i++) {
            buffer.write(data[i], invert);
          }
          return;
        }
        default: {
          throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
      }
    }
    case LIST: {
      ListObjectInspector loi = (ListObjectInspector) oi;
      ObjectInspector eoi = loi.getListElementObjectInspector();
      // \1 followed by each element
      int size = loi.getListLength(o);
      for (int eid = 0; eid < size; eid++) {
        buffer.write((byte) 1, invert);
        serialize(buffer, loi.getListElement(o, eid), eoi, invert);
      }
      // and \0 to terminate
      buffer.write((byte) 0, invert);
      return;
    }
    case MAP: {
      MapObjectInspector moi = (MapObjectInspector) oi;
      ObjectInspector koi = moi.getMapKeyObjectInspector();
      ObjectInspector voi = moi.getMapValueObjectInspector();
      // \1 followed by each key and then each value
      Map<?, ?> map = moi.getMap(o);
      for (Map.Entry<?, ?> entry : map.entrySet()) {
        buffer.write((byte) 1, invert);
        serialize(buffer, entry.getKey(), koi, invert);
        serialize(buffer, entry.getValue(), voi, invert);
      }
      // and \0 to terminate
      buffer.write((byte) 0, invert);
      return;
    }
    case STRUCT: {
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      for (int i = 0; i < fields.size(); i++) {
        serialize(
            buffer,
            soi.getStructFieldData(o, fields.get(i)),
            fields.get(i).getFieldObjectInspector(),
            invert);
      }
      return;
    }
    case UNION: {
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      byte tag = uoi.getTag(o);
      buffer.write(tag, invert);
      serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert);
      return;
    }
    default: {
      throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
  }
}
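A standalone sketch (hypothetical, not from the original source) of the sign-bit flip used for INT above: XOR-ing the most significant byte with 0x80 makes unsigned lexicographic byte order agree with signed numeric order, which is what makes the encoding binary-sortable.

public class SortableIntDemo {
  // Same per-byte encoding as the INT case above.
  static byte[] encode(int v) {
    return new byte[] {
      (byte) ((v >> 24) ^ 0x80), (byte) (v >> 16), (byte) (v >> 8), (byte) v
    };
  }

  // Unsigned lexicographic comparison of two 4-byte encodings.
  static int compareUnsigned(byte[] x, byte[] y) {
    for (int i = 0; i < 4; i++) {
      int d = (x[i] & 0xff) - (y[i] & 0xff);
      if (d != 0) return d;
    }
    return 0;
  }

  public static void main(String[] args) {
    System.out.println(compareUnsigned(encode(-5), encode(3)) < 0);  // true
    System.out.println(compareUnsigned(encode(7), encode(42)) < 0);  // true
    System.out.println(compareUnsigned(encode(-1), encode(-2)) > 0); // true
  }
}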
@JRubyMethod(name = "ruby")
public IRubyObject ruby(final ThreadContext ctx) {
  RubyString string = RubyString.newString(ctx.runtime, value.getBytes(), 0, value.getLength());
  string.setEncoding(ASCIIEncoding.INSTANCE);
  return string;
}
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        switch (poi.getPrimitiveCategory()) {
          case BOOLEAN: {
            boolean b = ((BooleanObjectInspector) poi).get(o);
            sb.append(b ? "true" : "false");
            break;
          }
          case BYTE: {
            sb.append(((ByteObjectInspector) poi).get(o));
            break;
          }
          case SHORT: {
            sb.append(((ShortObjectInspector) poi).get(o));
            break;
          }
          case INT: {
            sb.append(((IntObjectInspector) poi).get(o));
            break;
          }
          case LONG: {
            sb.append(((LongObjectInspector) poi).get(o));
            break;
          }
          case FLOAT: {
            sb.append(((FloatObjectInspector) poi).get(o));
            break;
          }
          case DOUBLE: {
            sb.append(((DoubleObjectInspector) poi).get(o));
            break;
          }
          case STRING: {
            sb.append('"');
            sb.append(escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(o)));
            sb.append('"');
            break;
          }
          case CHAR: {
            sb.append('"');
            sb.append(
                escapeString(((HiveCharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
            sb.append('"');
            break;
          }
          case VARCHAR: {
            sb.append('"');
            sb.append(
                escapeString(
                    ((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
            sb.append('"');
            break;
          }
          case DATE: {
            sb.append('"');
            sb.append(((DateObjectInspector) poi).getPrimitiveWritableObject(o));
            sb.append('"');
            break;
          }
          case TIMESTAMP: {
            sb.append('"');
            sb.append(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o));
            sb.append('"');
            break;
          }
          case BINARY: {
            BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
            Text txt = new Text();
            txt.set(bw.getBytes(), 0, bw.getLength());
            sb.append(txt.toString());
            break;
          }
          case DECIMAL: {
            sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
            break;
          }
          default:
            throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory());
        }
      }
      break;
    }
    case LIST: {
      ListObjectInspector loi = (ListObjectInspector) oi;
      ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
      List<?> olist = loi.getList(o);
      if (olist == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
          if (i > 0) {
            sb.append(COMMA);
          }
          buildJSONString(sb, olist.get(i), listElementObjectInspector, JSON_NULL);
        }
        sb.append(RBRACKET);
      }
      break;
    }
    case MAP: {
      MapObjectInspector moi = (MapObjectInspector) oi;
      ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
      ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
      Map<?, ?> omap = moi.getMap(o);
      if (omap == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        boolean first = true;
        for (Object entry : omap.entrySet()) {
          if (first) {
            first = false;
          } else {
            sb.append(COMMA);
          }
          Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
          buildJSONString(sb, e.getKey(), mapKeyObjectInspector, JSON_NULL);
          sb.append(COLON);
          buildJSONString(sb, e.getValue(), mapValueObjectInspector, JSON_NULL);
        }
        sb.append(RBRACE);
      }
      break;
    }
    case STRUCT: {
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
      if (o == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        for (int i = 0; i < structFields.size(); i++) {
          if (i > 0) {
            sb.append(COMMA);
          }
          sb.append(QUOTE);
          sb.append(structFields.get(i).getFieldName());
          sb.append(QUOTE);
          sb.append(COLON);
          buildJSONString(
              sb,
              soi.getStructFieldData(o, structFields.get(i)),
              structFields.get(i).getFieldObjectInspector(),
              JSON_NULL);
        }
        sb.append(RBRACE);
      }
      break;
    }
    case UNION: {
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        sb.append(uoi.getTag(o));
        sb.append(COLON);
        buildJSONString(
            sb, uoi.getField(o), uoi.getObjectInspectors().get(uoi.getTag(o)), JSON_NULL);
        sb.append(RBRACE);
      }
      break;
    }
    default:
      throw new RuntimeException("Unknown type in ObjectInspector!");
  }
}
public ByteBuffer getKey() {
  // ByteBuffer.wrap does not copy: the buffer shares the Writable's backing array.
  return ByteBuffer.wrap(key.getBytes(), 0, key.getLength());
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  // We only pull for a specified time. Unfinished work will be rescheduled in
  // the next run.
  if (System.currentTimeMillis() > maxPullTime) {
    if (reader != null) {
      closeReader();
    }
    return false;
  }

  while (true) {
    try {
      if (reader == null || !reader.hasNext()) {
        EtlRequest request = split.popRequest();
        if (request == null) {
          return false;
        }
        if (maxPullHours > 0) {
          endTimeStamp = 0;
        }
        key.set(
            request.getTopic(),
            request.getNodeId(),
            request.getPartition(),
            request.getOffset(),
            request.getOffset(),
            0);
        value = new AvroWrapper<Object>(new Object());
        System.out.println(
            "topic:"
                + request.getTopic()
                + " partition:"
                + request.getPartition()
                + " beginOffset:"
                + request.getOffset()
                + " estimatedLastOffset:"
                + request.getLastOffset());
        statusMsg += statusMsg.length() > 0 ? "; " : "";
        statusMsg += request.getTopic() + ":" + request.getNodeId() + ":" + request.getPartition();
        context.setStatus(statusMsg);
        if (reader != null) {
          closeReader();
        }
        reader =
            new KafkaReader(
                request,
                EtlInputFormat.getKafkaClientTimeout(mapperContext),
                EtlInputFormat.getKafkaClientBufferSize(mapperContext));
        decoder =
            (MessageDecoder<Message, Record>)
                MessageDecoderFactory.createMessageDecoder(context, request.getTopic());
      }

      while (reader.getNext(key, msgValue)) {
        context.progress();
        mapperContext.getCounter("total", "data-read").increment(msgValue.getLength());
        mapperContext.getCounter("total", "event-count").increment(1);
        byte[] bytes = getBytes(msgValue);

        // check the checksum of message
        Message message = new Message(bytes);
        long checksum = key.getChecksum();
        if (checksum != message.checksum()) {
          throw new ChecksumException(
              "Invalid message checksum "
                  + message.checksum()
                  + ". Expected "
                  + key.getChecksum(),
              key.getOffset());
        }

        long tempTime = System.currentTimeMillis();
        CamusWrapper wrapper;
        try {
          wrapper = getWrappedRecord(key.getTopic(), message);
        } catch (Exception e) {
          mapperContext.write(key, new ExceptionWritable(e));
          continue;
        }
        if (wrapper == null) {
          mapperContext.write(key, new ExceptionWritable(new RuntimeException("null record")));
          continue;
        }

        long timeStamp = wrapper.getTimestamp();
        try {
          key.setTime(timeStamp);
          key.setServer(wrapper.getServer());
          key.setService(wrapper.getService());
        } catch (Exception e) {
          mapperContext.write(key, new ExceptionWritable(e));
          continue;
        }

        if (timeStamp < beginTimeStamp) {
          mapperContext.getCounter("total", "skip-old").increment(1);
        } else if (endTimeStamp == 0) {
          DateTime time = new DateTime(timeStamp);
          statusMsg += " begin read at " + time.toString();
          context.setStatus(statusMsg);
          System.out.println(key.getTopic() + " begin read at " + time.toString());
          endTimeStamp = (time.plusHours(this.maxPullHours)).getMillis();
        } else if (timeStamp > endTimeStamp || System.currentTimeMillis() > maxPullTime) {
          statusMsg += " max read at " + new DateTime(timeStamp).toString();
          context.setStatus(statusMsg);
          System.out.println(key.getTopic() + " max read at " + new DateTime(timeStamp).toString());
          mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime());
          closeReader();
        }

        long secondTime = System.currentTimeMillis();
        value.datum(wrapper.getRecord());
        long decodeTime = ((secondTime - tempTime));
        mapperContext.getCounter("total", "decode-time(ms)").increment(decodeTime);

        if (reader != null) {
          mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime());
        }
        return true;
      }
      reader = null;
    } catch (Throwable t) {
      Exception e = new Exception(t.getLocalizedMessage(), t);
      e.setStackTrace(t.getStackTrace());
      mapperContext.write(key, new ExceptionWritable(e));
      reader = null;
      continue;
    }
  }
}
@Override
public int getHashFromKey() throws SerDeException {
  byte[] keyBytes = key.getBytes();
  int keyLength = key.getLength();
  return WriteBuffers.murmurHash(keyBytes, 0, keyLength);
}
@Override
public void writeKey(RandomAccessOutput dest) throws SerDeException {
  byte[] keyBytes = key.getBytes();
  int keyLength = key.getLength();
  dest.write(keyBytes, 0, keyLength);
}
@Override
public void writeValue(RandomAccessOutput dest) throws SerDeException {
  byte[] valueBytes = val.getBytes();
  int valueLength = val.getLength();
  dest.write(valueBytes, 0, valueLength);
}
private void testBinarySortableFast(
    SerdeRandomRowSource source,
    Object[][] rows,
    boolean[] columnSortOrderIsDesc,
    byte[] columnNullMarker,
    byte[] columnNotNullMarker,
    AbstractSerDe serde,
    StructObjectInspector rowOI,
    AbstractSerDe serde_fewer,
    StructObjectInspector writeRowOI,
    boolean ascending,
    PrimitiveTypeInfo[] primitiveTypeInfos,
    boolean useIncludeColumns,
    boolean doWriteFewerColumns,
    Random r)
    throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;

  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }

  int writeColumnCount = columnCount;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
  }

  BinarySortableSerializeWrite binarySortableSerializeWrite =
      new BinarySortableSerializeWrite(
          columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

  // Try to serialize

  // One Writable per row.
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];

  int[][] perFieldWriteLengthsArray = new int[rowCount][];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    binarySortableSerializeWrite.set(output);

    int[] perFieldWriteLengths = new int[columnCount];
    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
      perFieldWriteLengths[index] = output.getLength();
    }
    perFieldWriteLengthsArray[i] = perFieldWriteLengths;

    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
    if (i > 0) {
      int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
      if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
        System.out.println(
            "Test failed in "
                + (ascending ? "ascending" : "descending")
                + " order with "
                + (i - 1)
                + " and "
                + i);
        System.out.println(
            "serialized data ["
                + (i - 1)
                + "] = "
                + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
        System.out.println(
            "serialized data ["
                + i
                + "] = "
                + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
        fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
      }
    }
  }

  // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    BinarySortableDeserializeRead binarySortableDeserializeRead =
        new BinarySortableDeserializeRead(
            primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

    BytesWritable bytesWritable = serializeWriteBytes[i];
    binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        binarySortableDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back a null.
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
    }

    /*
     * Clip off one byte and expect to get an EOFException on the write field.
     */
    BinarySortableDeserializeRead binarySortableDeserializeRead2 =
        new BinarySortableDeserializeRead(
            primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

    binarySortableDeserializeRead2.set(
        bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte.

    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      if (index == writeColumnCount - 1) {
        boolean threw = false;
        try {
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
        } catch (EOFException e) {
          // debugDetailedReadPositionString =
          //     binarySortableDeserializeRead2.getDetailedReadPositionString();
          // debugStackTrace = e.getStackTrace();
          threw = true;
        }
        TestCase.assertTrue(threw);
      } else {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead2.skipNextField();
        } else {
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
        }
      }
    }
  }

  // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];

    // Note that regular SerDe doesn't tolerate fewer columns.
    List<Object> deserializedRow;
    if (doWriteFewerColumns) {
      deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
    } else {
      deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
    }

    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      Object expected = row[index];
      Object object = deserializedRow.get(index);
      if (expected == null || object == null) {
        if (expected != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(expected)) {
          fail(
              "SerDe deserialized value does not match (expected "
                  + expected.getClass().getName()
                  + " "
                  + expected.toString()
                  + ", actual "
                  + object.getClass().getName()
                  + " "
                  + object.toString()
                  + ")");
        }
      }
    }
  }

  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];

  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];

    // Since SerDe reuses memory, we will need to make a copy.
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(row, rowOI);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(serialized);
    byte[] serDeOutput =
        Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());

    byte[] serializeWriteExpected =
        Arrays.copyOfRange(
            serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());

    if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
      int mismatchPos = -1;
      if (serDeOutput.length != serializeWriteExpected.length) {
        for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
          if (serDeOutput[b] != serializeWriteExpected[b]) {
            mismatchPos = b;
            break;
          }
        }
        fail(
            "Different byte array lengths: serDeOutput.length "
                + serDeOutput.length
                + ", serializeWriteExpected.length "
                + serializeWriteExpected.length
                + " mismatchPos "
                + mismatchPos
                + " perFieldWriteLengths "
                + Arrays.toString(perFieldWriteLengthsArray[i]));
      }

      List<Integer> differentPositions = new ArrayList<Integer>();
      for (int b = 0; b < serDeOutput.length; b++) {
        if (serDeOutput[b] != serializeWriteExpected[b]) {
          differentPositions.add(b);
        }
      }
      if (differentPositions.size() > 0) {
        List<String> serializeWriteExpectedFields = new ArrayList<String>();
        List<String> serDeFields = new ArrayList<String>();
        int f = 0;
        int lastBegin = 0;
        for (int b = 0; b < serDeOutput.length; b++) {
          int writeLength = perFieldWriteLengthsArray[i][f];
          if (b + 1 == writeLength) {
            serializeWriteExpectedFields.add(
                displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
            serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
            f++;
            lastBegin = b + 1;
          }
        }
        fail(
            "SerializeWrite and SerDe serialization does not match at positions "
                + differentPositions.toString()
                + "\n(SerializeWrite: "
                + serializeWriteExpectedFields.toString()
                + "\nSerDe: "
                + serDeFields.toString()
                + "\nperFieldWriteLengths "
                + Arrays.toString(perFieldWriteLengthsArray[i])
                + "\nprimitiveTypeInfos "
                + Arrays.toString(primitiveTypeInfos)
                + "\nrow "
                + Arrays.toString(row));
      }
    }
    serdeBytes[i] = bytesWritable;
  }

  // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    BinarySortableDeserializeRead binarySortableDeserializeRead =
        new BinarySortableDeserializeRead(
            primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

    BytesWritable bytesWritable = serdeBytes[i];
    binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        binarySortableDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back a null.
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(
            binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
    }
  }
}
public static void writeToFile(BytesWritable value, File binaryFile) throws IOException {
  // try-with-resources ensures the stream is closed even if write() throws.
  try (FileOutputStream fileOut = new FileOutputStream(binaryFile)) {
    fileOut.write(value.getBytes(), 0, value.getLength());
  }
}
private static byte[] pair(BytesWritable a, BytesWritable b) {
  byte[] pairData = new byte[a.getLength() + b.getLength()];
  System.arraycopy(a.getBytes(), 0, pairData, 0, a.getLength());
  System.arraycopy(b.getBytes(), 0, pairData, a.getLength(), b.getLength());
  return pairData;
}
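Hypothetical usage of pair() above: because it copies only getLength() bytes from each argument, the concatenation ignores any padding in the backing arrays.

BytesWritable a = new BytesWritable(new byte[] {1, 2});
BytesWritable b = new BytesWritable(new byte[] {3, 4});
byte[] joined = pair(a, b); // {1, 2, 3, 4}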