@Override
  public void serialize(Object o, ImmutableBytesWritable bytesWritable) throws IOException {
    byte[] bytesToWriteIn = bytesWritable.get();
    int offset = bytesWritable.getOffset();

    if (o == null) {
      if (fixedPrefixLength > 0)
        throw new IllegalStateException(
            "excepted at least " + fixedPrefixLength + " bytes to write");
      else if (terminate()) {
        // write one (masked) null byte
        bytesToWriteIn[offset] = mask(NULL);
        RowKeyUtils.seek(bytesWritable, 1);
      }
    } else {
      final BytesWritable input = (BytesWritable) o;
      if (fixedPrefixLength > input.getLength())
        throw new IllegalStateException(
            "excepted at least " + fixedPrefixLength + " bytes to write");
      else {
        encodeFixedPrefix(input.getBytes(), bytesWritable);
        encodedCustomizedReversedPackedBcd(
            toStringRepresentation(
                input.getBytes(), fixedPrefixLength, input.getLength() - fixedPrefixLength),
            bytesWritable);
      }
    }
  }
 public KeyAndPartitionWritable(Domain domain, BytesWritable key) {
   this.key = key;
   int partition =
       domain
           .getPartitioner()
           .partition(ByteBuffer.wrap(key.getBytes(), 0, key.getLength()), domain.getNumParts());
   this.partition = new IntWritable(partition);
 }
Beispiel #3
0
  public void map(BytesWritable key, BytesWritable value, Context context)
      throws IOException, InterruptedException {
    long start = NumberUtils.decodeLong(key.getBytes(), 0);
    long end = NumberUtils.decodeLong(key.getBytes(), 8);

    oKey.set(start);
    oValue.setSize(value.getLength() + 9);
    oValue.getBytes()[0] = 0;
    NumberUtils.encodeLong(oValue.getBytes(), 1, end);
    System.arraycopy(value.getBytes(), 0, oValue.getBytes(), 9, value.getLength());
    context.write(oKey, oValue);

    oKey.set(end);
    oValue.getBytes()[0] = 1;
    NumberUtils.encodeLong(oValue.getBytes(), 1, start);
    System.arraycopy(value.getBytes(), 0, oValue.getBytes(), 9, value.getLength());
    context.write(oKey, oValue);
  }
  @Override
  public int getSerializedLength(Object o) throws IOException {
    if (o == null) return terminate() ? fixedPrefixLength + 1 : fixedPrefixLength;

    final BytesWritable input = (BytesWritable) o;
    return fixedPrefixLength
        + getSerializedLength(
            toStringRepresentation(
                input.getBytes(), fixedPrefixLength, input.getLength() - fixedPrefixLength));
  }
Beispiel #5
0
 @Override
 public void rollback() throws IOException {
   uncommittedLength = target.getLength();
   if (uncommittedLength != 0) {
     if (compressGaps) {
       writeGap(gamma + 1);
     } else {
       writeGapUncompressed(gamma + 1);
     }
   }
 }
Beispiel #6
0
  /** Convert bytes to SHA-1 */
  public Text evaluate(BytesWritable b) {
    if (b == null) {
      return null;
    }

    digest.reset();
    digest.update(b.getBytes(), 0, b.getLength());
    byte[] shaBytes = digest.digest();
    String shaHex = Hex.encodeHexString(shaBytes);

    result.set(shaHex);
    return result;
  }
 private Object deserializeValue(BytesWritable valueWritable, byte tag) throws HiveException {
   try {
     return inputValueDeserializer[tag].deserialize(valueWritable);
   } catch (SerDeException e) {
     throw new HiveException(
         "Error: Unable to deserialize reduce input value (tag="
             + tag
             + ") from "
             + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength())
             + " with properties "
             + valueTableDesc[tag].getProperties(),
         e);
   }
 }
  public BytesWritable evaluate(BytesWritable geometryref1, DoubleWritable distance) {
    if (geometryref1 == null || geometryref1.getLength() == 0 || distance == null) {
      return null;
    }

    OGCGeometry ogcGeometry = GeometryUtils.geometryFromEsriShape(geometryref1);
    if (ogcGeometry == null) {
      LogUtils.Log_ArgumentsNull(LOG);
      return null;
    }

    OGCGeometry bufferedGeometry = ogcGeometry.buffer(distance.get());
    // TODO persist type information (polygon vs multipolygon)
    return GeometryUtils.geometryToEsriShapeBytesWritable(bufferedGeometry);
  }
Beispiel #9
0
 /** Given an output filename, write a bunch of random records to it. */
 public void map(
     WritableComparable key,
     Writable value,
     OutputCollector<BytesWritable, BytesWritable> output,
     Reporter reporter)
     throws IOException {
   int itemCount = 0;
   while (numBytesToWrite > 0) {
     int keyLength = minKeySize + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
     randomKey.setSize(keyLength);
     randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
     int valueLength = minValueSize + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
     randomValue.setSize(valueLength);
     randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
     output.collect(randomKey, randomValue);
     numBytesToWrite -= keyLength + valueLength;
     reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
     reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
     if (++itemCount % 200 == 0) {
       reporter.setStatus("wrote record " + itemCount + ". " + numBytesToWrite + " bytes left.");
     }
   }
   reporter.setStatus("done with " + itemCount + " records.");
 }
  private void processHtmlContent(MapWritable value, Metadata allMetadata, String uniqueId)
      throws IOException {
    BytesWritable htmlBytesWritable =
        (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML_NAME));
    if (htmlBytesWritable != null) {
      String htmlNativeEntryName =
          ParameterProcessing.HTML_FOLDER
              + "/"
              + uniqueId
              + "_"
              + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName()
              + ".html";
      zipFileWriter.addBinaryFile(
          htmlNativeEntryName, htmlBytesWritable.getBytes(), htmlBytesWritable.getLength());
      logger.trace("Processing file: {}", htmlNativeEntryName);
    }

    // get the list with other files part of the html output
    Text htmlFiles = (Text) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML));
    if (htmlFiles != null) {
      String fileNames = htmlFiles.toString();
      String[] fileNamesArr = fileNames.split(",");
      for (String fileName : fileNamesArr) {
        String entry = ParameterProcessing.HTML_FOLDER + "/" + fileName;

        BytesWritable imageBytesWritable =
            (BytesWritable)
                value.get(new Text(ParameterProcessing.NATIVE_AS_HTML + "_" + fileName));
        if (imageBytesWritable != null) {
          zipFileWriter.addBinaryFile(
              entry, imageBytesWritable.getBytes(), imageBytesWritable.getLength());
          logger.trace("Processing file: {}", entry);
        }
      }
    }
  }
Beispiel #11
0
  private static byte[] getBytes(BytesWritable val) {
    byte[] buffer = val.getBytes();

    /*
     * FIXME: remove the following part once the below jira is fixed
     * https://issues.apache.org/jira/browse/HADOOP-6298
     */
    long len = val.getLength();
    byte[] bytes = buffer;
    if (len < buffer.length) {
      bytes = new byte[(int) len];
      System.arraycopy(buffer, 0, bytes, 0, (int) len);
    }

    return bytes;
  }
  public DoubleWritable evaluate(BytesWritable geomref) {
    if (geomref == null || geomref.getLength() == 0) {
      LogUtils.Log_ArgumentsNull(LOG);
      return null;
    }

    OGCGeometry ogcGeometry = GeometryUtils.geometryFromEsriShape(geomref);
    if (ogcGeometry == null) {
      LogUtils.Log_ArgumentsNull(LOG);
      return null;
    }

    Envelope envBound = new Envelope();
    ogcGeometry.getEsriGeometry().queryEnvelope(envBound);
    resultDouble.set(envBound.getXMin());
    return resultDouble;
  }
Beispiel #13
0
  @Override
  public Object deserialize(Writable wr) throws SerDeException {
    LOG.debug("Entry SdbSerDe::deserialize");

    if (!(wr instanceof BytesWritable)) {
      throw new SerDeException("Expected BSONWritable, received " + wr.getClass().getName());
    }

    BytesWritable record = (BytesWritable) wr;

    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData(record.getBytes());

    row.init(bytes, 0, record.getLength());

    return row;
  }
  @Override
  public Object deserialize(Writable blob) throws SerDeException {
    BytesWritable data = (BytesWritable) blob;
    inputByteBuffer.reset(data.getBytes(), 0, data.getLength());

    try {
      for (int i = 0; i < columnNames.size(); i++) {
        row.set(
            i,
            deserialize(inputByteBuffer, columnTypes.get(i), columnSortOrderIsDesc[i], row.get(i)));
      }
    } catch (IOException e) {
      throw new SerDeException(e);
    }

    return row;
  }
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://boa-njt/");
    FileSystem fileSystem = FileSystem.get(conf);
    String base = conf.get("fs.default.name", "");

    HashMap<String, String> sources = new HashMap<String, String>();
    HashSet<String> marks = new HashSet<String>();
    FileStatus[] files = fileSystem.listStatus(new Path(base + "tmprepcache/2015-07"));
    for (int i = 0; i < files.length; i++) {
      FileStatus file = files[i];
      String name = file.getPath().getName();
      if (name.startsWith("projects-") && name.endsWith(".seq")) {
        System.out.println("Reading file " + i + " in " + files.length + ": " + name);
        SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
        final Text key = new Text();
        final BytesWritable value = new BytesWritable();
        try {
          while (r.next(key, value)) {
            String s = key.toString();
            if (marks.contains(s)) continue;
            Project p =
                Project.parseFrom(
                    CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()));
            if (p.getCodeRepositoriesCount() > 0
                && p.getCodeRepositories(0).getRevisionsCount() > 0) marks.add(s);
            sources.put(s, name);
          }
        } catch (Exception e) {
          System.err.println(name);
          e.printStackTrace();
        }
        r.close();
      }
    }
    SequenceFile.Writer w =
        SequenceFile.createWriter(
            fileSystem,
            conf,
            new Path(base + "repcache/2015-07/projects.seq"),
            Text.class,
            BytesWritable.class);
    for (int i = 0; i < files.length; i++) {
      FileStatus file = files[i];
      String name = file.getPath().getName();
      if (name.startsWith("projects-") && name.endsWith(".seq")) {
        System.out.println("Reading file " + i + " in " + files.length + ": " + name);
        SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
        final Text key = new Text();
        final BytesWritable value = new BytesWritable();
        try {
          while (r.next(key, value)) {
            String s = key.toString();
            if (sources.get(s).equals(name)) w.append(key, value);
          }
        } catch (Exception e) {
          System.err.println(name);
          e.printStackTrace();
        }
        r.close();
      }
    }
    w.close();

    fileSystem.close();
  }
 public void set(int index, BytesWritable value) {
   values[index] = Arrays.copyOf(value.getBytes(), value.getLength());
   usage += keys[index].length + values[index].length;
 }
  protected void processMap(MapWritable value) throws IOException, InterruptedException {
    columnMetadata.reinit();
    ++outputFileCount;
    DocumentMetadata allMetadata = getAllMetadata(value);
    Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount);
    columnMetadata.addMetadata(standardMetadata);
    columnMetadata.addMetadata(allMetadata);
    // documents other than the first one in this loop are either duplicates or attachments
    if (first) {
      masterOutputFileCount = outputFileCount;
    } else {
      if (allMetadata.hasParent()) {
        columnMetadata.addMetadataValue(
            DocumentMetadataKeys.ATTACHMENT_PARENT, UPIFormat.format(masterOutputFileCount));
      } else {
        columnMetadata.addMetadataValue(
            DocumentMetadataKeys.MASTER_DUPLICATE, UPIFormat.format(masterOutputFileCount));
      }
    }

    // String uniqueId = allMetadata.getUniqueId();

    String originalFileName =
        new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
    // add the text to the text folder
    String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
    String textEntryName =
        ParameterProcessing.TEXT
            + "/"
            + UPIFormat.format(outputFileCount)
            + "_"
            + originalFileName
            + ".txt";
    if (textEntryName != null) {
      zipFileWriter.addTextFile(textEntryName, documentText);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName);
    // add the native file to the native folder
    String nativeEntryName =
        ParameterProcessing.NATIVE
            + "/"
            + UPIFormat.format(outputFileCount)
            + "_"
            + originalFileName;
    BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
    if (bytesWritable != null) { // some large exception files are not passed
      zipFileWriter.addBinaryFile(
          nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
      logger.trace("Processing file: {}", nativeEntryName);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);
    // add the pdf made from native to the PDF folder
    String pdfNativeEntryName =
        ParameterProcessing.PDF_FOLDER
            + "/"
            + UPIFormat.format(outputFileCount)
            + "_"
            + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName()
            + ".pdf";
    BytesWritable pdfBytesWritable =
        (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
    if (pdfBytesWritable != null) {
      zipFileWriter.addBinaryFile(
          pdfNativeEntryName, pdfBytesWritable.getBytes(), pdfBytesWritable.getLength());
      logger.trace("Processing file: {}", pdfNativeEntryName);
    }

    processHtmlContent(value, allMetadata, UPIFormat.format(outputFileCount));

    // add exception to the exception folder
    String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
    if (exception != null) {
      String exceptionEntryName =
          "exception/"
              + UPIFormat.format(outputFileCount)
              + "_"
              + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
      if (bytesWritable != null) {
        zipFileWriter.addBinaryFile(
            exceptionEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
      }
      columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
    }
    // write this all to the reduce map
    // context.write(new Text(outputKey), new Text(columnMetadata.delimiterSeparatedValues()));
    // drop the key altogether, because it messes up the format - but put it in the value
    // TODO use NullWritable
    if (OsUtil.isNix()) {
      context.write(null, new Text(columnMetadata.delimiterSeparatedValues()));
    }
    // prepare for the next file with the same key, if there is any
    first = false;
  }
  static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) {
    // Is this field a null?
    if (o == null) {
      buffer.write((byte) 0, invert);
      return;
    }
    // This field is not a null.
    buffer.write((byte) 1, invert);

    switch (oi.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
          switch (poi.getPrimitiveCategory()) {
            case VOID:
              {
                return;
              }
            case BOOLEAN:
              {
                boolean v = ((BooleanObjectInspector) poi).get(o);
                buffer.write((byte) (v ? 2 : 1), invert);
                return;
              }
            case BYTE:
              {
                ByteObjectInspector boi = (ByteObjectInspector) poi;
                byte v = boi.get(o);
                buffer.write((byte) (v ^ 0x80), invert);
                return;
              }
            case SHORT:
              {
                ShortObjectInspector spoi = (ShortObjectInspector) poi;
                short v = spoi.get(o);
                buffer.write((byte) ((v >> 8) ^ 0x80), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case INT:
              {
                IntObjectInspector ioi = (IntObjectInspector) poi;
                int v = ioi.get(o);
                buffer.write((byte) ((v >> 24) ^ 0x80), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case LONG:
              {
                LongObjectInspector loi = (LongObjectInspector) poi;
                long v = loi.get(o);
                buffer.write((byte) ((v >> 56) ^ 0x80), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case FLOAT:
              {
                FloatObjectInspector foi = (FloatObjectInspector) poi;
                int v = Float.floatToIntBits(foi.get(o));
                if ((v & (1 << 31)) != 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1 << 31);
                }
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case DOUBLE:
              {
                DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                long v = Double.doubleToLongBits(doi.get(o));
                if ((v & (1L << 63)) != 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1L << 63);
                }
                buffer.write((byte) (v >> 56), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case STRING:
              {
                StringObjectInspector soi = (StringObjectInspector) poi;
                Text t = soi.getPrimitiveWritableObject(o);
                serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                return;
              }

            case BINARY:
              {
                BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                BytesWritable ba = baoi.getPrimitiveWritableObject(o);
                byte[] toSer = new byte[ba.getLength()];
                System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
                serializeBytes(buffer, toSer, ba.getLength(), invert);
                return;
              }
            case DATE:
              {
                DateObjectInspector doi = (DateObjectInspector) poi;
                long v = doi.getPrimitiveWritableObject(o).getTimeInSeconds();
                buffer.write((byte) ((v >> 56) ^ 0x80), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case TIMESTAMP:
              {
                TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                TimestampWritable t = toi.getPrimitiveWritableObject(o);
                byte[] data = t.getBinarySortable();
                for (int i = 0; i < data.length; i++) {
                  buffer.write(data[i], invert);
                }
                return;
              }
            default:
              {
                throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
              }
          }
        }
      case LIST:
        {
          ListObjectInspector loi = (ListObjectInspector) oi;
          ObjectInspector eoi = loi.getListElementObjectInspector();

          // \1 followed by each element
          int size = loi.getListLength(o);
          for (int eid = 0; eid < size; eid++) {
            buffer.write((byte) 1, invert);
            serialize(buffer, loi.getListElement(o, eid), eoi, invert);
          }
          // and \0 to terminate
          buffer.write((byte) 0, invert);
          return;
        }
      case MAP:
        {
          MapObjectInspector moi = (MapObjectInspector) oi;
          ObjectInspector koi = moi.getMapKeyObjectInspector();
          ObjectInspector voi = moi.getMapValueObjectInspector();

          // \1 followed by each key and then each value
          Map<?, ?> map = moi.getMap(o);
          for (Map.Entry<?, ?> entry : map.entrySet()) {
            buffer.write((byte) 1, invert);
            serialize(buffer, entry.getKey(), koi, invert);
            serialize(buffer, entry.getValue(), voi, invert);
          }
          // and \0 to terminate
          buffer.write((byte) 0, invert);
          return;
        }
      case STRUCT:
        {
          StructObjectInspector soi = (StructObjectInspector) oi;
          List<? extends StructField> fields = soi.getAllStructFieldRefs();

          for (int i = 0; i < fields.size(); i++) {
            serialize(
                buffer,
                soi.getStructFieldData(o, fields.get(i)),
                fields.get(i).getFieldObjectInspector(),
                invert);
          }
          return;
        }
      case UNION:
        {
          UnionObjectInspector uoi = (UnionObjectInspector) oi;
          byte tag = uoi.getTag(o);
          buffer.write(tag, invert);
          serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert);
          return;
        }
      default:
        {
          throw new RuntimeException("Unrecognized type: " + oi.getCategory());
        }
    }
  }
Beispiel #19
0
 @JRubyMethod(name = "ruby")
 public IRubyObject ruby(final ThreadContext ctx) {
   RubyString string = RubyString.newString(ctx.runtime, value.getBytes(), 0, value.getLength());
   string.setEncoding(ASCIIEncoding.INSTANCE);
   return string;
 }
Beispiel #20
0
  static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {

    switch (oi.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
          if (o == null) {
            sb.append(nullStr);
          } else {
            switch (poi.getPrimitiveCategory()) {
              case BOOLEAN:
                {
                  boolean b = ((BooleanObjectInspector) poi).get(o);
                  sb.append(b ? "true" : "false");
                  break;
                }
              case BYTE:
                {
                  sb.append(((ByteObjectInspector) poi).get(o));
                  break;
                }
              case SHORT:
                {
                  sb.append(((ShortObjectInspector) poi).get(o));
                  break;
                }
              case INT:
                {
                  sb.append(((IntObjectInspector) poi).get(o));
                  break;
                }
              case LONG:
                {
                  sb.append(((LongObjectInspector) poi).get(o));
                  break;
                }
              case FLOAT:
                {
                  sb.append(((FloatObjectInspector) poi).get(o));
                  break;
                }
              case DOUBLE:
                {
                  sb.append(((DoubleObjectInspector) poi).get(o));
                  break;
                }
              case STRING:
                {
                  sb.append('"');
                  sb.append(escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(o)));
                  sb.append('"');
                  break;
                }
              case CHAR:
                {
                  sb.append('"');
                  sb.append(
                      escapeString(
                          ((HiveCharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
                  sb.append('"');
                  break;
                }
              case VARCHAR:
                {
                  sb.append('"');
                  sb.append(
                      escapeString(
                          ((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
                  sb.append('"');
                  break;
                }
              case DATE:
                {
                  sb.append('"');
                  sb.append(((DateObjectInspector) poi).getPrimitiveWritableObject(o));
                  sb.append('"');
                  break;
                }
              case TIMESTAMP:
                {
                  sb.append('"');
                  sb.append(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o));
                  sb.append('"');
                  break;
                }
              case BINARY:
                {
                  BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
                  Text txt = new Text();
                  txt.set(bw.getBytes(), 0, bw.getLength());
                  sb.append(txt.toString());
                  break;
                }
              case DECIMAL:
                {
                  sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
                  break;
                }
              default:
                throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory());
            }
          }
          break;
        }
      case LIST:
        {
          ListObjectInspector loi = (ListObjectInspector) oi;
          ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
          List<?> olist = loi.getList(o);
          if (olist == null) {
            sb.append(nullStr);
          } else {
            sb.append(LBRACKET);
            for (int i = 0; i < olist.size(); i++) {
              if (i > 0) {
                sb.append(COMMA);
              }
              buildJSONString(sb, olist.get(i), listElementObjectInspector, JSON_NULL);
            }
            sb.append(RBRACKET);
          }
          break;
        }
      case MAP:
        {
          MapObjectInspector moi = (MapObjectInspector) oi;
          ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
          ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
          Map<?, ?> omap = moi.getMap(o);
          if (omap == null) {
            sb.append(nullStr);
          } else {
            sb.append(LBRACE);
            boolean first = true;
            for (Object entry : omap.entrySet()) {
              if (first) {
                first = false;
              } else {
                sb.append(COMMA);
              }
              Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
              buildJSONString(sb, e.getKey(), mapKeyObjectInspector, JSON_NULL);
              sb.append(COLON);
              buildJSONString(sb, e.getValue(), mapValueObjectInspector, JSON_NULL);
            }
            sb.append(RBRACE);
          }
          break;
        }
      case STRUCT:
        {
          StructObjectInspector soi = (StructObjectInspector) oi;
          List<? extends StructField> structFields = soi.getAllStructFieldRefs();
          if (o == null) {
            sb.append(nullStr);
          } else {
            sb.append(LBRACE);
            for (int i = 0; i < structFields.size(); i++) {
              if (i > 0) {
                sb.append(COMMA);
              }
              sb.append(QUOTE);
              sb.append(structFields.get(i).getFieldName());
              sb.append(QUOTE);
              sb.append(COLON);
              buildJSONString(
                  sb,
                  soi.getStructFieldData(o, structFields.get(i)),
                  structFields.get(i).getFieldObjectInspector(),
                  JSON_NULL);
            }
            sb.append(RBRACE);
          }
          break;
        }
      case UNION:
        {
          UnionObjectInspector uoi = (UnionObjectInspector) oi;
          if (o == null) {
            sb.append(nullStr);
          } else {
            sb.append(LBRACE);
            sb.append(uoi.getTag(o));
            sb.append(COLON);
            buildJSONString(
                sb, uoi.getField(o), uoi.getObjectInspectors().get(uoi.getTag(o)), JSON_NULL);
            sb.append(RBRACE);
          }
          break;
        }
      default:
        throw new RuntimeException("Unknown type in ObjectInspector!");
    }
  }
 public ByteBuffer getKey() {
   return ByteBuffer.wrap(key.getBytes(), 0, key.getLength());
 }
Beispiel #22
0
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {

    // we only pull for a specified time. unfinished work will be
    // rescheduled in the next
    // run.
    if (System.currentTimeMillis() > maxPullTime) {
      if (reader != null) {
        closeReader();
      }
      return false;
    }

    while (true) {
      try {
        if (reader == null || reader.hasNext() == false) {
          EtlRequest request = split.popRequest();
          if (request == null) {
            return false;
          }

          if (maxPullHours > 0) {
            endTimeStamp = 0;
          }

          key.set(
              request.getTopic(),
              request.getNodeId(),
              request.getPartition(),
              request.getOffset(),
              request.getOffset(),
              0);
          value = new AvroWrapper<Object>(new Object());

          System.out.println(
              "topic:"
                  + request.getTopic()
                  + " partition:"
                  + request.getPartition()
                  + " beginOffset:"
                  + request.getOffset()
                  + " estimatedLastOffset:"
                  + request.getLastOffset());

          statusMsg += statusMsg.length() > 0 ? "; " : "";
          statusMsg +=
              request.getTopic() + ":" + request.getNodeId() + ":" + request.getPartition();
          context.setStatus(statusMsg);

          if (reader != null) {
            closeReader();
          }
          reader =
              new KafkaReader(
                  request,
                  EtlInputFormat.getKafkaClientTimeout(mapperContext),
                  EtlInputFormat.getKafkaClientBufferSize(mapperContext));

          decoder =
              (MessageDecoder<Message, Record>)
                  MessageDecoderFactory.createMessageDecoder(context, request.getTopic());
        }

        while (reader.getNext(key, msgValue)) {
          context.progress();
          mapperContext.getCounter("total", "data-read").increment(msgValue.getLength());
          mapperContext.getCounter("total", "event-count").increment(1);
          byte[] bytes = getBytes(msgValue);

          // check the checksum of message
          Message message = new Message(bytes);
          long checksum = key.getChecksum();
          if (checksum != message.checksum()) {
            throw new ChecksumException(
                "Invalid message checksum "
                    + message.checksum()
                    + ". Expected "
                    + key.getChecksum(),
                key.getOffset());
          }

          long tempTime = System.currentTimeMillis();
          CamusWrapper wrapper;
          try {
            wrapper = getWrappedRecord(key.getTopic(), message);
          } catch (Exception e) {
            mapperContext.write(key, new ExceptionWritable(e));
            continue;
          }

          if (wrapper == null) {
            mapperContext.write(key, new ExceptionWritable(new RuntimeException("null record")));
            continue;
          }

          long timeStamp = wrapper.getTimestamp();
          try {
            key.setTime(timeStamp);
            key.setServer(wrapper.getServer());
            key.setService(wrapper.getService());
          } catch (Exception e) {
            mapperContext.write(key, new ExceptionWritable(e));
            continue;
          }

          if (timeStamp < beginTimeStamp) {
            mapperContext.getCounter("total", "skip-old").increment(1);
          } else if (endTimeStamp == 0) {
            DateTime time = new DateTime(timeStamp);
            statusMsg += " begin read at " + time.toString();
            context.setStatus(statusMsg);
            System.out.println(key.getTopic() + " begin read at " + time.toString());
            endTimeStamp = (time.plusHours(this.maxPullHours)).getMillis();
          } else if (timeStamp > endTimeStamp || System.currentTimeMillis() > maxPullTime) {
            statusMsg += " max read at " + new DateTime(timeStamp).toString();
            context.setStatus(statusMsg);
            System.out.println(
                key.getTopic() + " max read at " + new DateTime(timeStamp).toString());
            mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime());
            closeReader();
          }

          long secondTime = System.currentTimeMillis();
          value.datum(wrapper.getRecord());
          long decodeTime = ((secondTime - tempTime));

          mapperContext.getCounter("total", "decode-time(ms)").increment(decodeTime);

          if (reader != null) {
            mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime());
          }
          return true;
        }
        reader = null;
      } catch (Throwable t) {
        Exception e = new Exception(t.getLocalizedMessage(), t);
        e.setStackTrace(t.getStackTrace());
        mapperContext.write(key, new ExceptionWritable(e));
        reader = null;
        continue;
      }
    }
  }
 @Override
 public int getHashFromKey() throws SerDeException {
   byte[] keyBytes = key.getBytes();
   int keyLength = key.getLength();
   return WriteBuffers.murmurHash(keyBytes, 0, keyLength);
 }
 @Override
 public void writeKey(RandomAccessOutput dest) throws SerDeException {
   byte[] keyBytes = key.getBytes();
   int keyLength = key.getLength();
   dest.write(keyBytes, 0, keyLength);
 }
 @Override
 public void writeValue(RandomAccessOutput dest) throws SerDeException {
   byte[] valueBytes = val.getBytes();
   int valueLength = val.getLength();
   dest.write(valueBytes, 0, valueLength);
 }
Beispiel #26
0
  private void testBinarySortableFast(
      SerdeRandomRowSource source,
      Object[][] rows,
      boolean[] columnSortOrderIsDesc,
      byte[] columnNullMarker,
      byte[] columnNotNullMarker,
      AbstractSerDe serde,
      StructObjectInspector rowOI,
      AbstractSerDe serde_fewer,
      StructObjectInspector writeRowOI,
      boolean ascending,
      PrimitiveTypeInfo[] primitiveTypeInfos,
      boolean useIncludeColumns,
      boolean doWriteFewerColumns,
      Random r)
      throws Throwable {

    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;

    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
      columnsToInclude = new boolean[columnCount];
      for (int i = 0; i < columnCount; i++) {
        columnsToInclude[i] = r.nextBoolean();
      }
    }

    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
      writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    }

    BinarySortableSerializeWrite binarySortableSerializeWrite =
        new BinarySortableSerializeWrite(
            columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

    // Try to serialize

    // One Writable per row.
    BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount];

    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      Output output = new Output();
      binarySortableSerializeWrite.set(output);

      int[] perFieldWriteLengths = new int[columnCount];
      for (int index = 0; index < writeColumnCount; index++) {

        Writable writable = (Writable) row[index];

        VerifyFast.serializeWrite(
            binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
        perFieldWriteLengths[index] = output.getLength();
      }
      perFieldWriteLengthsArray[i] = perFieldWriteLengths;

      BytesWritable bytesWritable = new BytesWritable();
      bytesWritable.set(output.getData(), 0, output.getLength());
      serializeWriteBytes[i] = bytesWritable;
      if (i > 0) {
        int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
        if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
          System.out.println(
              "Test failed in "
                  + (ascending ? "ascending" : "descending")
                  + " order with "
                  + (i - 1)
                  + " and "
                  + i);
          System.out.println(
              "serialized data ["
                  + (i - 1)
                  + "] = "
                  + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
          System.out.println(
              "serialized data ["
                  + i
                  + "] = "
                  + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
          fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
        }
      }
    }

    // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      BinarySortableDeserializeRead binarySortableDeserializeRead =
          new BinarySortableDeserializeRead(
              primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

      BytesWritable bytesWritable = serializeWriteBytes[i];
      binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      for (int index = 0; index < columnCount; index++) {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead.skipNextField();
        } else if (index >= writeColumnCount) {
          // Should come back a null.
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead, primitiveTypeInfos[index], null);
        } else {
          Writable writable = (Writable) row[index];
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
        }
      }
      if (writeColumnCount == columnCount) {
        TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
      }

      /*
       * Clip off one byte and expect to get an EOFException on the write field.
       */
      BinarySortableDeserializeRead binarySortableDeserializeRead2 =
          new BinarySortableDeserializeRead(
              primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

      binarySortableDeserializeRead2.set(
          bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte.

      for (int index = 0; index < writeColumnCount; index++) {
        Writable writable = (Writable) row[index];
        if (index == writeColumnCount - 1) {
          boolean threw = false;
          try {
            VerifyFast.verifyDeserializeRead(
                binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
          } catch (EOFException e) {
            //          debugDetailedReadPositionString =
            // binarySortableDeserializeRead2.getDetailedReadPositionString();
            //          debugStackTrace = e.getStackTrace();
            threw = true;
          }
          TestCase.assertTrue(threw);
        } else {
          if (useIncludeColumns && !columnsToInclude[index]) {
            binarySortableDeserializeRead2.skipNextField();
          } else {
            VerifyFast.verifyDeserializeRead(
                binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
          }
        }
      }
    }

    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
      BytesWritable bytesWritable = serializeWriteBytes[i];

      // Note that regular SerDe doesn't tolerate fewer columns.
      List<Object> deserializedRow;
      if (doWriteFewerColumns) {
        deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
      } else {
        deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
      }

      Object[] row = rows[i];
      for (int index = 0; index < writeColumnCount; index++) {
        Object expected = row[index];
        Object object = deserializedRow.get(index);
        if (expected == null || object == null) {
          if (expected != null || object != null) {
            fail("SerDe deserialized NULL column mismatch");
          }
        } else {
          if (!object.equals(expected)) {
            fail(
                "SerDe deserialized value does not match (expected "
                    + expected.getClass().getName()
                    + " "
                    + expected.toString()
                    + ", actual "
                    + object.getClass().getName()
                    + " "
                    + object.toString()
                    + ")");
          }
        }
      }
    }

    // One Writable per row.
    BytesWritable serdeBytes[] = new BytesWritable[rowCount];

    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];

      // Since SerDe reuses memory, we will need to make a copy.
      BytesWritable serialized;
      if (doWriteFewerColumns) {
        serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
      } else {
        serialized = (BytesWritable) serde.serialize(row, rowOI);
        ;
      }
      BytesWritable bytesWritable = new BytesWritable();
      bytesWritable.set(serialized);
      byte[] serDeOutput =
          Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      byte[] serializeWriteExpected =
          Arrays.copyOfRange(
              serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
      if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
        int mismatchPos = -1;
        if (serDeOutput.length != serializeWriteExpected.length) {
          for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
            if (serDeOutput[b] != serializeWriteExpected[b]) {
              mismatchPos = b;
              break;
            }
          }
          fail(
              "Different byte array lengths: serDeOutput.length "
                  + serDeOutput.length
                  + ", serializeWriteExpected.length "
                  + serializeWriteExpected.length
                  + " mismatchPos "
                  + mismatchPos
                  + " perFieldWriteLengths "
                  + Arrays.toString(perFieldWriteLengthsArray[i]));
        }
        List<Integer> differentPositions = new ArrayList();
        for (int b = 0; b < serDeOutput.length; b++) {
          if (serDeOutput[b] != serializeWriteExpected[b]) {
            differentPositions.add(b);
          }
        }
        if (differentPositions.size() > 0) {
          List<String> serializeWriteExpectedFields = new ArrayList<String>();
          List<String> serDeFields = new ArrayList<String>();
          int f = 0;
          int lastBegin = 0;
          for (int b = 0; b < serDeOutput.length; b++) {
            int writeLength = perFieldWriteLengthsArray[i][f];
            if (b + 1 == writeLength) {
              serializeWriteExpectedFields.add(
                  displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
              serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
              f++;
              lastBegin = b + 1;
            }
          }
          fail(
              "SerializeWrite and SerDe serialization does not match at positions "
                  + differentPositions.toString()
                  + "\n(SerializeWrite: "
                  + serializeWriteExpectedFields.toString()
                  + "\nSerDe: "
                  + serDeFields.toString()
                  + "\nperFieldWriteLengths "
                  + Arrays.toString(perFieldWriteLengthsArray[i])
                  + "\nprimitiveTypeInfos "
                  + Arrays.toString(primitiveTypeInfos)
                  + "\nrow "
                  + Arrays.toString(row));
        }
      }
      serdeBytes[i] = bytesWritable;
    }

    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      BinarySortableDeserializeRead binarySortableDeserializeRead =
          new BinarySortableDeserializeRead(
              primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);

      BytesWritable bytesWritable = serdeBytes[i];
      binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      for (int index = 0; index < columnCount; index++) {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead.skipNextField();
        } else if (index >= writeColumnCount) {
          // Should come back a null.
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead, primitiveTypeInfos[index], null);
        } else {
          Writable writable = (Writable) row[index];
          VerifyFast.verifyDeserializeRead(
              binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
        }
      }
      if (writeColumnCount == columnCount) {
        TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
      }
    }
  }
Beispiel #27
0
 public static void writeToFile(BytesWritable value, File binaryFile) throws IOException {
   FileOutputStream fileOut = new FileOutputStream(binaryFile);
   fileOut.write(value.getBytes(), 0, value.getLength());
   fileOut.close();
 }
Beispiel #28
0
 private static byte[] pair(BytesWritable a, BytesWritable b) {
   byte[] pairData = new byte[a.getLength() + b.getLength()];
   System.arraycopy(a.getBytes(), 0, pairData, 0, a.getLength());
   System.arraycopy(b.getBytes(), 0, pairData, a.getLength(), b.getLength());
   return pairData;
 }