Beispiel #1
0
 /**
  * Decodes a byte array into the Java value matching the given Avro schema type.
  *
  * <p>Fixed-width primitives and strings are decoded with the HBase {@code Bytes} helpers. ENUM and
  * BOOLEAN look only at the first byte. RECORD values are decoded with the {@code reader} declared
  * outside this method, which is reconfigured for {@code schema} on every call.
  *
  * @param schema the Avro schema describing how {@code val} was serialized
  * @param val the serialized value
  * @return the decoded value
  * @throws IOException if the record decoder fails while reading {@code val}
  * @throws RuntimeException if the schema type is not one of the handled types
  */
 @SuppressWarnings({"unchecked", "deprecation"})
 public static Object fromBytes(Schema schema, byte[] val) throws IOException {
   final Type kind = schema.getType();
   switch (kind) {
     // Fixed-width numeric types.
     case INT:
       return Bytes.toInt(val);
     case LONG:
       return Bytes.toLong(val);
     case FLOAT:
       return Bytes.toFloat(val);
     case DOUBLE:
       return Bytes.toDouble(val);

     // Single-byte encodings: only the first byte is inspected.
     case BOOLEAN:
       return val[0] != 0;
     case ENUM:
       return AvroUtils.getEnumValue(schema, val[0]);

     // Variable-length types.
     case STRING:
       return new Utf8(Bytes.toString(val));
     case BYTES:
       return ByteBuffer.wrap(val);

     case RECORD: {
       // TODO: This is TOO SLOW... OPTIMIZE
       // NOTE(review): `reader` is mutated here; confirm it is not shared across threads.
       reader.setSchema(schema);
       reader.setExpected(schema);
       ByteArrayInputStream input = new ByteArrayInputStream(val);
       BinaryDecoder recordDecoder = new BinaryDecoder(input);
       return reader.read(null, recordDecoder);
     }

     default:
       throw new RuntimeException("Unknown type: " + kind);
   }
 }
Beispiel #2
0
  /**
   * Decodes bytes picked from HBase into a value of the requested Java type.
   *
   * <p>The checks run in a fixed order and the first match wins: String, int, long, boolean,
   * double, float, short, then BigDecimal. Apart from BigDecimal, which requires an exact class
   * match, the wrapper checks use {@link Class#isAssignableFrom(Class)}; a supertype such as
   * {@code Object} therefore also matches and resolves to the first applicable branch (String).
   *
   * @param b the raw bytes to decode
   * @param klass the Java type the bytes should be decoded as
   * @return the decoded value
   * @throws MetaModelException if {@code klass} matches none of the supported types
   */
  public static Object toObject(byte[] b, Class<?> klass) {
    if (klass.isAssignableFrom(String.class)) {
      return Bytes.toString(b);
    }
    if (klass.isAssignableFrom(Integer.class) || klass.equals(int.class)) {
      return Bytes.toInt(b);
    }
    if (klass.isAssignableFrom(Long.class) || klass.equals(long.class)) {
      return Bytes.toLong(b);
    }
    if (klass.isAssignableFrom(Boolean.class) || klass.equals(boolean.class)) {
      return Bytes.toBoolean(b);
    }
    if (klass.isAssignableFrom(Double.class) || klass.equals(double.class)) {
      return Bytes.toDouble(b);
    }
    if (klass.isAssignableFrom(Float.class) || klass.equals(float.class)) {
      return Bytes.toFloat(b);
    }
    if (klass.isAssignableFrom(Short.class) || klass.equals(short.class)) {
      return Bytes.toShort(b);
    }
    if (klass.equals(BigDecimal.class)) {
      return Bytes.toBigDecimal(b);
    }

    // Nothing matched: report the unsupported type by name.
    throw new MetaModelException("Could Not find a suitable Type for " + klass.getName());
  }
Beispiel #3
0
 /**
  * Decodes a byte array into an instance of the requested class.
  *
  * <p>Supported targets are the primitive types byte, boolean, short, int, long, float and double
  * (and their wrappers), plus {@code String} and {@code Utf8}. Byte and boolean read only the
  * first element of {@code val}; a boolean is {@code true} when that byte is non-zero.
  *
  * @param clazz the class to decode into; matched by exact equality
  * @param val the serialized value
  * @param <K> the decoded type
  * @return the decoded value
  * @throws RuntimeException if {@code clazz} is not one of the supported classes
  */
 @SuppressWarnings("unchecked")
 public static <K> K fromBytes(Class<K> clazz, byte[] val) {
   if (clazz.equals(byte.class) || clazz.equals(Byte.class)) {
     return (K) Byte.valueOf(val[0]);
   }
   if (clazz.equals(boolean.class) || clazz.equals(Boolean.class)) {
     return (K) Boolean.valueOf(val[0] != 0);
   }
   if (clazz.equals(short.class) || clazz.equals(Short.class)) {
     return (K) Short.valueOf(Bytes.toShort(val));
   }
   if (clazz.equals(int.class) || clazz.equals(Integer.class)) {
     return (K) Integer.valueOf(Bytes.toInt(val));
   }
   if (clazz.equals(long.class) || clazz.equals(Long.class)) {
     return (K) Long.valueOf(Bytes.toLong(val));
   }
   if (clazz.equals(float.class) || clazz.equals(Float.class)) {
     return (K) Float.valueOf(Bytes.toFloat(val));
   }
   if (clazz.equals(double.class) || clazz.equals(Double.class)) {
     return (K) Double.valueOf(Bytes.toDouble(val));
   }
   if (clazz.equals(String.class)) {
     return (K) Bytes.toString(val);
   }
   if (clazz.equals(Utf8.class)) {
     String text = Bytes.toString(val);
     return (K) new Utf8(text);
   }
   throw new RuntimeException("Can't parse data as class: " + clazz);
 }
Beispiel #4
0
 /**
  * Sets the probability from its serialized form.
  *
  * @param prob the bytes to decode with {@code Bytes.toDouble}
  */
 public void setProbability(byte[] prob) {
   final double decoded = Bytes.toDouble(prob);
   this.prob = decoded;
 }
Beispiel #5
0
 /**
  * Reads a {@code double} value from {@code buff}.
  *
  * @param buff the buffer holding the encoded value
  * @param offset the index in {@code buff} at which decoding starts
  * @return the decoded value
  */
 public double decodeDouble(byte[] buff, int offset) {
   return Bytes.toDouble(buff, offset);
 }
Beispiel #6
0
 /**
  * Decodes a {@code double} located at the range's offset plus its current position, then calls
  * {@code skip(src)}.
  *
  * @param src the byte range to read from
  * @return the decoded value, boxed
  */
 @Override
 public Double decode(PositionedByteRange src) {
   final byte[] backing = src.getBytes();
   final int start = src.getOffset() + src.getPosition();
   // Read before skipping: the start index depends on the current position.
   final double decoded = Bytes.toDouble(backing, start);
   skip(src);
   return decoded;
 }
  /**
   * Runs the LinkRank Giraph job against a mini HBase cluster and checks the host scores it
   * writes back.
   *
   * <p>The test creates {@code TABLE_NAME} with the {@code ol}, {@code s} and {@code mtdt} column
   * families, loads a small link graph, runs the job with the Nutch2 host input/output formats and
   * then compares the value stored under {@code mtdt:_hr_} for each expected host against a fixed
   * score, within {@code DELTA}.
   *
   * <p>The test returns early without asserting anything when the {@code prop.mapred.job.tracker}
   * system property is set, and fails when {@code prop.jarLocation} does not point to an existing
   * file.
   *
   * @throws Exception if cluster startup, table setup or the job itself fails
   */
  @Test
  public void testHostRank() throws Exception {

    if (System.getProperty("prop.mapred.job.tracker") != null) {
      if (LOG.isInfoEnabled()) {
        LOG.info("testHostRank: Ignore this test if not local mode.");
      }
      return;
    }

    // A missing property must produce the explanatory failure below, not an NPE from File.
    String jarLocation = System.getProperty("prop.jarLocation");
    if (jarLocation == null || !new File(jarLocation).exists()) {
      fail(
          "Could not find Giraph jar at "
              + "location specified by 'prop.jarLocation'. "
              + "Make sure you built the main Giraph artifact?.");
    }

    MiniHBaseCluster cluster = null;
    MiniZooKeeperCluster zkCluster = null;
    FileSystem fs = null;

    try {
      // using the restart method allows us to avoid having the hbase
      // root directory overwritten by /home/$username
      zkCluster = testUtil.startMiniZKCluster();
      testUtil.restartHBaseCluster(2);
      cluster = testUtil.getMiniHBaseCluster();

      final byte[] OL_BYTES = Bytes.toBytes("ol");
      final byte[] S_BYTES = Bytes.toBytes("s");
      final byte[] METADATA_BYTES = Bytes.toBytes("mtdt");
      final byte[] HR_BYTES = Bytes.toBytes("_hr_");
      final byte[] TAB = Bytes.toBytes(TABLE_NAME);

      // Recreate the table from scratch so earlier runs cannot influence the result.
      Configuration conf = cluster.getConfiguration();
      HTableDescriptor desc = new HTableDescriptor(TAB);
      desc.addFamily(new HColumnDescriptor(OL_BYTES));
      desc.addFamily(new HColumnDescriptor(S_BYTES));
      desc.addFamily(new HColumnDescriptor(METADATA_BYTES));
      HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
      if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
        hbaseAdmin.disableTable(TABLE_NAME);
        hbaseAdmin.deleteTable(TABLE_NAME);
      }
      hbaseAdmin.createTable(desc);

      // Enter the initial data (a,b), (b,c), (a,c) a = 1.0 - google b = 1.0 - yahoo c = 1.0 - bing
      HTable table = new HTable(conf, TABLE_NAME);

      Put p1 = new Put(Bytes.toBytes("com.google.www"));
      p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab"));

      // p2 targets the same row as p1 and adds further outlinks to it.
      Put p2 = new Put(Bytes.toBytes("com.google.www"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1"));
      p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), Bytes.toBytes("invalid2"));

      Put p3 = new Put(Bytes.toBytes("com.yahoo.www"));
      p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc"));
      // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4"));

      Put p4 = new Put(Bytes.toBytes("com.bing.www"));
      // TODO: Handle below case. use apache isValid method.
      p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5"));
      p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      Put p5 = new Put(Bytes.toBytes("dummy"));
      p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      table.put(p1);
      table.put(p2);
      table.put(p3);
      table.put(p4);
      table.put(p5);

      // Set Giraph configuration
      // now operate over HBase using Vertex I/O formats
      conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
      conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

      // Start the giraph job
      GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
      GiraphConfiguration giraphConf = giraphJob.getConfiguration();
      giraphConf.setZooKeeperConfiguration(cluster.getMaster().getZooKeeper().getQuorum());
      setupConfiguration(giraphJob);
      giraphConf.setComputationClass(LinkRankComputation.class);
      giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class);
      giraphConf.setOutEdgesClass(ByteArrayEdges.class);
      giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class);
      giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class);
      giraphConf.setInt("giraph.linkRank.superstepCount", 10);
      giraphConf.setInt("giraph.linkRank.scale", 10);
      giraphConf.set("giraph.linkRank.family", "mtdt");
      giraphConf.set("giraph.linkRank.qualifier", "_hr_");
      giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class);
      assertTrue(giraphJob.run(true));

      if (LOG.isInfoEnabled()) {
        LOG.info("Giraph job successful. Checking output qualifier.");
      }

      // Check the results: each expected host must carry the expected score under mtdt:_hr_.
      HashMap<String, Double> expectedValues = new HashMap<String, Double>();
      expectedValues.put("com.google.www", 1.3515060339386287d);
      expectedValues.put("com.yahoo.www", 4.144902009567587d);
      expectedValues.put("com.bing.www", 9.063893290511482d);

      for (String key : expectedValues.keySet()) {
        // Encode the row key the same way it was written (Bytes.toBytes), not with the
        // platform default charset.
        Result result = table.get(new Get(Bytes.toBytes(key)));
        byte[] calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES);
        assertNotNull(calculatedScoreByte);
        assertTrue(calculatedScoreByte.length > 0);
        Assert.assertEquals(
            "Scores are not the same",
            expectedValues.get(key).doubleValue(),
            Bytes.toDouble(calculatedScoreByte),
            DELTA);
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
      if (zkCluster != null) {
        zkCluster.shutdown();
      }
      // clean test files
      // NOTE(review): fs is never assigned in this method, so this cleanup currently never runs.
      if (fs != null) {
        fs.delete(hbaseRootdir);
      }
    }
  }
  /**
   * Loads a CSV file into the {@code ByCountry} column family of {@code TABLE_NAME} and then
   * prints every stored cell of that family to standard output.
   *
   * <p>The first line of the file is skipped as a header. For each remaining line, column 1 is
   * used as the row key and columns 3, 4 and 6 are parsed as doubles (after removing commas) and
   * stored under the qualifiers {@code lifeExpectacny}, {@code meanYearsOfSchooling} and {@code
   * gnip}. A line that cannot be processed is reported and skipped. If reading the file itself
   * fails, the error is reported and the program still prints whatever was stored.
   *
   * @param args unused
   * @throws Exception if the table cannot be opened, the input file cannot be found, or scanning
   *     the table fails
   */
  public static void main(String[] args) throws Exception {

    // Encode the family once, the same way the scan below encodes it.
    final byte[] family = Bytes.toBytes("ByCountry");

    // Change here if you want to change the HBase installation,
    // e.g. config.set("hbase.master", "localhost:60000");
    Configuration config = HBaseConfiguration.create();
    HTable table = new HTable(config, TABLE_NAME);

    try {
      // Change here if you want to change the input file.
      BufferedReader reader =
          new BufferedReader(new FileReader("/home/cloudera/ebooks/Hbase/hdi-data.csv"));

      try {
        // skip first line
        reader.readLine();
        String line;
        while ((line = reader.readLine()) != null) {
          try {
            String[] tokens = CSVLineParser.tokenizeCSV(line).toArray(new String[0]);
            String country = tokens[1];
            double lifeExpectancy = Double.parseDouble(tokens[3].replaceAll(",", ""));
            double meanYearsOfSchooling = Double.parseDouble(tokens[4].replaceAll(",", ""));
            double gnip = Double.parseDouble(tokens[6].replaceAll(",", ""));

            Put put = new Put(Bytes.toBytes(country));
            // The qualifier spelling "lifeExpectacny" is kept as-is: it is the stored column name.
            put.add(family, Bytes.toBytes("lifeExpectacny"), Bytes.toBytes(lifeExpectancy));
            put.add(
                family, Bytes.toBytes("meanYearsOfSchooling"), Bytes.toBytes(meanYearsOfSchooling));
            put.add(family, Bytes.toBytes("gnip"), Bytes.toBytes(gnip));
            table.put(put);
          } catch (Exception e) {
            // Best effort: a bad line must not abort the whole import.
            e.printStackTrace();
            System.out.println("Error processing " + line + " caused by " + e.getMessage());
          }
        }
      } catch (IOException e) {
        // Reading failed part-way; report it and still print what was stored so far.
        System.err.println("Error reading input file caused by " + e.getMessage());
      } finally {
        // Always release the file handle, not only when reading failed.
        try {
          reader.close();
        } catch (IOException e1) {
          e1.printStackTrace();
        }
      }

      // Following print back the results
      Scan s = new Scan();
      s.addFamily(family);
      ResultScanner results = table.getScanner(s);

      try {
        for (Result result : results) {
          KeyValue[] keyValuePairs = result.raw();
          System.out.println(Bytes.toString(result.getRow()));
          for (KeyValue keyValue : keyValuePairs) {
            System.out.println(
                Bytes.toString(keyValue.getFamily())
                    + " "
                    + Bytes.toString(keyValue.getQualifier())
                    + "="
                    + Bytes.toDouble(keyValue.getValue()));
          }
        }
      } finally {
        results.close();
      }
    } finally {
      table.close();
    }
  }
  /**
   * Converts the given value from bytes based on data type and cardinality information for the
   * given property. For 'many' or multi-valued properties, if the SDO Java type for the property is
   * not already String, the value is first converted from a String using the SDO conversion which
   * uses java.util.Arrays formatting, resulting in an array of primitive types. For non 'many' or
   * singular properties, only the HBase Bytes utility is used.
   *
   * <p>For a singular {@code Byte} property only the first element of {@code value} is returned; a
   * warning is logged whenever the array holds more than one byte. A {@code null} value for a
   * singular {@code Byte} property yields {@code null}. Data types without a dedicated case
   * (including {@code Object}) are returned as a String.
   *
   * @param targetProperty the property
   * @param value the bytes value
   * @return the converted object
   * @throws IllegalArgumentException if the given property is not a data type property
   */
  public Object fromBytes(Property targetProperty, byte[] value) {
    Object result = null;

    if (!targetProperty.getType().isDataType()) {
      throw new IllegalArgumentException(
          "property " + targetProperty.toString() + " is not a datatype property");
    }

    DataType targetDataType = DataType.valueOf(targetProperty.getType().getName());

    switch (targetDataType) {
        // Data types stored as String bytes in HBase
      case String:
      case Strings:
      case URI:
      case Month:
      case MonthDay:
      case Day:
      case Time:
      case Year:
      case YearMonth:
      case YearMonthDay:
      case Duration:
      case Date:
        // NOTE: remember datetime is a String Java representation in SDO 2.1
      case DateTime:
        result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        break;

        // Data types stored by directly converting from primitive types to bytes in HBase.
        // TODO: for these data types determine if there is a way to "delimit" multiple values yet
        // not take the extra and expensive step of first converting to delimited String.
      case Decimal:
        if (!targetProperty.isMany()) {
          result = Bytes.toBigDecimal(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Bytes:
        if (!targetProperty.isMany()) {
          result = value; // already bytes
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Byte:
        if (!targetProperty.isMany()) {
          // NOTE: no toByte method as would expect as there is opposite method, see below
          // e.g. Bytes.toByte(value);
          if (value != null) {
            // Anything beyond the first byte is dropped, so warn for every array longer than one.
            if (value.length > 1) {
              log.warn(
                  "truncating "
                      + value.length
                      + " length byte array for target data type 'byte'");
            }
            result = value[0];
          }
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Boolean:
        if (!targetProperty.isMany()) {
          result = Bytes.toBoolean(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Character:
        if (!targetProperty.isMany()) {
          result = Character.valueOf(Bytes.toString(value).charAt(0));
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Double:
        if (!targetProperty.isMany()) {
          result = Bytes.toDouble(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Float:
        if (!targetProperty.isMany()) {
          result = Bytes.toFloat(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Int:
        if (!targetProperty.isMany()) {
          result = Bytes.toInt(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Integer:
        // The SDO Integer type is arbitrary precision, hence BigInteger rather than Bytes.toInt.
        if (!targetProperty.isMany()) {
          result = new BigInteger(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Long:
        if (!targetProperty.isMany()) {
          result = Bytes.toLong(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Short:
        if (!targetProperty.isMany()) {
          result = Bytes.toShort(value);
        } else {
          result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value));
        }
        break;
      case Object:
        // FIXME: custom serialization?
      default:
        result = Bytes.toString(value);
        break;
    }

    return result;
  }