@SuppressWarnings({"unchecked", "deprecation"}) public static Object fromBytes(Schema schema, byte[] val) throws IOException { Type type = schema.getType(); switch (type) { case ENUM: return AvroUtils.getEnumValue(schema, val[0]); case STRING: return new Utf8(Bytes.toString(val)); case BYTES: return ByteBuffer.wrap(val); case INT: return Bytes.toInt(val); case LONG: return Bytes.toLong(val); case FLOAT: return Bytes.toFloat(val); case DOUBLE: return Bytes.toDouble(val); case BOOLEAN: return val[0] != 0; case RECORD: // TODO: This is TOO SLOW... OPTIMIZE reader.setSchema(schema); reader.setExpected(schema); BinaryDecoder decoder = new BinaryDecoder(new ByteArrayInputStream(val)); return reader.read(null, decoder); default: throw new RuntimeException("Unknown type: " + type); } }
/**
 * Rebuilds a Java value from bytes read out of HBase.
 *
 * <p>The requested type is matched in a fixed order (String, int, long, boolean, double, float,
 * short, BigDecimal) and the first match wins. Apart from {@code BigDecimal}, wrapper types are
 * matched with {@code isAssignableFrom}, so a supertype of the wrapper also matches; for example
 * any supertype of {@code String} yields a {@code String}.
 *
 * @param b the serialized value
 * @param klass the type the caller wants back
 * @return the decoded value
 * @throws MetaModelException if {@code klass} matches none of the supported types
 */
public static Object toObject(byte[] b, Class<?> klass) {
  if (klass.isAssignableFrom(String.class)) {
    return Bytes.toString(b);
  }
  if (klass.equals(int.class) || klass.isAssignableFrom(Integer.class)) {
    return Bytes.toInt(b);
  }
  if (klass.equals(long.class) || klass.isAssignableFrom(Long.class)) {
    return Bytes.toLong(b);
  }
  if (klass.equals(boolean.class) || klass.isAssignableFrom(Boolean.class)) {
    return Bytes.toBoolean(b);
  }
  if (klass.equals(double.class) || klass.isAssignableFrom(Double.class)) {
    return Bytes.toDouble(b);
  }
  if (klass.equals(float.class) || klass.isAssignableFrom(Float.class)) {
    return Bytes.toFloat(b);
  }
  if (klass.equals(short.class) || klass.isAssignableFrom(Short.class)) {
    return Bytes.toShort(b);
  }
  if (klass.equals(BigDecimal.class)) {
    return Bytes.toBigDecimal(b);
  }
  throw new MetaModelException("Could Not find a suitable Type for " + klass.getName());
}
/**
 * Decodes {@code val} into an instance of {@code clazz}.
 *
 * <p>Supported targets are the primitive types byte, boolean, short, int, long, float and double
 * (and their wrapper classes), {@code String}, and {@code Utf8}. Byte and boolean are read from
 * the first element of {@code val}; a boolean is {@code true} when that element is non-zero.
 *
 * @param clazz the type to decode into
 * @param val the serialized bytes
 * @param <K> the target type
 * @return the decoded value
 * @throws RuntimeException if {@code clazz} is not one of the supported types
 */
@SuppressWarnings("unchecked")
public static <K> K fromBytes(Class<K> clazz, byte[] val) {
  final Object parsed;
  if (clazz.equals(Byte.TYPE) || clazz.equals(Byte.class)) {
    parsed = Byte.valueOf(val[0]);
  } else if (clazz.equals(Boolean.TYPE) || clazz.equals(Boolean.class)) {
    parsed = Boolean.valueOf(val[0] != 0);
  } else if (clazz.equals(Short.TYPE) || clazz.equals(Short.class)) {
    parsed = Short.valueOf(Bytes.toShort(val));
  } else if (clazz.equals(Integer.TYPE) || clazz.equals(Integer.class)) {
    parsed = Integer.valueOf(Bytes.toInt(val));
  } else if (clazz.equals(Long.TYPE) || clazz.equals(Long.class)) {
    parsed = Long.valueOf(Bytes.toLong(val));
  } else if (clazz.equals(Float.TYPE) || clazz.equals(Float.class)) {
    parsed = Float.valueOf(Bytes.toFloat(val));
  } else if (clazz.equals(Double.TYPE) || clazz.equals(Double.class)) {
    parsed = Double.valueOf(Bytes.toDouble(val));
  } else if (clazz.equals(String.class)) {
    parsed = Bytes.toString(val);
  } else if (clazz.equals(Utf8.class)) {
    parsed = new Utf8(Bytes.toString(val));
  } else {
    throw new RuntimeException("Can't parse data as class: " + clazz);
  }
  // Unchecked: each branch above produces the value type that corresponds to clazz.
  return (K) parsed;
}
/**
 * Sets the probability from its serialized form.
 *
 * @param prob the probability encoded as bytes, decoded with {@code Bytes.toDouble}
 */
public void setProbability(byte[] prob) {
  final double decoded = Bytes.toDouble(prob);
  this.prob = decoded;
}
/**
 * Reads a {@code double} value from {@code buff}, starting at {@code offset}.
 *
 * @param buff the buffer holding the encoded value
 * @param offset index in {@code buff} at which the encoded value starts
 * @return the decoded value
 */
public double decodeDouble(byte[] buff, int offset) {
  return Bytes.toDouble(buff, offset);
}
/**
 * Reads a {@code double} at the current position of {@code src}, then calls {@code skip(src)}.
 *
 * @param src the range to read from
 * @return the decoded value
 */
@Override
public Double decode(PositionedByteRange src) {
  final byte[] backing = src.getBytes();
  // The position is relative to the range's offset into the backing array.
  final int start = src.getOffset() + src.getPosition();
  final double decoded = Bytes.toDouble(backing, start);
  skip(src);
  return decoded;
}
@Test public void testHostRank() throws Exception { if (System.getProperty("prop.mapred.job.tracker") != null) { if (LOG.isInfoEnabled()) LOG.info("testHBaseInputOutput: Ignore this test if not local mode."); return; } File jarTest = new File(System.getProperty("prop.jarLocation")); if (!jarTest.exists()) { fail( "Could not find Giraph jar at " + "location specified by 'prop.jarLocation'. " + "Make sure you built the main Giraph artifact?."); } MiniHBaseCluster cluster = null; MiniZooKeeperCluster zkCluster = null; FileSystem fs = null; try { // using the restart method allows us to avoid having the hbase // root directory overwritten by /home/$username zkCluster = testUtil.startMiniZKCluster(); testUtil.restartHBaseCluster(2); cluster = testUtil.getMiniHBaseCluster(); final byte[] OL_BYTES = Bytes.toBytes("ol"); final byte[] S_BYTES = Bytes.toBytes("s"); final byte[] METADATA_BYTES = Bytes.toBytes("mtdt"); final byte[] HR_BYTES = Bytes.toBytes("_hr_"); final byte[] TAB = Bytes.toBytes(TABLE_NAME); Configuration conf = cluster.getConfiguration(); HTableDescriptor desc = new HTableDescriptor(TAB); desc.addFamily(new HColumnDescriptor(OL_BYTES)); desc.addFamily(new HColumnDescriptor(S_BYTES)); desc.addFamily(new HColumnDescriptor(METADATA_BYTES)); HBaseAdmin hbaseAdmin = new HBaseAdmin(conf); if (hbaseAdmin.isTableAvailable(TABLE_NAME)) { hbaseAdmin.disableTable(TABLE_NAME); hbaseAdmin.deleteTable(TABLE_NAME); } hbaseAdmin.createTable(desc); /** * Enter the initial data (a,b), (b,c), (a,c) a = 1.0 - google b = 1.0 - yahoo c = 1.0 - bing */ HTable table = new HTable(conf, TABLE_NAME); Put p1 = new Put(Bytes.toBytes("com.google.www")); p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab")); Put p2 = new Put(Bytes.toBytes("com.google.www")); p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac")); p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1")); p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), 
Bytes.toBytes("invalid2")); Put p3 = new Put(Bytes.toBytes("com.yahoo.www")); p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc")); // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4")); Put p4 = new Put(Bytes.toBytes("com.bing.www")); // TODO: Handle below case. use apache isValid method. p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5")); p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d)); Put p5 = new Put(Bytes.toBytes("dummy")); p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d)); table.put(p1); table.put(p2); table.put(p3); table.put(p4); table.put(p5); // Set Giraph configuration // now operate over HBase using Vertex I/O formats conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME); conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME); // Start the giraph job GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName()); GiraphConfiguration giraphConf = giraphJob.getConfiguration(); giraphConf.setZooKeeperConfiguration(cluster.getMaster().getZooKeeper().getQuorum()); setupConfiguration(giraphJob); giraphConf.setComputationClass(LinkRankComputation.class); giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class); giraphConf.setOutEdgesClass(ByteArrayEdges.class); giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class); giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class); giraphConf.setInt("giraph.linkRank.superstepCount", 10); giraphConf.setInt("giraph.linkRank.scale", 10); giraphConf.set("giraph.linkRank.family", "mtdt"); giraphConf.set("giraph.linkRank.qualifier", "_hr_"); giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class); assertTrue(giraphJob.run(true)); if (LOG.isInfoEnabled()) LOG.info("Giraph job successful. 
Checking output qualifier."); /** Check the results * */ Result result; String key; byte[] calculatedScoreByte; HashMap expectedValues = new HashMap<String, Double>(); expectedValues.put("com.google.www", 1.3515060339386287d); expectedValues.put("com.yahoo.www", 4.144902009567587d); expectedValues.put("com.bing.www", 9.063893290511482d); for (Object keyObject : expectedValues.keySet()) { key = keyObject.toString(); result = table.get(new Get(key.getBytes())); calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES); assertNotNull(calculatedScoreByte); assertTrue(calculatedScoreByte.length > 0); Assert.assertEquals( "Scores are not the same", (Double) expectedValues.get(key), Bytes.toDouble(calculatedScoreByte), DELTA); } } finally { if (cluster != null) { cluster.shutdown(); } if (zkCluster != null) { zkCluster.shutdown(); } // clean test files if (fs != null) { fs.delete(hbaseRootdir); } } }
/** @param args */ public static void main(String[] args) throws Exception { // Configuration conf = HBaseConfiguration.create(); // change here if you want to change the HBase installation. // conf.set("hbase.master", "localhost:60000"); Configuration config = HBaseConfiguration.create(); // config.set("hbase.master", "localhost:60020"); HTable table = new HTable(config, TABLE_NAME); // Change here if you want to change the input file. BufferedReader reader = new BufferedReader(new FileReader("/home/cloudera/ebooks/Hbase/hdi-data.csv")); try { String line = null; // skip first line reader.readLine(); while ((line = reader.readLine()) != null) { try { String[] tokens = CSVLineParser.tokenizeCSV(line).toArray(new String[0]); String country = tokens[1]; double lifeExpectacny = Double.parseDouble(tokens[3].replaceAll(",", "")); double meanYearsOfSchooling = Double.parseDouble(tokens[4].replaceAll(",", "")); double gnip = Double.parseDouble(tokens[6].replaceAll(",", "")); Put put = new Put(Bytes.toBytes(country)); put.add( "ByCountry".getBytes(), Bytes.toBytes("lifeExpectacny"), Bytes.toBytes(lifeExpectacny)); put.add( "ByCountry".getBytes(), Bytes.toBytes("meanYearsOfSchooling"), Bytes.toBytes(meanYearsOfSchooling)); put.add("ByCountry".getBytes(), Bytes.toBytes("gnip"), Bytes.toBytes(gnip)); table.put(put); } catch (Exception e) { e.printStackTrace(); System.out.println("Error processing " + line + " caused by " + e.getMessage()); } } } catch (IOException e) { try { reader.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } // Following print back the results Scan s = new Scan(); s.addFamily(Bytes.toBytes("ByCountry")); ResultScanner results = table.getScanner(s); try { for (Result result : results) { KeyValue[] keyValuePairs = result.raw(); System.out.println(new String(result.getRow())); for (KeyValue keyValue : keyValuePairs) { System.out.println( new String(keyValue.getFamily()) + " " + new 
String(keyValue.getQualifier()) + "=" + Bytes.toDouble(keyValue.getValue())); } } } finally { results.close(); } }
/** * Converts the given value from bytes based on data type and cardinality information for the * given property. For 'many' or multi-valued properties, if the SDO Java type for the property is * not already String, the value is first converted from a String using the SDO conversion which * uses java.util.Arrays formatting, resulting in an array of primitive types. For non 'many' or * singular properties, only the HBase Bytes utility is used. * * @param targetProperty the property * @param value the bytes value * @return the converted object * @throws IllegalArgumentException if the given property is not a data type property */ public Object fromBytes(Property targetProperty, byte[] value) { Object result = null; if (!targetProperty.getType().isDataType()) throw new IllegalArgumentException( "property " + targetProperty.toString() + " is not a datatype property"); DataType targetDataType = DataType.valueOf(targetProperty.getType().getName()); switch (targetDataType) { // Data types stored as String bytes in HBase case String: case Strings: case URI: case Month: case MonthDay: case Day: case Time: case Year: case YearMonth: case YearMonthDay: case Duration: String resultStr = Bytes.toString(value); result = DataConverter.INSTANCE.fromString(targetProperty, resultStr); break; case Date: resultStr = Bytes.toString(value); result = DataConverter.INSTANCE.fromString(targetProperty, resultStr); break; case DateTime: // NOTE: remember datetime is a String Java representation in SDO 2.1 resultStr = Bytes.toString(value); result = DataConverter.INSTANCE.fromString(targetProperty, resultStr); break; // Data types stored by directly converting from primitive types to bytes in HBase. // TODO: for these data types determine if there is a way to "delimit" multiple values yet // not take the extra and expensive step of first converting to delimited String. 
case Decimal: if (!targetProperty.isMany()) result = Bytes.toBigDecimal(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Bytes: if (!targetProperty.isMany()) result = value; // already bytes else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Byte: if (!targetProperty.isMany()) { // NOTE: no toByte method as would expect as there is opposite method, see below // e.g. Bytes.toByte(value); if (value != null) { if (value.length > 2) log.warn( "truncating " + String.valueOf(value.length) + " length byte array for target data type 'byte'"); result = value[0]; } } else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Boolean: if (!targetProperty.isMany()) result = Bytes.toBoolean(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Character: if (!targetProperty.isMany()) result = Character.valueOf(Bytes.toString(value).charAt(0)); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Double: if (!targetProperty.isMany()) result = Bytes.toDouble(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Float: if (!targetProperty.isMany()) result = Bytes.toFloat(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Int: if (!targetProperty.isMany()) result = Bytes.toInt(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Integer: if (!targetProperty.isMany()) result = new BigInteger(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Long: if (!targetProperty.isMany()) result = Bytes.toLong(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Short: if 
(!targetProperty.isMany()) result = Bytes.toShort(value); else result = DataConverter.INSTANCE.fromString(targetProperty, Bytes.toString(value)); break; case Object: // FIXME: custom serialization? default: result = Bytes.toString(value); break; } return result; }