static {
    RocksDB.loadLibrary();
    try {
        db = RocksDB.open(options, DB_PATH);
    } catch (RocksDBException e) {
        // Log the full exception; the message alone loses the stack trace,
        // and #db stays null so later accesses would fail with an NPE.
        logger.error("Error while opening RocksDB instance at " + DB_PATH, e);
    }
}
/**
 * Creates a new RocksDB backed state and restores from the given backup directory. After
 * restoring, the backup directory is deleted.
 *
 * @param keySerializer The serializer for the keys.
 * @param namespaceSerializer The serializer for the namespace.
 * @param basePath The path on the local system where RocksDB data should be stored.
 * @param checkpointPath The path where checkpoints of the RocksDB data are written.
 * @param restorePath The path to a backup directory from which to restore the RocksDB database.
 * @param options The RocksDB options used when opening the database.
 */
protected AbstractRocksDBState(
        TypeSerializer<K> keySerializer,
        TypeSerializer<N> namespaceSerializer,
        File basePath,
        String checkpointPath,
        String restorePath,
        Options options) {

    rocksDbPath = new File(basePath, "db" + UUID.randomUUID().toString());
    hadoopConfPath = new File(basePath, HADOOP_CONF_NAME);

    RocksDB.loadLibrary();

    // clean it, this will remove the last part of the path but RocksDB will recreate it
    try {
        if (rocksDbPath.exists()) {
            LOG.warn("Deleting already existing db directory {}.", rocksDbPath);
            FileUtils.deleteDirectory(rocksDbPath);
        }
    } catch (IOException e) {
        throw new RuntimeException("Error cleaning RocksDB data directory.", e);
    }

    try (BackupEngine backupEngine =
            BackupEngine.open(Env.getDefault(), new BackupableDBOptions(restorePath + "/"))) {
        backupEngine.restoreDbFromLatestBackup(
                rocksDbPath.getAbsolutePath(),
                rocksDbPath.getAbsolutePath(),
                new RestoreOptions(true));
    } catch (RocksDBException | IllegalArgumentException e) {
        throw new RuntimeException("Error while restoring RocksDB state from " + restorePath, e);
    } finally {
        try {
            FileUtils.deleteDirectory(new File(restorePath));
        } catch (IOException e) {
            LOG.error("Error cleaning up local restore directory " + restorePath, e);
        }
    }

    this.keySerializer = requireNonNull(keySerializer);
    this.namespaceSerializer = namespaceSerializer;
    this.basePath = basePath;
    this.checkpointPath = checkpointPath;

    if (!basePath.exists()) {
        if (!basePath.mkdirs()) {
            throw new RuntimeException("Could not create RocksDB data directory.");
        }
    }

    try {
        db = RocksDB.open(options, rocksDbPath.getAbsolutePath());
    } catch (RocksDBException e) {
        throw new RuntimeException("Error while opening RocksDB instance.", e);
    }

    writeHadoopConfig(hadoopConfPath);
}
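For reference, the backup that this constructor restores is produced with the same BackupEngine API. A minimal sketch, assuming an open db handle and a local backupPath string (both hypothetical here, not from the source); passing true as the second argument flushes the memtable first so the backup is self-contained:

// Sketch: create a backup that restoreDbFromLatestBackup above can consume.
// `db` and `backupPath` are assumed to be in scope.
try (BackupEngine backupEngine =
        BackupEngine.open(Env.getDefault(), new BackupableDBOptions(backupPath))) {
    backupEngine.createNewBackup(db, true);
} catch (RocksDBException e) {
    throw new RuntimeException("Error while backing up RocksDB state to " + backupPath, e);
}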
/** Should only be called by one thread, and only after all accesses to the DB happened. */
@Override
public void dispose() {
    super.dispose();

    // Acquire the lock, so that no ongoing snapshots access the db during cleanup
    synchronized (asyncSnapshotLock) {
        // IMPORTANT: null reference to signal potential async checkpoint workers that the db
        // was disposed, as working on the disposed object results in SEGFAULTS. Other code
        // has to check field #db for null and access it in a synchronized block that locks
        // on #asyncSnapshotLock.
        if (db != null) {
            for (Tuple2<ColumnFamilyHandle, StateDescriptor<?, ?>> column :
                    kvStateInformation.values()) {
                column.f0.close();
            }
            kvStateInformation.clear();

            db.close();
            db = null;
        }
    }

    try {
        FileUtils.deleteDirectory(instanceBasePath);
    } catch (IOException ioex) {
        LOG.info("Could not delete instance base path for RocksDB: " + instanceBasePath);
    }
}
/**
 * Creates a column family handle for use with a k/v state. When restoring from a snapshot we
 * don't restore the individual k/v states, just the global RocksDB database and the list of
 * column families. When a k/v state is first requested we check here whether we already have a
 * column family for it and return it, or create a new one if it doesn't exist.
 *
 * <p>This also checks whether the {@link StateDescriptor} for a state matches the one that we
 * checkpointed, i.e. is already in the map of column families.
 */
protected ColumnFamilyHandle getColumnFamily(StateDescriptor<?, ?> descriptor) {
    Tuple2<ColumnFamilyHandle, StateDescriptor<?, ?>> stateInfo =
            kvStateInformation.get(descriptor.getName());

    if (stateInfo != null) {
        if (!stateInfo.f1.equals(descriptor)) {
            throw new RuntimeException(
                    "Trying to access state using wrong StateDescriptor, was "
                            + stateInfo.f1
                            + " trying to access with "
                            + descriptor);
        }
        return stateInfo.f0;
    }

    ColumnFamilyDescriptor columnDescriptor =
            new ColumnFamilyDescriptor(descriptor.getName().getBytes(), columnOptions);

    try {
        ColumnFamilyHandle columnFamily = db.createColumnFamily(columnDescriptor);
        Tuple2<ColumnFamilyHandle, StateDescriptor<?, ?>> tuple =
                new Tuple2<>(columnFamily, descriptor);
        kvStateInformation.put(descriptor.getName(), tuple);
        return columnFamily;
    } catch (RocksDBException e) {
        throw new RuntimeException("Error creating ColumnFamilyHandle.", e);
    }
}
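As a usage sketch (the state name and serializer are hypothetical, not taken from the source), a k/v state implementation would resolve its backing column family like this:

// Hypothetical call site: look up (or lazily create) the column family
// backing a "sum" value state. Name and serializer are illustrative only.
ValueStateDescriptor<Long> sumDescriptor =
        new ValueStateDescriptor<>("sum", LongSerializer.INSTANCE);
ColumnFamilyHandle sumColumnFamily = getColumnFamily(sumDescriptor);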
@Override
public boolean setOrCreateHashBucket(String distrCode, String fileCode) {
    List<String> fileCodeList;
    try {
        byte[] bytes = db.get(distrCode.getBytes(RDB_DECODE));
        if (bytes != null) {
            fileCodeList = JSON.parseObject(new String(bytes, RDB_DECODE), List.class);
        } else {
            fileCodeList = new ArrayList<String>();
        }
        fileCodeList.add(fileCode);
        // Write with the same charset used for reading, so keys stored and
        // looked up with RDB_DECODE always match.
        db.put(distrCode.getBytes(RDB_DECODE), JSON.toJSONString(fileCodeList).getBytes(RDB_DECODE));
        return true;
    } catch (Exception e) {
        logger.error(String.format("[ERROR] caught the unexpected exception -- %s\n", e));
    }
    return false;
}
@Override
public final void dispose() {
    db.dispose();
    try {
        FileUtils.deleteDirectory(basePath);
    } catch (IOException e) {
        throw new RuntimeException("Error disposing RocksDB data directory.", e);
    }
}
@Override
public int delete(String table, String key) {
    try {
        db.remove(key.getBytes());
    } catch (RocksDBException e) {
        System.out.format("[ERROR] caught the unexpected exception -- %s\n", e);
        assert (false);
    }
    return 0;
}
@Override
public boolean putMdAttr(String fileCode, MdAttr mdAttr) {
    try {
        db.put(fileCode.getBytes(RDB_DECODE), JSON.toJSONString(mdAttr).getBytes(RDB_DECODE));
        return true;
    } catch (Exception e) {
        logger.error(String.format("[ERROR] caught the unexpected exception -- %s\n", e));
    }
    return false;
}
public RocksDBKeyedStateBackend(
        JobID jobId,
        String operatorIdentifier,
        ClassLoader userCodeClassLoader,
        File instanceBasePath,
        DBOptions dbOptions,
        ColumnFamilyOptions columnFamilyOptions,
        TaskKvStateRegistry kvStateRegistry,
        TypeSerializer<K> keySerializer,
        int numberOfKeyGroups,
        KeyGroupRange keyGroupRange) throws Exception {

    super(kvStateRegistry, keySerializer, userCodeClassLoader, numberOfKeyGroups, keyGroupRange);

    this.operatorIdentifier = operatorIdentifier;
    this.jobId = jobId;
    this.columnOptions = columnFamilyOptions;

    this.instanceBasePath = instanceBasePath;
    this.instanceRocksDBPath = new File(instanceBasePath, "db");

    if (!instanceBasePath.exists()) {
        if (!instanceBasePath.mkdirs()) {
            throw new RuntimeException("Could not create RocksDB data directory.");
        }
    }

    // clean it, this will remove the last part of the path but RocksDB will recreate it
    try {
        if (instanceRocksDBPath.exists()) {
            LOG.warn("Deleting already existing db directory {}.", instanceRocksDBPath);
            FileUtils.deleteDirectory(instanceRocksDBPath);
        }
    } catch (IOException e) {
        throw new RuntimeException("Error cleaning RocksDB data directory.", e);
    }

    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>(1);
    // RocksDB seems to need this...
    columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes()));
    List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>(1);

    try {
        db = RocksDB.open(
                dbOptions,
                instanceRocksDBPath.getAbsolutePath(),
                columnFamilyDescriptors,
                columnFamilyHandles);
    } catch (RocksDBException e) {
        throw new RuntimeException("Error while opening RocksDB instance.", e);
    }

    keyGroupPrefixBytes = getNumberOfKeyGroups() > (Byte.MAX_VALUE + 1) ? 2 : 1;
    kvStateInformation = new HashMap<>();
}
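The last assignment above sizes the key-group prefix: up to 128 key groups (Byte.MAX_VALUE + 1) fit in one unsigned byte, anything larger needs two. A minimal sketch of writing that prefix, mirroring the two-byte encoding used in the restore path further below (keyGroup and keyGroupPrefixBytes are assumed to be in scope):

// Sketch: encode a key-group id into the first keyGroupPrefixBytes bytes
// of a RocksDB key, big-endian in the two-byte case.
byte[] prefix = new byte[keyGroupPrefixBytes];
if (keyGroupPrefixBytes == 1) {
    prefix[0] = (byte) keyGroup;
} else {
    prefix[0] = (byte) ((keyGroup >> 8) & 0xFF);
    prefix[1] = (byte) (keyGroup & 0xFF);
}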
@Override
public List<MdAttr> getDirMdAttrList(String distrCode) {
    List<MdAttr> mdAttrs = new ArrayList<MdAttr>();
    try {
        byte[] bytes = db.get(distrCode.getBytes(RDB_DECODE));
        if (bytes != null) {
            List<String> fileCodeList = JSON.parseObject(new String(bytes, RDB_DECODE), List.class);
            for (String fileCode : fileCodeList) {
                byte[] attrBytes = db.get(fileCode.getBytes(RDB_DECODE));
                if (attrBytes != null) {
                    mdAttrs.add(JSON.parseObject(new String(attrBytes, RDB_DECODE), MdAttr.class));
                }
            }
            return mdAttrs;
        }
    } catch (Exception e) {
        logger.error(String.format("[ERROR] caught the unexpected exception -- %s\n", e));
    }
    return null;
}
@Override
public int insert(String table, String key, HashMap<String, ByteIterator> values) {
    try {
        byte[] serialized = serialize(values);
        db.put(key.getBytes(), serialized);
    } catch (RocksDBException e) {
        System.out.format("[ERROR] caught the unexpected exception -- %s\n", e);
        assert (false);
    }
    return 0;
}
/**
 * Creates a new RocksDB backed state.
 *
 * @param keySerializer The serializer for the keys.
 * @param namespaceSerializer The serializer for the namespace.
 * @param basePath The path on the local system where RocksDB data should be stored.
 * @param checkpointPath The path where checkpoints of the RocksDB data are written.
 * @param options The RocksDB options used when opening the database.
 */
protected AbstractRocksDBState(
        TypeSerializer<K> keySerializer,
        TypeSerializer<N> namespaceSerializer,
        File basePath,
        String checkpointPath,
        Options options) {

    rocksDbPath = new File(basePath, "db" + UUID.randomUUID().toString());
    hadoopConfPath = new File(basePath, HADOOP_CONF_NAME);

    this.keySerializer = requireNonNull(keySerializer);
    this.namespaceSerializer = namespaceSerializer;
    this.basePath = basePath;
    this.checkpointPath = checkpointPath;

    RocksDB.loadLibrary();

    if (!basePath.exists()) {
        if (!basePath.mkdirs()) {
            throw new RuntimeException("Could not create RocksDB data directory.");
        }
    }

    // clean it, this will remove the last part of the path but RocksDB will recreate it
    try {
        if (rocksDbPath.exists()) {
            LOG.warn("Deleting already existing db directory {}.", rocksDbPath);
            FileUtils.deleteDirectory(rocksDbPath);
        }
    } catch (IOException e) {
        throw new RuntimeException("Error cleaning RocksDB data directory.", e);
    }

    try {
        db = RocksDB.open(options, rocksDbPath.getAbsolutePath());
    } catch (RocksDBException e) {
        throw new RuntimeException("Error while opening RocksDB instance.", e);
    }

    writeHadoopConfig(hadoopConfPath);
}
@Override
public MdAttr getFileMdAttr(String fileCode) {
    try {
        byte[] attrBytes = db.get(fileCode.getBytes(RDB_DECODE));
        if (attrBytes != null) {
            return JSON.parseObject(new String(attrBytes, RDB_DECODE), MdAttr.class);
        }
    } catch (Exception e) {
        logger.error(String.format("[ERROR] caught the unexpected exception -- %s\n", e));
    }
    return null;
}
@Override
public final void clear() {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputViewStreamWrapper out = new DataOutputViewStreamWrapper(baos);
    try {
        writeKeyAndNamespace(out);
        byte[] key = baos.toByteArray();
        db.remove(key);
    } catch (IOException | RocksDBException e) {
        throw new RuntimeException("Error while removing entry from RocksDB", e);
    }
}
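writeKeyAndNamespace is not shown in this excerpt; a plausible sketch of what such a method does, assuming currentKey and currentNamespace fields alongside the serializers from the constructor (all names here are hypothetical, not from the source):

// Hypothetical sketch: the composite RocksDB key is the serialized current
// key followed by the serialized current namespace.
protected void writeKeyAndNamespace(DataOutputView out) throws IOException {
    keySerializer.serialize(currentKey, out);
    namespaceSerializer.serialize(currentNamespace, out);
}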
@Override
public int read(
        String table, String key, Set<String> fields, HashMap<String, ByteIterator> result) {
    try {
        byte[] value = db.get(key.getBytes());
        // Guard against missing keys: db.get returns null when the key is absent.
        if (value != null) {
            HashMap<String, ByteIterator> deserialized = deserialize(value);
            result.putAll(deserialized);
        }
    } catch (RocksDBException e) {
        System.out.format("[ERROR] caught the unexpected exception -- %s\n", e);
        assert (false);
    }
    return 0;
}
@Override
public void cleanup() throws DBException {
    super.cleanup();
    try {
        String str = db.getProperty("rocksdb.stats");
        System.out.println(str);
    } catch (RocksDBException e) {
        throw new DBException("Error while trying to print RocksDB statistics");
    }
    System.out.println("Cleaning up RocksDB database...");
    // db.close();
    // options.dispose(); // Why does it cause error? : "pointer being freed was not allocated"
}
@Test
public void snapshots() throws RocksDBException {
    RocksDB db = null;
    Options options = null;
    ReadOptions readOptions = null;
    try {
        options = new Options();
        options.setCreateIfMissing(true);

        db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath());
        db.put("key".getBytes(), "value".getBytes());

        // Get new Snapshot of database
        Snapshot snapshot = db.getSnapshot();
        readOptions = new ReadOptions();
        // set snapshot in ReadOptions
        readOptions.setSnapshot(snapshot);

        // retrieve key value pair
        assertThat(new String(db.get("key".getBytes()))).isEqualTo("value");
        // retrieve key value pair created before the snapshot was made
        assertThat(new String(db.get(readOptions, "key".getBytes()))).isEqualTo("value");

        // add new key/value pair
        db.put("newkey".getBytes(), "newvalue".getBytes());
        // using no snapshot the latest db entries will be taken into account
        assertThat(new String(db.get("newkey".getBytes()))).isEqualTo("newvalue");
        // snapshot was created before newkey
        assertThat(db.get(readOptions, "newkey".getBytes())).isNull();

        // Retrieve snapshot from read options
        Snapshot sameSnapshot = readOptions.snapshot();
        readOptions.setSnapshot(sameSnapshot);
        // results must be the same with new Snapshot
        // instance using the same native pointer
        assertThat(new String(db.get(readOptions, "key".getBytes()))).isEqualTo("value");

        // update key value pair to newvalue
        db.put("key".getBytes(), "newvalue".getBytes());
        // read with previously created snapshot will
        // read previous version of key value pair
        assertThat(new String(db.get(readOptions, "key".getBytes()))).isEqualTo("value");
        // read for newkey using the snapshot must be null
        assertThat(db.get(readOptions, "newkey".getBytes())).isNull();

        // setting null to snapshot in ReadOptions leads
        // to no Snapshot being used.
        readOptions.setSnapshot(null);
        assertThat(new String(db.get(readOptions, "newkey".getBytes()))).isEqualTo("newvalue");

        // release Snapshot
        db.releaseSnapshot(snapshot);
    } finally {
        if (db != null) {
            db.close();
        }
        if (options != null) {
            options.dispose();
        }
        if (readOptions != null) {
            readOptions.dispose();
        }
    }
}
/** For backwards compatibility, remove again later! */
@Deprecated
private void restoreOldSavepointKeyedState(Collection<KeyGroupsStateHandle> restoreState)
        throws Exception {

    if (restoreState.isEmpty()) {
        return;
    }

    Preconditions.checkState(1 == restoreState.size(), "Only one element expected here.");

    HashMap<String, RocksDBStateBackend.FinalFullyAsyncSnapshot> namedStates =
            InstantiationUtil.deserializeObject(
                    restoreState.iterator().next().openInputStream(), userCodeClassLoader);

    Preconditions.checkState(1 == namedStates.size(), "Only one element expected here.");
    DataInputView inputView =
            namedStates.values().iterator().next().stateHandle.getState(userCodeClassLoader);

    // clear k/v state information before filling it
    kvStateInformation.clear();

    // first get the column family mapping
    int numColumns = inputView.readInt();
    Map<Byte, StateDescriptor> columnFamilyMapping = new HashMap<>(numColumns);
    for (int i = 0; i < numColumns; i++) {
        byte mappingByte = inputView.readByte();

        ObjectInputStream ooIn =
                new InstantiationUtil.ClassLoaderObjectInputStream(
                        new DataInputViewStream(inputView), userCodeClassLoader);
        StateDescriptor stateDescriptor = (StateDescriptor) ooIn.readObject();

        columnFamilyMapping.put(mappingByte, stateDescriptor);

        // this will fill in the k/v state information
        getColumnFamily(stateDescriptor);
    }

    // try and read until EOF
    try {
        // the EOFException will get us out of this...
        while (true) {
            byte mappingByte = inputView.readByte();
            ColumnFamilyHandle handle = getColumnFamily(columnFamilyMapping.get(mappingByte));

            byte[] keyAndNamespace = BytePrimitiveArraySerializer.INSTANCE.deserialize(inputView);

            ByteArrayInputStreamWithPos bis = new ByteArrayInputStreamWithPos(keyAndNamespace);

            K reconstructedKey = keySerializer.deserialize(new DataInputViewStreamWrapper(bis));
            int len = bis.getPosition();

            int keyGroup =
                    KeyGroupRangeAssignment.assignToKeyGroup(reconstructedKey, numberOfKeyGroups);

            if (keyGroupPrefixBytes == 1) {
                // copy and override one byte (42) between key and namespace
                System.arraycopy(keyAndNamespace, 0, keyAndNamespace, 1, len);
                keyAndNamespace[0] = (byte) keyGroup;
            } else {
                byte[] largerKey = new byte[1 + keyAndNamespace.length];

                // write key-group
                largerKey[0] = (byte) ((keyGroup >> 8) & 0xFF);
                largerKey[1] = (byte) (keyGroup & 0xFF);

                // write key
                System.arraycopy(keyAndNamespace, 0, largerKey, 2, len);

                // skip one byte (42), write namespace
                System.arraycopy(
                        keyAndNamespace, 1 + len, largerKey, 2 + len,
                        keyAndNamespace.length - len - 1);

                keyAndNamespace = largerKey;
            }

            byte[] value = BytePrimitiveArraySerializer.INSTANCE.deserialize(inputView);
            db.put(handle, keyAndNamespace, value);
        }
    } catch (EOFException e) {
        // expected
    }
}
@Override
public void init() throws DBException {
    System.out.println("Initializing RocksDB...");
    String dbPath = DB_PATH;

    options = new Options();
    options
            .setCreateIfMissing(true)
            .createStatistics()
            .setWriteBufferSize(8 * SizeUnit.KB)
            .setMaxWriteBufferNumber(3)
            .setMaxBackgroundCompactions(10)
            .setCompressionType(CompressionType.SNAPPY_COMPRESSION)
            .setCompactionStyle(CompactionStyle.UNIVERSAL);

    Statistics stats = options.statisticsPtr();

    assert (options.createIfMissing() == true);
    assert (options.writeBufferSize() == 8 * SizeUnit.KB);
    assert (options.maxWriteBufferNumber() == 3);
    assert (options.maxBackgroundCompactions() == 10);
    assert (options.compressionType() == CompressionType.SNAPPY_COMPRESSION);
    assert (options.compactionStyle() == CompactionStyle.UNIVERSAL);

    assert (options.memTableFactoryName().equals("SkipListFactory"));
    options.setMemTableConfig(
            new HashSkipListMemTableConfig()
                    .setHeight(4)
                    .setBranchingFactor(4)
                    .setBucketCount(2000000));
    assert (options.memTableFactoryName().equals("HashSkipListRepFactory"));

    options.setMemTableConfig(new HashLinkedListMemTableConfig().setBucketCount(100000));
    assert (options.memTableFactoryName().equals("HashLinkedListRepFactory"));

    options.setMemTableConfig(new VectorMemTableConfig().setReservedSize(10000));
    assert (options.memTableFactoryName().equals("VectorRepFactory"));

    options.setMemTableConfig(new SkipListMemTableConfig());
    assert (options.memTableFactoryName().equals("SkipListFactory"));

    // options.setTableFormatConfig(new PlainTableConfig());
    // // Plain-Table requires mmap read
    // options.setAllowMmapReads(true);
    // assert(options.tableFactoryName().equals("PlainTable"));
    //
    // options.setRateLimiterConfig(new GenericRateLimiterConfig(10000000, 10000, 10));
    // options.setRateLimiterConfig(new GenericRateLimiterConfig(10000000));
    //
    // Filter bloomFilter = new BloomFilter(10);
    // BlockBasedTableConfig table_options = new BlockBasedTableConfig();
    // table_options.setBlockCacheSize(64 * SizeUnit.KB)
    //         .setFilter(bloomFilter)
    //         .setCacheNumShardBits(6)
    //         .setBlockSizeDeviation(5)
    //         .setBlockRestartInterval(10)
    //         .setCacheIndexAndFilterBlocks(true)
    //         .setHashIndexAllowCollision(false)
    //         .setBlockCacheCompressedSize(64 * SizeUnit.KB)
    //         .setBlockCacheCompressedNumShardBits(10);
    //
    // assert(table_options.blockCacheSize() == 64 * SizeUnit.KB);
    // assert(table_options.cacheNumShardBits() == 6);
    // assert(table_options.blockSizeDeviation() == 5);
    // assert(table_options.blockRestartInterval() == 10);
    // assert(table_options.cacheIndexAndFilterBlocks() == true);
    // assert(table_options.hashIndexAllowCollision() == false);
    // assert(table_options.blockCacheCompressedSize() == 64 * SizeUnit.KB);
    // assert(table_options.blockCacheCompressedNumShardBits() == 10);
    //
    // options.setTableFormatConfig(table_options);
    // assert(options.tableFactoryName().equals("BlockBasedTable"));

    try {
        db = RocksDB.open(options, dbPath);
        db.put("hello".getBytes(), "world".getBytes());
        byte[] value = db.get("hello".getBytes());
        assert ("world".equals(new String(value)));
        String str = db.getProperty("rocksdb.stats");
        assert (str != null && !str.isEmpty());
    } catch (RocksDBException e) {
        System.out.format("[ERROR] caught the unexpected exception -- %s\n", e);
        assert (db == null);
        assert (false);
    }
    System.out.println("Initializing RocksDB is over");
}
/**
 * Constructor.
 *
 * @param db RocksDB database to snapshot
 * @throws NullPointerException if {@code db} is null
 */
public SnapshotRocksDBKVStore(RocksDB db) {
    this(db, db.getSnapshot());
}
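A brief usage sketch for the snapshot captured above, assuming db and snapshot are the fields the delegated constructor stores (names are assumptions): binding the snapshot to a ReadOptions yields point-in-time reads, and both the options and the snapshot need explicit release.

// Sketch: serve a read as of the snapshot taken in the constructor.
ReadOptions readOptions = new ReadOptions();
try {
    readOptions.setSnapshot(snapshot);
    // Sees the database state as of construction time, regardless of later writes.
    byte[] valueAsOfSnapshot = db.get(readOptions, "key".getBytes());
} catch (RocksDBException e) {
    throw new RuntimeException("Error reading through snapshot", e);
} finally {
    readOptions.dispose();
    db.releaseSnapshot(snapshot);
}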