/**
 * Commit all changes and persist them to disk. This method does nothing if
 * there are no unsaved changes; otherwise it increments the current version
 * and stores the data (for file-based storages).
 *
 * <p>At most one storage operation may run at any time.
 *
 * @return the new version (incremented if there were changes)
 */
private synchronized long commitAndSave() {
    if (closed) {
        return currentVersion;
    }
    if (map.isInMemory()) {
        throw DataUtils.newIllegalStateException(
                DataUtils.ERROR_WRITING_FAILED, "This is an in-memory storage");
    }
    if (map.isReadOnly()) {
        throw DataUtils.newIllegalStateException(
                DataUtils.ERROR_WRITING_FAILED, "This storage is read-only");
    }
    if (!hasUnsavedChanges()) {
        return currentVersion;
    }
    try {
        currentStoreVersion = currentVersion;
        return save();
    } catch (IllegalStateException e) {
        panic(e);
        return -1;
    } finally {
        currentStoreVersion = -1;
    }
}
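/**
 * Usage sketch (illustrative, not part of the original class): an
 * opportunistic commit helper built only from methods shown in this section.
 * The guard is an assumption for illustration, since commitAndSave() itself
 * throws for in-memory and read-only storages.
 */
private long commitIfPossible() {
    if (!closed && !map.isInMemory() && !map.isReadOnly()) {
        return commitAndSave(); // no-op (returns currentVersion) if nothing changed
    }
    return currentVersion;
}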
/**
 * Remove a page.
 *
 * @param pos the position of the page
 * @param memory the memory usage
 */
void removePage(long pos, int memory) {
    // we need to keep temporary pages,
    // to support reading old versions and rollback
    if (pos == 0) {
        // the page was not yet stored:
        // just using "unsavedMemory -= memory" could result in negative
        // values, because in some cases a page is allocated but never
        // stored, so we need to use max
        unsavedMemory = Math.max(0, unsavedMemory - memory);
        return;
    }
    // This could result in a cache miss if the operation is rolled back,
    // but we don't optimize for rollback.
    // We could also keep the page in the cache, as somebody
    // could still read it (reading the old version).
    if (cache != null) {
        if (DataUtils.getPageType(pos) == DataUtils.PAGE_TYPE_LEAF) {
            // only evict leaf pages; keep internal nodes in the cache,
            // because they are still used for garbage collection
            cache.remove(pos);
        }
    }
    BTreeChunk chunk = getChunk(pos);
    long maxLengthLive = DataUtils.getPageMaxLength(pos);
    // synchronize, because pages could be freed concurrently
    synchronized (chunk) {
        chunk.maxLenLive -= maxLengthLive;
        chunk.pageCountLive--;
        chunk.changed = true;
    }
}
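/**
 * Illustrative sketch: how the live counters that removePage() maintains
 * could feed a compaction decision. Both the maxLen field and the 25%
 * threshold are assumptions for illustration, not values from this codebase.
 */
private boolean isMostlyGarbage(BTreeChunk chunk) {
    synchronized (chunk) {
        // maxLenLive only shrinks as pages are removed; once little of the
        // originally written data (assumed field: maxLen) is still live,
        // the chunk is a good candidate for compaction
        return chunk.pageCountLive == 0 || chunk.maxLenLive * 4 < chunk.maxLen;
    }
}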
/**
 * Create a BLOB value from a stream.
 *
 * @param in the input stream
 * @param length the number of bytes to read, or -1 for no limit
 * @param handler the data handler
 * @return the lob value
 */
private static ValueLob createBlob(InputStream in, long length, DataHandler handler) {
    try {
        if (handler == null) {
            byte[] data = IOUtils.readBytesAndClose(in, (int) length);
            return createSmallLob(Value.BLOB, data);
        }
        long remaining = Long.MAX_VALUE;
        boolean compress = handler.getLobCompressionAlgorithm(Value.BLOB) != null;
        if (length >= 0 && length < remaining) {
            remaining = length;
        }
        int len = getBufferSize(handler, compress, remaining);
        byte[] buff;
        if (len >= Integer.MAX_VALUE) {
            // unbounded size: read the whole stream into memory
            buff = IOUtils.readBytesAndClose(in, -1);
            len = buff.length;
        } else {
            buff = DataUtils.newBytes(len);
            len = IOUtils.readFully(in, buff, len);
        }
        if (len <= handler.getMaxLengthInplaceLob()) {
            // small enough to store in-place
            byte[] small = DataUtils.newBytes(len);
            System.arraycopy(buff, 0, small, 0, len);
            return ValueLob.createSmallLob(Value.BLOB, small);
        }
        ValueLob lob = new ValueLob(Value.BLOB, null);
        lob.createFromStream(buff, len, in, remaining, handler);
        return lob;
    } catch (IOException e) {
        throw DbException.convertIOException(e, null);
    }
}
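/**
 * Usage sketch (illustrative): building a BLOB from an in-memory byte array
 * through the stream path above. Only JDK types and methods from this
 * section are used; the helper itself is hypothetical.
 */
static ValueLob blobFromBytes(byte[] data, DataHandler handler) {
    // the length is known here; pass -1 instead when it is not
    return createBlob(new java.io.ByteArrayInputStream(data), data.length, handler);
}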
/**
 * Get the chunk for the given position.
 *
 * @param pos the position
 * @return the chunk
 */
private BTreeChunk getChunk(long pos) {
    int chunkId = DataUtils.getPageChunkId(pos);
    BTreeChunk c = chunks.get(chunkId);
    if (c == null) {
        c = readChunkHeader(chunkId);
    }
    if (c == null) {
        throw DataUtils.newIllegalStateException(
                DataUtils.ERROR_FILE_CORRUPT, "Chunk {0} not found", chunkId);
    }
    return c;
}
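/**
 * Illustrative sketch (hypothetical helper): the components packed into a
 * page position, decoded with the DataUtils accessors already used in this
 * section. The exact bit layout is left to DataUtils.
 */
private static String describePagePos(long pos) {
    int chunkId = DataUtils.getPageChunkId(pos); // which chunk file holds the page
    int offset = DataUtils.getPageOffset(pos);   // byte offset within the chunk
    int type = DataUtils.getPageType(pos);       // leaf or internal node
    return "chunk=" + chunkId + " offset=" + offset + " type=" + type;
}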
private void writeChunkHeader(BTreeChunk chunk) {
    StringBuilder buff = chunk.asStringBuilder();
    byte[] bytes = buff.toString().getBytes(DataUtils.LATIN);
    int checksum = DataUtils.getFletcher32(bytes, bytes.length);
    DataUtils.appendMap(buff, "fletcher", checksum);
    buff.append("\n");
    bytes = buff.toString().getBytes(DataUtils.LATIN);
    ByteBuffer header = ByteBuffer.allocate(CHUNK_HEADER_SIZE);
    // write two copies of the header, one per block, so a torn write of a
    // single block can be detected (via the checksum) and recovered from
    header.put(bytes);
    header.position(BLOCK_SIZE);
    header.put(bytes);
    header.rewind();
    write(chunk.fileStorage, 0, header);
}
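// The header written above is a single LATIN-1 key/value line produced by
// BTreeChunk.asStringBuilder(), with a Fletcher-32 checksum appended, e.g.
// (field names and values here are illustrative only; numbers are hex, so
// blockSize:1000 means 4096):
//
//   chunk:1,blockSize:1000,version:7,fletcher:9f2e4a1b
//
// Two identical copies go into the first two blocks, so a torn write of one
// block still leaves a verifiable copy for readChunkHeader() to recover.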
private void expand(int plus) {
    byte[] d = DataUtils.newBytes((data.length + plus) * 2);
    // must copy everything, because pos could be 0 and data may be
    // still required
    System.arraycopy(data, 0, d, 0, data.length);
    data = d;
}
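// Growth note: doubling (data.length + plus) means n successive appends copy
// O(n) bytes in total, i.e. amortized O(1) per append, instead of the O(n^2)
// total that fixed-increment growth would cost.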
/**
 * Open an old, stored version of a map.
 *
 * @param version the version
 * @return the read-only map
 */
@SuppressWarnings("unchecked")
<T extends BTreeMap<?, ?>> T openMapVersion(long version) {
    BTreeChunk c = getChunkForVersion(version);
    DataUtils.checkArgument(c != null, "Unknown version {0}", version);
    BTreeMap<?, ?> m = map.openReadOnly();
    m.setRootPos(c.rootPagePos, version);
    return (T) m;
}
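/**
 * Usage sketch (illustrative): a point-in-time read of a single key. The
 * get(Object) accessor on BTreeMap is an assumption; the snapshot returned
 * by openMapVersion() is read-only, so writes to it would fail.
 */
private Object readAtVersion(long version, Object key) {
    BTreeMap<?, ?> snapshot = openMapVersion(version);
    return snapshot.get(key); // assumed accessor
}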
/**
 * Read a page.
 *
 * @param pos the page position
 * @return the page
 */
BTreePage readPage(long pos) {
    if (pos == 0) {
        throw DataUtils.newIllegalStateException(
                DataUtils.ERROR_FILE_CORRUPT, "Position 0");
    }
    BTreePage p = cache == null ? null : cache.get(pos);
    if (p == null) {
        // cache miss: locate the chunk and read the page from disk
        BTreeChunk c = getChunk(pos);
        long filePos = CHUNK_HEADER_SIZE + DataUtils.getPageOffset(pos);
        if (filePos < 0) {
            throw DataUtils.newIllegalStateException(
                    DataUtils.ERROR_FILE_CORRUPT, "Negative position {0}", filePos);
        }
        long maxPos = c.blockCount * BLOCK_SIZE;
        p = BTreePage.read(c.fileStorage, pos, map, filePos, maxPos);
        cachePage(pos, p, p.getMemory());
    }
    return p;
}
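/**
 * Illustrative sketch (hypothetical helper): the absolute file range a page
 * read may touch, as implied by readPage() above. Offsets are shifted past
 * the duplicated chunk header, and reads must stay inside the chunk's blocks.
 */
private static long[] pageFileRange(BTreeChunk c, long pos) {
    long start = CHUNK_HEADER_SIZE + DataUtils.getPageOffset(pos);
    long end = c.blockCount * BLOCK_SIZE; // upper bound passed to BTreePage.read
    return new long[] { start, end };
}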
/**
 * Revert to the beginning of the given version. All later changes (stored or
 * not) are forgotten. All maps that were created later are closed. A rollback
 * to a version before the last stored version is immediately persisted.
 * Rollback to version 0 means all data is removed.
 *
 * @param version the version to revert to
 */
public synchronized void rollbackTo(long version) {
    checkOpen();
    if (version == 0) {
        // special case: remove all data
        map.close();
        chunks.clear();
        currentVersion = version;
        return;
    }
    DataUtils.checkArgument(isKnownVersion(version), "Unknown version {0}", version);
    map.internalRollbackTo(version);
    boolean loadFromFile = false;
    // find out which chunks to remove,
    // and which is the newest chunk to keep
    // (the chunk list can have gaps)
    ArrayList<Integer> remove = new ArrayList<Integer>();
    BTreeChunk keep = null;
    for (BTreeChunk c : chunks.values()) {
        if (c.version > version) {
            remove.add(c.id);
        } else if (keep == null || keep.id < c.id) {
            keep = c;
        }
    }
    if (remove.size() > 0) {
        // remove the youngest first, so we don't create gaps
        // (in case we remove many chunks)
        Collections.sort(remove, Collections.reverseOrder());
        map.removeUnusedOldVersions();
        loadFromFile = true;
        for (int id : remove) {
            BTreeChunk c = chunks.remove(id);
            c.fileStorage.close();
            c.fileStorage.delete();
        }
        lastChunkId = keep.id;
        setLastChunk(keep);
    }
    if (createVersion >= version) {
        map.close();
    } else if (loadFromFile) {
        map.setRootPos(lastChunk.rootPagePos, lastChunk.version);
    }
    currentVersion = version;
}
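/**
 * Usage sketch (illustrative): pairing commitAndSave() with rollbackTo() to
 * discard uncommitted work. It assumes the version returned by
 * commitAndSave() is the one in which subsequent changes accumulate, so
 * rolling back to it forgets everything done after the commit.
 */
private void discardUncommittedWork() {
    long committed = commitAndSave();
    // ... speculative map updates would happen here ...
    rollbackTo(committed); // back to the state right after the commit
}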
/**
 * Store the lob data to a file if the size of the buffer is larger than the
 * maximum size for an in-place lob.
 *
 * @param h the data handler
 */
public void convertToFileIfRequired(DataHandler h) {
    try {
        if (small != null && small.length > h.getMaxLengthInplaceLob()) {
            boolean compress = h.getLobCompressionAlgorithm(type) != null;
            int len = getBufferSize(h, compress, Long.MAX_VALUE);
            int tabId = tableId;
            if (type == Value.BLOB) {
                createFromStream(DataUtils.newBytes(len), 0, getInputStream(), Long.MAX_VALUE, h);
            } else {
                createFromReader(new char[len], 0, getReader(), Long.MAX_VALUE, h);
            }
            Value v2 = link(h, tabId);
            if (SysProperties.CHECK && v2 != this) {
                DbException.throwInternalError();
            }
        }
    } catch (IOException e) {
        throw DbException.convertIOException(e, null);
    }
}
private synchronized BTreeChunk readChunkHeader(int chunkId) {
    FileStorage fileStorage = getFileStorage(chunkId);
    BTreeChunk chunk = null;
    ByteBuffer chunkHeaderBlocks = fileStorage.readFully(0, CHUNK_HEADER_SIZE);
    byte[] buff = new byte[BLOCK_SIZE];
    // the header is written twice (see writeChunkHeader); try both copies
    for (int i = 0; i <= BLOCK_SIZE; i += BLOCK_SIZE) {
        chunkHeaderBlocks.get(buff);
        try {
            String s = new String(buff, 0, BLOCK_SIZE, DataUtils.LATIN).trim();
            HashMap<String, String> m = DataUtils.parseMap(s);
            int blockSize = DataUtils.readHexInt(m, "blockSize", BLOCK_SIZE);
            if (blockSize != BLOCK_SIZE) {
                throw DataUtils.newIllegalStateException(
                        DataUtils.ERROR_UNSUPPORTED_FORMAT,
                        "Block size {0} is currently not supported", blockSize);
            }
            int check = DataUtils.readHexInt(m, "fletcher", 0);
            m.remove("fletcher");
            // the checksum covers the header text without the fletcher field
            s = s.substring(0, s.lastIndexOf("fletcher") - 1);
            byte[] bytes = s.getBytes(DataUtils.LATIN);
            int checksum = DataUtils.getFletcher32(bytes, bytes.length);
            if (check != checksum) {
                // this copy is torn or corrupt; try the next one
                continue;
            }
            chunk = BTreeChunk.fromString(s);
            break;
        } catch (Exception e) {
            continue;
        }
    }
    if (chunk == null) {
        throw DataUtils.newIllegalStateException(
                DataUtils.ERROR_FILE_CORRUPT, "Storage header is corrupt: {0}", fileStorage);
    }
    chunk.fileStorage = fileStorage;
    chunks.put(chunk.id, chunk);
    return chunk;
}
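/**
 * Illustrative sketch (hypothetical helper): the checksum handshake between
 * writeChunkHeader() and readChunkHeader(), extracted to make the invariant
 * explicit. The checksum is computed over the header text without the
 * fletcher field itself, which is why the reader strips it before re-computing.
 */
private static boolean headerChecksumOk(String s) {
    HashMap<String, String> m = DataUtils.parseMap(s);
    int stored = DataUtils.readHexInt(m, "fletcher", 0);
    String payload = s.substring(0, s.lastIndexOf("fletcher") - 1);
    byte[] bytes = payload.getBytes(DataUtils.LATIN);
    return stored == DataUtils.getFletcher32(bytes, bytes.length);
}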
private void checkOpen() {
    if (closed) {
        throw DataUtils.newIllegalStateException(
                DataUtils.ERROR_CLOSED, "This storage is closed", panicException);
    }
}
/**
 * Read a value.
 *
 * @return the value
 */
public Value readValue() {
    int type = data[pos++] & 255;
    switch (type) {
    case Value.NULL:
        return ValueNull.INSTANCE;
    case BOOLEAN_TRUE:
        return ValueBoolean.get(true);
    case BOOLEAN_FALSE:
        return ValueBoolean.get(false);
    case INT_NEG:
        return ValueInt.get(-readVarInt());
    case Value.INT:
        return ValueInt.get(readVarInt());
    case LONG_NEG:
        return ValueLong.get(-readVarLong());
    case Value.LONG:
        return ValueLong.get(readVarLong());
    case Value.BYTE:
        return ValueByte.get(readByte());
    case Value.SHORT:
        return ValueShort.get(readShortInt());
    case DECIMAL_0_1:
        return (ValueDecimal) ValueDecimal.ZERO;
    case DECIMAL_0_1 + 1:
        return (ValueDecimal) ValueDecimal.ONE;
    case DECIMAL_SMALL_0:
        return ValueDecimal.get(BigDecimal.valueOf(readVarLong()));
    case DECIMAL_SMALL: {
        int scale = readVarInt();
        return ValueDecimal.get(BigDecimal.valueOf(readVarLong(), scale));
    }
    case Value.DECIMAL: {
        int scale = readVarInt();
        int len = readVarInt();
        byte[] buff = DataUtils.newBytes(len);
        read(buff, 0, len);
        BigInteger b = new BigInteger(buff);
        return ValueDecimal.get(new BigDecimal(b, scale));
    }
    case LOCAL_DATE:
        return ValueDate.fromDateValue(readVarLong());
    case Value.DATE: {
        long x = readVarLong() * MILLIS_PER_MINUTE;
        return ValueDate.get(new Date(DateTimeUtils.getTimeUTCWithoutDst(x)));
    }
    case LOCAL_TIME: {
        long nanos = readVarLong() * 1000000 + readVarLong();
        return ValueTime.fromNanos(nanos);
    }
    case Value.TIME:
        // need to normalize the year, month and day
        return ValueTime.get(new Time(DateTimeUtils.getTimeUTCWithoutDst(readVarLong())));
    case LOCAL_TIMESTAMP: {
        long dateValue = readVarLong();
        long nanos = readVarLong() * 1000000 + readVarLong();
        return ValueTimestamp.fromDateValueAndNanos(dateValue, nanos);
    }
    case Value.TIMESTAMP: {
        Timestamp ts = new Timestamp(DateTimeUtils.getTimeUTCWithoutDst(readVarLong()));
        ts.setNanos(readVarInt());
        return ValueTimestamp.get(ts);
    }
    case Value.BYTES: {
        int len = readVarInt();
        byte[] b = DataUtils.newBytes(len);
        read(b, 0, len);
        return ValueBytes.getNoCopy(b);
    }
    case Value.JAVA_OBJECT: {
        int len = readVarInt();
        byte[] b = DataUtils.newBytes(len);
        read(b, 0, len);
        return ValueJavaObject.getNoCopy(null, b);
    }
    case Value.UUID:
        return ValueUuid.get(readLong(), readLong());
    case Value.STRING:
        return ValueString.get(readString());
    case Value.STRING_IGNORECASE:
        return ValueStringIgnoreCase.get(readString());
    case Value.STRING_FIXED:
        return ValueStringFixed.get(readString());
    case FLOAT_0_1:
        return ValueFloat.get(0);
    case FLOAT_0_1 + 1:
        return ValueFloat.get(1);
    case DOUBLE_0_1:
        return ValueDouble.get(0);
    case DOUBLE_0_1 + 1:
        return ValueDouble.get(1);
    case Value.DOUBLE:
        return ValueDouble.get(Double.longBitsToDouble(Long.reverse(readVarLong())));
    case Value.FLOAT:
        return ValueFloat.get(Float.intBitsToFloat(Integer.reverse(readVarInt())));
    case Value.BLOB:
    case Value.CLOB: {
        int smallLen = readVarInt();
        if (smallLen >= 0) {
            byte[] small = DataUtils.newBytes(smallLen);
            read(small, 0, smallLen);
            return ValueLob.createSmallLob(type, small);
        }
        int tableId = readVarInt();
        long lobId = readVarLong();
        long precision = readVarLong();
        return ValueLob.create(type, handler, tableId, lobId, null, precision);
    }
    case Value.ARRAY: {
        int len = readVarInt();
        Value[] list = new Value[len];
        for (int i = 0; i < len; i++) {
            list[i] = readValue();
        }
        return ValueArray.get(list);
    }
    case Value.RESULT_SET: {
        SimpleResultSet rs = new SimpleResultSet();
        int columns = readVarInt();
        for (int i = 0; i < columns; i++) {
            rs.addColumn(readString(), readVarInt(), readVarInt(), readVarInt());
        }
        while (true) {
            if (readByte() == 0) {
                break;
            }
            Object[] o = new Object[columns];
            for (int i = 0; i < columns; i++) {
                o[i] = readValue().getObject();
            }
            rs.addRow(o);
        }
        return ValueResultSet.get(rs);
    }
    default:
        if (type >= INT_0_15 && type < INT_0_15 + 16) {
            return ValueInt.get(type - INT_0_15);
        } else if (type >= LONG_0_7 && type < LONG_0_7 + 8) {
            return ValueLong.get(type - LONG_0_7);
        } else if (type >= BYTES_0_31 && type < BYTES_0_31 + 32) {
            int len = type - BYTES_0_31;
            byte[] b = DataUtils.newBytes(len);
            read(b, 0, len);
            return ValueBytes.getNoCopy(b);
        } else if (type >= STRING_0_31 && type < STRING_0_31 + 32) {
            return ValueString.get(readString(type - STRING_0_31));
        }
        throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "type: " + type);
    }
}
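/**
 * Usage sketch (illustrative, hypothetical helper): decoding several values
 * written back to back, exactly as the Value.ARRAY case above does
 * internally. Assumes the writer emitted count values with the matching
 * writeValue() counterpart.
 */
public Value[] readValues(int count) {
    Value[] values = new Value[count];
    for (int i = 0; i < count; i++) {
        values[i] = readValue();
    }
    return values;
}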