/**
 * Return the next row of this list and advance the read position.
 * <p>
 * When no file is in use ({@code file == null}) the row is taken directly
 * from the list. Otherwise rows are served from the list as a cache of the
 * current chunk; once that cache is exhausted the next chunk is read from
 * the file and decoded.
 *
 * @return the next row
 */
public Row next() {
    if (file == null) {
        return list.get(index++);
    }
    if (listIndex >= list.size()) {
        // The cached chunk is used up: load and decode the next one.
        list.clear();
        listIndex = 0;
        Data chunk = rowBuff;
        chunk.reset();
        int firstBlock = Constants.FILE_BLOCK_SIZE;
        file.readFully(chunk.getBytes(), 0, firstBlock);
        // The leading int holds the chunk length, counted in file blocks.
        int chunkBytes = chunk.readInt() * Constants.FILE_BLOCK_SIZE;
        chunk.checkCapacity(chunkBytes);
        int remaining = chunkBytes - firstBlock;
        if (remaining > 0) {
            file.readFully(chunk.getBytes(), firstBlock, remaining);
        }
        // Decode rows until readRow reports that there are no more (null).
        for (Row decoded = readRow(chunk); decoded != null; decoded = readRow(chunk)) {
            list.add(decoded);
        }
    }
    index++;
    return list.get(listIndex++);
}
/**
 * Append the given rows to the file and return the new total row count.
 * <p>
 * If a sort order is set, the rows are sorted first and the written range is
 * registered as a new tape; otherwise the end of the main tape is moved to
 * the new file position. Each row is written as an int length prefix, the
 * column values, and alignment padding. When {@code maxBufferSize} is
 * positive, rows are collected in memory and written in larger pieces.
 *
 * @param rows the rows to add (sorted in place when a sort order is set)
 * @return the total number of rows after adding
 */
@Override
public int addRows(ArrayList<Value[]> rows) {
    if (sort != null) {
        sort.sort(rows);
    }
    Data record = rowBuff;
    long firstPos = file.getFilePointer();
    ByteArrayOutputStream pending = new ByteArrayOutputStream();
    int pendingBytes = 0;
    for (Value[] values : rows) {
        record.reset();
        // Placeholder for the record length; patched below once it is known.
        record.writeInt(0);
        for (int col = 0; col < columnCount; col++) {
            Value value = values[col];
            record.checkCapacity(record.getValueLen(value));
            record.writeValue(value);
        }
        record.fillAligned();
        int recordBytes = record.length();
        record.setInt(0, recordBytes);
        if (maxBufferSize <= 0) {
            // Buffering is disabled: write each record straight to the file.
            file.write(record.getBytes(), 0, recordBytes);
            continue;
        }
        pending.write(record.getBytes(), 0, recordBytes);
        pendingBytes += recordBytes;
        if (pendingBytes > maxBufferSize) {
            byte[] batch = pending.toByteArray();
            pending.reset();
            file.write(batch, 0, batch.length);
            pendingBytes = 0;
        }
    }
    if (pendingBytes > 0) {
        // Write whatever is still buffered after the last row.
        byte[] rest = pending.toByteArray();
        file.write(rest, 0, rest.length);
    }
    if (sort == null) {
        mainTape.end = file.getFilePointer();
    } else {
        ResultDiskTape sortedRun = new ResultDiskTape();
        sortedRun.start = firstPos;
        sortedRun.end = file.getFilePointer();
        tapes.add(sortedRun);
    }
    rowCount += rows.size();
    return rowCount;
}
/**
 * Finish the given buffer and write it to the file.
 * <p>
 * A single zero byte is appended (presumably the end-of-rows marker; confirm
 * against the reader), the buffer is padded with {@code fillAligned()}, and
 * its length in file blocks is stored in the int at offset 0 before writing.
 *
 * @param buff the buffer to complete and write
 */
private void flushBuffer(Data buff) {
    buff.checkCapacity(1);
    buff.writeByte((byte) 0);
    buff.fillAligned();
    final int blockCount = buff.length() / Constants.FILE_BLOCK_SIZE;
    buff.setInt(0, blockCount);
    final byte[] bytes = buff.getBytes();
    file.write(bytes, 0, buff.length());
}
/**
 * Read one row record from the file at its current position and append it
 * to the buffer of the given tape.
 * <p>
 * The first file block is read to obtain the record length (the leading
 * int, in bytes); any remaining bytes are read afterwards. The tape
 * position is advanced by the record length.
 *
 * @param tape the tape that receives the row
 */
private void readRow(ResultDiskTape tape) {
    final int firstBlock = Constants.FILE_BLOCK_SIZE;
    Data record = rowBuff;
    record.reset();
    file.readFully(record.getBytes(), 0, firstBlock);
    final int recordBytes = record.readInt();
    record.checkCapacity(recordBytes);
    final int remaining = recordBytes - firstBlock;
    if (remaining > 0) {
        file.readFully(record.getBytes(), firstBlock, remaining);
    }
    tape.pos += recordBytes;
    Value[] values = new Value[columnCount];
    int col = 0;
    while (col < columnCount) {
        values[col] = record.readValue();
        col++;
    }
    tape.buffer.add(values);
}
/**
 * Record the outcome of an in-doubt transaction.
 * <p>
 * A COMMIT or ROLLBACK entry for the session is written into the stream
 * data page with the given id, the rest of that page is filled with zero
 * bytes, and the page is written.
 *
 * @param sessionId the session
 * @param pageId the page where the commit was prepared
 * @param commit whether the transaction should be committed
 */
void setInDoubtTransactionState(int sessionId, int pageId, boolean commit) {
    PageStreamData target = (PageStreamData) store.getPage(pageId);
    target.initWrite();

    Data entry = store.createData();
    byte outcome;
    if (commit) {
        outcome = (byte) COMMIT;
    } else {
        outcome = (byte) ROLLBACK;
    }
    entry.writeByte(outcome);
    entry.writeVarInt(sessionId);

    // Note: this writes the complete backing array of the entry buffer.
    byte[] payload = entry.getBytes();
    target.write(payload, 0, payload.length);

    // Zero-fill whatever space is left in the page.
    byte[] padding = new byte[target.getRemaining()];
    target.write(padding, 0, padding.length);
    target.write();
}
/**
 * Log the old content of a page as an undo entry.
 * <p>
 * Nothing is written if an undo entry for the page was already recorded or
 * if pages are currently being freed. The entry consists of the UNDO type,
 * the page id and a size marker: 1 for a page whose first byte is 0 (no
 * page data follows), 0 followed by the full page when it is stored
 * uncompressed, or the compressed length followed by the compressed bytes.
 *
 * @param pageId the page id
 * @param page the old page data
 */
void addUndo(int pageId, Data page) {
    if (undo.get(pageId)) {
        return;
    }
    if (freeing) {
        return;
    }
    if (trace.isDebugEnabled()) {
        trace.debug("log undo " + pageId);
    }
    if (SysProperties.CHECK && page == null) {
        DbException.throwInternalError("Undo entry not written");
    }
    undo.set(pageId);
    undoAll.set(pageId);

    Data out = getBuffer();
    out.writeByte((byte) UNDO);
    out.writeVarInt(pageId);
    if (page.getBytes()[0] == 0) {
        // Marker 1: the page is logged without any content.
        out.writeVarInt(1);
        write(out);
        return;
    }

    int pageSize = store.getPageSize();
    // Without compression the "compressed" size equals the page size, so the
    // uncompressed branch below is taken.
    int storedSize = pageSize;
    if (COMPRESS_UNDO) {
        storedSize = compress.compress(page.getBytes(), pageSize, compressBuffer, 0);
    }
    if (storedSize < pageSize) {
        out.writeVarInt(storedSize);
        out.checkCapacity(storedSize);
        out.write(compressBuffer, 0, storedSize);
    } else {
        // Marker 0: the full, uncompressed page follows.
        out.writeVarInt(0);
        out.checkCapacity(pageSize);
        out.write(page.getBytes(), 0, pageSize);
    }
    write(out);
}
/**
 * Decode a row from the given reader.
 * <p>
 * The stream contains the row key, the length of the serialized row, and
 * then that many bytes holding the column count followed by the values.
 *
 * @param in the input stream
 * @param data a temporary buffer; it is reset and overwritten
 * @return the row
 * @throws IOException if reading from the stream fails
 */
public static Row readRow(DataReader in, Data data) throws IOException {
    final long rowKey = in.readVarLong();
    final int payloadLength = in.readVarInt();

    data.reset();
    data.checkCapacity(payloadLength);
    in.readFully(data.getBytes(), payloadLength);

    final int count = data.readVarInt();
    final Value[] columns = new Value[count];
    int col = 0;
    while (col < count) {
        columns[col] = data.readValue();
        col++;
    }

    Row result = new Row(columns, Row.MEMORY_CALCULATE);
    result.setKey(rowKey);
    return result;
}
/**
 * Log that a row was added to, or removed from, a table.
 * <p>
 * The log position is registered with the session and then advanced. The
 * row is serialized into the shared data buffer; unless binary redo logging
 * is enabled for the session, values of type {@code Value.BYTES} are
 * replaced by NULL. The log entry contains the type, session id, table id
 * and row key; the serialized row is appended only when the row is added.
 *
 * @param session the session
 * @param tableId the table id
 * @param row the row to add
 * @param add true if the row is added, false if it is removed
 */
void logAddOrRemoveRow(Session session, int tableId, Row row, boolean add) {
    if (trace.isDebugEnabled()) {
        String sign = add ? "+" : "-";
        trace.debug(
                "log " + sign + " s: " + session.getId() + " table: " + tableId + " row: " + row);
    }
    session.addLogPos(logSectionId, logPos);
    logPos++;

    Data rowData = dataBuffer;
    rowData.reset();
    int columnTotal = row.getColumnCount();
    rowData.writeVarInt(columnTotal);
    rowData.checkCapacity(row.getByteCount(rowData));
    boolean keepBinary = session.isRedoLogBinaryEnabled();
    for (int col = 0; col < columnTotal; col++) {
        Value value = row.getValue(col);
        // The type is only inspected when binary values must be dropped.
        if (!keepBinary && value.getType() == Value.BYTES) {
            rowData.writeValue(ValueNull.INSTANCE);
        } else {
            rowData.writeValue(value);
        }
    }

    Data entry = getBuffer();
    byte type;
    if (add) {
        type = (byte) ADD;
    } else {
        type = (byte) REMOVE;
    }
    entry.writeByte(type);
    entry.writeVarInt(session.getId());
    entry.writeVarInt(tableId);
    entry.writeVarLong(row.getKey());
    if (add) {
        // Removals are identified by key alone; only additions carry data.
        entry.writeVarInt(rowData.length());
        entry.checkCapacity(rowData.length());
        entry.write(rowData.getBytes(), 0, rowData.length());
    }
    write(entry);
}
/**
 * Copy bytes out of this page's data into the given array.
 *
 * @param startPos the position in the data page to start copying from
 * @param buff the target buffer
 * @param off the offset in the target buffer
 * @param len the number of bytes to copy
 */
void read(int startPos, byte[] buff, int off, int len) {
    byte[] source = data.getBytes();
    System.arraycopy(source, startPos, buff, off, len);
}
/**
 * Write the current content of the given buffer to the page output stream
 * and reset the buffer afterwards.
 *
 * @param data the buffer to write and reset
 */
private void write(Data data) {
    byte[] bytes = data.getBytes();
    int length = data.length();
    pageOut.write(bytes, 0, length);
    data.reset();
}
/** * Run one recovery stage. There are three recovery stages: 0: only the undo steps are run * (restoring the state before the last checkpoint). 1: the pages that are used by the transaction * log are allocated. 2: the committed operations are re-applied. * * @param stage the recovery stage * @return whether the transaction log was empty */ boolean recover(int stage) { if (trace.isDebugEnabled()) { trace.debug("log recover stage: " + stage); } if (stage == RECOVERY_STAGE_ALLOCATE) { PageInputStream in = new PageInputStream(store, logKey, firstTrunkPage, firstDataPage); usedLogPages = in.allocateAllPages(); in.close(); return true; } PageInputStream pageIn = new PageInputStream(store, logKey, firstTrunkPage, firstDataPage); DataReader in = new DataReader(pageIn); int logId = 0; Data data = store.createData(); boolean isEmpty = true; try { int pos = 0; while (true) { int x = in.readByte(); if (x < 0) { break; } pos++; isEmpty = false; if (x == UNDO) { int pageId = in.readVarInt(); int size = in.readVarInt(); if (size == 0) { in.readFully(data.getBytes(), store.getPageSize()); } else if (size == 1) { // empty Arrays.fill(data.getBytes(), 0, store.getPageSize(), (byte) 0); } else { in.readFully(compressBuffer, size); try { compress.expand(compressBuffer, 0, size, data.getBytes(), 0, store.getPageSize()); } catch (ArrayIndexOutOfBoundsException e) { DbException.convertToIOException(e); } } if (stage == RECOVERY_STAGE_UNDO) { if (!undo.get(pageId)) { if (trace.isDebugEnabled()) { trace.debug("log undo {0}", pageId); } store.writePage(pageId, data); undo.set(pageId); undoAll.set(pageId); } else { if (trace.isDebugEnabled()) { trace.debug("log undo skip {0}", pageId); } } } } else if (x == ADD) { int sessionId = in.readVarInt(); int tableId = in.readVarInt(); Row row = readRow(in, data); if (stage == RECOVERY_STAGE_UNDO) { store.allocateIfIndexRoot(pos, tableId, row); } else if (stage == RECOVERY_STAGE_REDO) { if (isSessionCommitted(sessionId, logId, pos)) { if 
(trace.isDebugEnabled()) { trace.debug("log redo + table: " + tableId + " s: " + sessionId + " " + row); } store.redo(tableId, row, true); } else { if (trace.isDebugEnabled()) { trace.debug("log ignore s: " + sessionId + " + table: " + tableId + " " + row); } } } } else if (x == REMOVE) { int sessionId = in.readVarInt(); int tableId = in.readVarInt(); long key = in.readVarLong(); if (stage == RECOVERY_STAGE_REDO) { if (isSessionCommitted(sessionId, logId, pos)) { if (trace.isDebugEnabled()) { trace.debug("log redo - table: " + tableId + " s:" + sessionId + " key: " + key); } store.redoDelete(tableId, key); } else { if (trace.isDebugEnabled()) { trace.debug("log ignore s: " + sessionId + " - table: " + tableId + " " + key); } } } } else if (x == TRUNCATE) { int sessionId = in.readVarInt(); int tableId = in.readVarInt(); if (stage == RECOVERY_STAGE_REDO) { if (isSessionCommitted(sessionId, logId, pos)) { if (trace.isDebugEnabled()) { trace.debug("log redo truncate table: " + tableId); } store.redoTruncate(tableId); } else { if (trace.isDebugEnabled()) { trace.debug("log ignore s: " + sessionId + " truncate table: " + tableId); } } } } else if (x == PREPARE_COMMIT) { int sessionId = in.readVarInt(); String transaction = in.readString(); if (trace.isDebugEnabled()) { trace.debug("log prepare commit " + sessionId + " " + transaction + " pos: " + pos); } if (stage == RECOVERY_STAGE_UNDO) { int page = pageIn.getDataPage(); setPrepareCommit(sessionId, page, transaction); } } else if (x == ROLLBACK) { int sessionId = in.readVarInt(); if (trace.isDebugEnabled()) { trace.debug("log rollback " + sessionId + " pos: " + pos); } // ignore - this entry is just informational } else if (x == COMMIT) { int sessionId = in.readVarInt(); if (trace.isDebugEnabled()) { trace.debug("log commit " + sessionId + " pos: " + pos); } if (stage == RECOVERY_STAGE_UNDO) { setLastCommitForSession(sessionId, logId, pos); } } else if (x == NOOP) { // nothing to do } else if (x == CHECKPOINT) { 
logId++; } else if (x == FREE_LOG) { int count = in.readVarInt(); for (int i = 0; i < count; i++) { int pageId = in.readVarInt(); if (stage == RECOVERY_STAGE_REDO) { if (!usedLogPages.get(pageId)) { store.free(pageId, false); } } } } else { if (trace.isDebugEnabled()) { trace.debug("log end"); break; } } } } catch (DbException e) { if (e.getErrorCode() == ErrorCode.FILE_CORRUPTED_1) { trace.debug("log recovery stopped"); } else { throw e; } } catch (IOException e) { trace.debug("log recovery completed"); } undo = new BitField(); if (stage == RECOVERY_STAGE_REDO) { usedLogPages = null; } return isEmpty; }