private void checkDataConsistencyWithMaster( URI availableMasterId, Master master, NeoStoreXaDataSource nioneoDataSource, TransactionIdStore transactionIdStore) throws IOException { long myLastCommittedTx = transactionIdStore.getLastCommittingTransactionId(); int myMaster = -1; long myChecksum = 0; TransactionMetadataCache.TransactionMetadata metadata = nioneoDataSource .getDependencyResolver() .resolveDependency(LogicalTransactionStore.class) .getMetadataFor(myLastCommittedTx); myMaster = metadata.getMasterId(); myChecksum = metadata.getChecksum(); HandshakeResult handshake; try (Response<HandshakeResult> response = master.handshake(myLastCommittedTx, nioneoDataSource.getStoreId())) { handshake = response.response(); requestContextFactory.setEpoch(handshake.epoch()); } catch (BranchedDataException e) { // Rethrow wrapped in a branched data exception on our side, to clarify where the problem // originates. throw new BranchedDataException( "The database stored on this machine has diverged from that " + "of the master. This will be automatically resolved.", e); } catch (RuntimeException e) { // Checked exceptions will be wrapped as the cause if this was a serialized // server-side exception if (e.getCause() instanceof MissingLogDataException) { /* * This means the master was unable to find a log entry for the txid we just asked. This * probably means the thing we asked for is too old or too new. Anyway, since it doesn't * have the tx it is better if we just throw our store away and ask for a new copy. Next * time around it shouldn't have to even pass from here. */ throw new StoreOutOfDateException( "The master is missing the log required to complete the " + "consistency check", e.getCause()); } throw e; } if (myMaster != -1 && (myMaster != handshake.txAuthor() || myChecksum != handshake.txChecksum())) { String msg = "The cluster contains two logically different versions of the database.. This will be " + "automatically resolved. Details: I (machineId:" + config.get(ClusterSettings.server_id) + ") think machineId for txId (" + myLastCommittedTx + ") is " + myMaster + ", but master (machineId:" + getServerId(availableMasterId) + ") says that it's " + handshake; throw new BranchedDataException(msg); } msgLog.logMessage( "Master id for last committed tx ok with highestTxId=" + myLastCommittedTx + " with masterId=" + myMaster, true); }
private void checkDataConsistency( RequestContextFactory requestContextFactory, NeoStoreXaDataSource nioneoDataSource, URI masterUri) throws Throwable { // Must be called under lock on XaDataSourceManager LifeSupport checkConsistencyLife = new LifeSupport(); TransactionIdStore txIdStore = null; try { MasterClient checkConsistencyMaster = newMasterClient(masterUri, nioneoDataSource.getStoreId(), checkConsistencyLife); checkConsistencyLife.start(); console.log("Checking store consistency with master"); txIdStore = nioneoDataSource.getDependencyResolver().resolveDependency(TransactionIdStore.class); checkDataConsistencyWithMaster( masterUri, checkConsistencyMaster, nioneoDataSource, txIdStore); console.log("Store is consistent"); /* * Pull updates, since the store seems happy and everything. No matter how far back we are, this is just * one thread doing the pulling, while the guard is up. This will prevent a race between all transactions * that may start the moment the database becomes available, where all of them will pull the same txs from * the master but eventually only one will get to apply them. */ console.log("Catching up with master"); resolver .resolveDependency(TransactionCommittingResponseUnpacker.class) .unpackResponse( checkConsistencyMaster.pullUpdates(requestContextFactory.newRequestContext())); console.log("Now consistent with master"); } catch (NoSuchLogVersionException e) { msgLog.logMessage( "Cannot catch up to master by pulling updates, because I cannot find the archived " + "logical log file that has the transaction I would start from. I'm going to copy the whole " + "store from the master instead."); try { stopServicesAndHandleBranchedStore(config.get(HaSettings.branched_data_policy)); } catch (Throwable throwable) { msgLog.warn("Failed preparing for copying the store from the master instance", throwable); } throw e; } catch (StoreUnableToParticipateInClusterException upe) { console.log( "The store is inconsistent. Will treat it as branched and fetch a new one from the master"); msgLog.warn( "Current store is unable to participate in the cluster; fetching new store from master", upe); try { stopServicesAndHandleBranchedStore(config.get(HaSettings.branched_data_policy)); } catch (IOException e) { msgLog.warn("Failed while trying to handle branched data", e); } throw upe; } catch (MismatchingStoreIdException e) { console.log( "The store does not represent the same database as master. Will remove and fetch a new one from master"); if (txIdStore.getLastCommittingTransactionId() == 0) { msgLog.warn("Found and deleting empty store with mismatching store id " + e.getMessage()); stopServicesAndHandleBranchedStore(BranchedDataPolicy.keep_none); } else { msgLog.error("Store cannot participate in cluster due to mismatching store IDs"); } throw e; } finally { checkConsistencyLife.shutdown(); } }