public void restart(int siteId) throws Exception { EnvironmentConfig ec = makeBasicConfig(); int p = config.getMyPort(siteId); ReplicationManagerSiteConfig dbsite = new ReplicationManagerSiteConfig("localhost", p); dbsite.setLocalSite(true); dbsite.setLegacy(true); ec.addReplicationManagerSite(dbsite); p = config.getOtherPort(siteId); dbsite = new ReplicationManagerSiteConfig("localhost", p); dbsite.setLegacy(true); ec.addReplicationManagerSite(dbsite); MyEventHandler monitor = new MyEventHandler(); ec.setEventHandler(monitor); File clientDir = new File(config.getBaseDir(), "dir" + siteId); assertTrue(clientDir.exists()); Environment client = new Environment(clientDir, ec); client.setReplicationConfig(ReplicationConfig.STRICT_2SITE, false); envs[siteId] = client; monitors[siteId] = monitor; // we want to make sure we don't retry from here after the // initial failure, because we want to make the old master // connect to us. client.setReplicationTimeout(ReplicationTimeoutType.CONNECTION_RETRY, Integer.MAX_VALUE); client.replicationManagerStart(3, ReplicationManagerStartPolicy.REP_CLIENT); }
public void joinExistingClient(int site, boolean useHB) throws Exception { EnvironmentConfig ec = makeBasicConfig(); int p = config.getMyPort(site); ReplicationManagerSiteConfig dbsite = new ReplicationManagerSiteConfig("localhost", p); dbsite.setLocalSite(true); dbsite.setLegacy(true); ec.addReplicationManagerSite(dbsite); p = config.getOtherPort(site); dbsite = new ReplicationManagerSiteConfig("localhost", p); dbsite.setLegacy(true); ec.addReplicationManagerSite(dbsite); MyEventHandler monitor = new MyEventHandler(); monitors[site] = monitor; ec.setEventHandler(monitor); File clientDir = new File(config.getBaseDir(), "dir" + site); assertTrue(clientDir.exists()); Environment client = new Environment(clientDir, ec); client.setReplicationConfig(ReplicationConfig.STRICT_2SITE, false); if (useHB) { client.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_SEND, 3000000); client.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_MONITOR, 6000000); } envs[site] = client; client.setReplicationTimeout(ReplicationTimeoutType.CONNECTION_RETRY, 1000000); // be impatient client.replicationManagerStart(3, ReplicationManagerStartPolicy.REP_CLIENT); monitor.await(); assertTrue(client.getReplicationStats(StatsConfig.DEFAULT).getStartupComplete()); }
public void run() { String homedirName = baseDirName + threadNumber; TestUtils.removeDir(homedirName); try { homedir = new File(homedirName); homedir.mkdir(); } catch (Exception e) { TestUtils.DEBUGOUT( 2, "Warning: initialization had a problem creating a clean directory.\n" + e); } try { homedir = new File(homedirName); } catch (NullPointerException npe) { // can't really happen :) } TestUtils.DEBUGOUT(1, "Creating worker: " + threadNumber); envConfig = new EnvironmentConfig(); envConfig.setErrorStream(TestUtils.getErrorStream()); envConfig.setErrorPrefix("RepmgrElectionTest test(" + threadNumber + ")"); envConfig.setAllowCreate(true); envConfig.setRunRecovery(true); envConfig.setThreaded(true); envConfig.setInitializeLocking(true); envConfig.setInitializeLogging(true); envConfig.setInitializeCache(true); envConfig.setTransactional(true); envConfig.setTxnNoSync(true); envConfig.setInitializeReplication(true); envConfig.setVerboseReplication(false); ReplicationManagerSiteConfig localConfig = new ReplicationManagerSiteConfig(address, basePort + threadNumber); localConfig.setLocalSite(true); envConfig.addReplicationManagerSite(localConfig); envConfig.setReplicationPriority(priorities[threadNumber]); envConfig.setEventHandler(this); envConfig.setReplicationManagerAckPolicy(ReplicationManagerAckPolicy.ALL); if (masterThreadIndex >= 0) { // If we already have the master, then set it as the bootstrap helper, // otherwise, set local site as new master. ReplicationManagerSiteConfig remoteConfig = new ReplicationManagerSiteConfig(address, basePort + masterThreadIndex); remoteConfig.setBootstrapHelper(true); envConfig.addReplicationManagerSite(remoteConfig); } try { dbenv = new Environment(homedir, envConfig); } catch (FileNotFoundException e) { fail("Unexpected FNFE in standard environment creation." + e); } catch (DatabaseException dbe) { fail("Unexpected database exception came from environment create." + dbe); } try { // If we do not have master, then set local site as new master. if (masterThreadIndex == -1) dbenv.replicationManagerStart(NUM_WORKER_THREADS, ReplicationManagerStartPolicy.REP_MASTER); else dbenv.replicationManagerStart(NUM_WORKER_THREADS, ReplicationManagerStartPolicy.REP_CLIENT); } catch (DatabaseException dbe) { fail("Unexpected database exception came from replicationManagerStart." + dbe); } TestUtils.DEBUGOUT(1, "Started replication site: " + threadNumber); lastSiteStarted = true; try { java.lang.Thread.sleep(1000 * (1 + threadNumber)); } catch (InterruptedException ie) { } if (masterThreadIndex != -1) { // Wait for "Start-up done" for each client, then add next client. ReplicationStats rs = null; int i = 0; do { try { java.lang.Thread.sleep(2000); } catch (InterruptedException e) { } try { rs = dbenv.getReplicationStats(StatsConfig.DEFAULT); } catch (DatabaseException dbe) { dbe.printStackTrace(); fail("Unexpected database exception came from getReplicationStats." + dbe); } } while (!rs.getStartupComplete() && i++ < maxLoopWait); assertTrue(rs.getStartupComplete()); } }
public int init(RepConfig config) throws DatabaseException { int ret = 0; appConfig = config; EnvironmentConfig envConfig = new EnvironmentConfig(); envConfig.setErrorStream(System.err); envConfig.setErrorPrefix(RepConfig.progname); envConfig.setReplicationManagerLocalSite(appConfig.getThisHost()); for (RepRemoteHost host = appConfig.getFirstOtherHost(); host != null; host = appConfig.getNextOtherHost()) { envConfig.replicationManagerAddRemoteSite(host.getAddress(), host.isPeer()); } if (appConfig.totalSites > 0) envConfig.setReplicationNumSites(appConfig.totalSites); /* * Set replication group election priority for this environment. * An election first selects the site with the most recent log * records as the new master. If multiple sites have the most * recent log records, the site with the highest priority value * is selected as master. */ envConfig.setReplicationPriority(appConfig.priority); envConfig.setCacheSize(RepConfig.CACHESIZE); envConfig.setTxnNoSync(true); envConfig.setEventHandler(new RepQuoteEventHandler()); /* * Set the policy that determines how master and client sites * handle acknowledgement of replication messages needed for * permanent records. The default policy of "quorum" requires only * a quorum of electable peers sufficient to ensure a permanent * record remains durable if an election is held. The "all" option * requires all clients to acknowledge a permanent replication * message instead. */ envConfig.setReplicationManagerAckPolicy(appConfig.ackPolicy); /* * Set the threshold for the minimum and maximum time the client * waits before requesting retransmission of a missing message. * Base these values on the performance and load characteristics * of the master and client host platforms as well as the round * trip message time. */ envConfig.setReplicationRequestMin(20000); envConfig.setReplicationRequestMax(500000); /* * Configure deadlock detection to ensure that any deadlocks * are broken by having one of the conflicting lock requests * rejected. DB_LOCK_DEFAULT uses the lock policy specified * at environment creation time or DB_LOCK_RANDOM if none was * specified. */ envConfig.setLockDetectMode(LockDetectMode.DEFAULT); envConfig.setAllowCreate(true); envConfig.setRunRecovery(true); envConfig.setThreaded(true); envConfig.setInitializeReplication(true); envConfig.setInitializeLocking(true); envConfig.setInitializeLogging(true); envConfig.setInitializeCache(true); envConfig.setTransactional(true); envConfig.setVerboseReplication(appConfig.verbose); try { dbenv = new RepQuoteEnvironment(appConfig.getHome(), envConfig); } catch (FileNotFoundException e) { System.err.println("FileNotFound exception: " + e); System.err.println("Ensure that the environment directory is pre-created."); ret = 1; } if (appConfig.bulk) dbenv.setReplicationConfig(ReplicationConfig.BULK, true); /* * Configure heartbeat timeouts so that repmgr monitors the * health of the TCP connection. Master sites broadcast a heartbeat * at the frequency specified by the DB_REP_HEARTBEAT_SEND timeout. * Client sites wait for message activity the length of the * DB_REP_HEARTBEAT_MONITOR timeout before concluding that the * connection to the master is lost. The DB_REP_HEARTBEAT_MONITOR * timeout should be longer than the DB_REP_HEARTBEAT_SEND timeout. */ dbenv.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_SEND, 5000000); dbenv.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_MONITOR, 10000000); /* The following base replication features may also be useful to your * application. See Berkeley DB documentation for more details. * - Master leases: Provide stricter consistency for data reads * on a master site. * - Timeouts: Customize the amount of time Berkeley DB waits * for such things as an election to be concluded or a master * lease to be granted. * - Delayed client synchronization: Manage the master site's * resources by spreading out resource-intensive client * synchronizations. * - Blocked client operations: Return immediately with an error * instead of waiting indefinitely if a client operation is * blocked by an ongoing client synchronization. * * The following repmgr features may also be useful to your * application. See Berkeley DB documentation for more details. * - Two-site strict majority rule - In a two-site replication * group, require both sites to be available to elect a new * master. * - Timeouts - Customize the amount of time repmgr waits * for such things as waiting for acknowledgements or attempting * to reconnect to other sites. * - Site list - return a list of sites currently known to repmgr. */ /* Start checkpoint and log archive support threads. */ ckpThr = new CheckpointThread(dbenv); ckpThr.start(); lgaThr = new LogArchiveThread(dbenv, envConfig); lgaThr.start(); /* Start replication manager. */ dbenv.replicationManagerStart(3, appConfig.startPolicy); return ret; }
@Test public void testDraining() throws Exception { EnvironmentConfig masterConfig = makeBasicConfig(); masterConfig.setReplicationLimit(100000000); ReplicationManagerSiteConfig site = new ReplicationManagerSiteConfig("localhost", masterPort); site.setLocalSite(true); site.setLegacy(true); masterConfig.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", clientPort); site.setLegacy(true); masterConfig.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", client2Port); site.setLegacy(true); masterConfig.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", client3Port); site.setLegacy(true); masterConfig.addReplicationManagerSite(site); Environment master = new Environment(mkdir("master"), masterConfig); setTimeouts(master); // Prevent connection retries, so that all connections // originate from clients master.setReplicationTimeout(ReplicationTimeoutType.CONNECTION_RETRY, Integer.MAX_VALUE); master.replicationManagerStart(2, ReplicationManagerStartPolicy.REP_MASTER); DatabaseConfig dc = new DatabaseConfig(); dc.setTransactional(true); dc.setAllowCreate(true); dc.setType(DatabaseType.BTREE); dc.setPageSize(4096); Database db = master.openDatabase(null, "test.db", null, dc); DatabaseEntry key = new DatabaseEntry(); DatabaseEntry value = new DatabaseEntry(); value.setData(data); for (int i = 0; ((BtreeStats) db.getStats(null, null)).getPageCount() < 500; i++) { String k = "The record number is: " + i; key.setData(k.getBytes()); db.put(null, key, value); } // tell fiddler to stop reading once it sees a PAGE message Socket s = new Socket("localhost", mgrPort); OutputStreamWriter w = new OutputStreamWriter(s.getOutputStream()); String path1 = "{" + masterPort + "," + clientPort + "}"; // looks like {6000,6001} w.write("{init," + path1 + ",page_clog}\r\n"); w.flush(); BufferedReader br = new BufferedReader(new InputStreamReader(s.getInputStream())); br.readLine(); assertEquals("ok", br.readLine()); // create client // EnvironmentConfig ec = makeBasicConfig(); site = new ReplicationManagerSiteConfig("localhost", clientPort); site.setLocalSite(true); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", masterPort); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", client2Port); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", client3Port); site.setLegacy(true); ec.addReplicationManagerSite(site); Environment client = new Environment(mkdir("client"), ec); setTimeouts(client); client.replicationManagerStart(1, ReplicationManagerStartPolicy.REP_CLIENT); // wait til it gets stuck Thread.sleep(5000); // FIXME // Do the same for another client, because the master has 2 // msg processing threads. (It's no longer possible to // configure just 1.) String path2 = "{" + masterPort + "," + client2Port + "}"; w.write("{init," + path2 + ",page_clog}\r\n"); w.flush(); br = new BufferedReader(new InputStreamReader(s.getInputStream())); br.readLine(); assertEquals("ok", br.readLine()); ec = makeBasicConfig(); site = new ReplicationManagerSiteConfig("localhost", client2Port); site.setLocalSite(true); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", masterPort); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", clientPort); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", client3Port); site.setLegacy(true); ec.addReplicationManagerSite(site); Environment client2 = new Environment(mkdir("client2"), ec); setTimeouts(client2); client2.replicationManagerStart(1, ReplicationManagerStartPolicy.REP_CLIENT); // wait til it gets stuck Thread.sleep(5000); // With the connection stuck, the master cannot write out log // records for new "live" transactions. Knowing we didn't // write the record, we should not bother waiting for an ack // that cannot possibly arrive; so we should simply return // quickly. The duration should be very quick, but anything // less than the ack timeout indicates correct behavior (in // case this test runs on a slow, overloaded system). // long startTime = System.currentTimeMillis(); key.setData("one extra record".getBytes()); db.put(null, key, value); long duration = System.currentTimeMillis() - startTime; assertTrue("txn duration: " + duration, duration < 29000); System.out.println("txn duration: " + duration); db.close(); // Tell fiddler to close the connections. That should trigger // us to abandon the timeout. Then create another client and // see that it can complete its internal init quickly. Since // we have limited threads at the master, this demonstrates // that they were abandoned. // path1 = "{" + clientPort + "," + masterPort + "}"; // looks like {6001,6000} w.write("{" + path1 + ",shutdown}\r\n"); w.flush(); assertEquals("ok", br.readLine()); path2 = "{" + client2Port + "," + masterPort + "}"; // looks like {6001,6000} w.write("{" + path2 + ",shutdown}\r\n"); w.flush(); assertEquals("ok", br.readLine()); ec = makeBasicConfig(); site = new ReplicationManagerSiteConfig("localhost", client3Port); site.setLocalSite(true); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", masterPort); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", clientPort); site.setLegacy(true); ec.addReplicationManagerSite(site); site = new ReplicationManagerSiteConfig("localhost", client2Port); site.setLegacy(true); ec.addReplicationManagerSite(site); EventHandler clientMonitor = new EventHandler(); ec.setEventHandler(clientMonitor); Environment client3 = new Environment(mkdir("client3"), ec); setTimeouts(client3); startTime = System.currentTimeMillis(); client3.replicationManagerStart(2, ReplicationManagerStartPolicy.REP_CLIENT); clientMonitor.await(); duration = System.currentTimeMillis() - startTime; assertTrue("sync duration: " + duration, duration < 20000); // 20 seconds should be plenty client3.close(); master.close(); w.write("shutdown\r\n"); w.flush(); assertEquals("ok", br.readLine()); s.close(); }