Beispiel #1
0
  /**
   * Re-opens the already-existing environment directory of the given site and
   * starts it as a replication client.
   *
   * <p>The new environment and its event handler are recorded in {@code envs}
   * and {@code monitors} under {@code siteId} once the environment has been
   * opened successfully.
   *
   * @param siteId index of the site to restart; selects the ports and the
   *     {@code "dir" + siteId} home directory
   * @throws Exception if the environment cannot be opened or replication
   *     cannot be started
   */
  public void restart(int siteId) throws Exception {
    EnvironmentConfig envConfig = makeBasicConfig();

    // This site's own listening address.
    int localPort = config.getMyPort(siteId);
    ReplicationManagerSiteConfig localSite =
        new ReplicationManagerSiteConfig("localhost", localPort);
    localSite.setLocalSite(true);
    localSite.setLegacy(true);
    envConfig.addReplicationManagerSite(localSite);

    // The other member of the group.
    int remotePort = config.getOtherPort(siteId);
    ReplicationManagerSiteConfig remoteSite =
        new ReplicationManagerSiteConfig("localhost", remotePort);
    remoteSite.setLegacy(true);
    envConfig.addReplicationManagerSite(remoteSite);

    MyEventHandler handler = new MyEventHandler();
    envConfig.setEventHandler(handler);

    // The directory must be left over from an earlier run of this site.
    File siteDir = new File(config.getBaseDir(), "dir" + siteId);
    assertTrue(siteDir.exists());

    Environment env = new Environment(siteDir, envConfig);
    env.setReplicationConfig(ReplicationConfig.STRICT_2SITE, false);

    envs[siteId] = env;
    monitors[siteId] = handler;

    // Effectively disable connection retries from this side: after the first
    // failed attempt, the old master is the one expected to connect to us.
    env.setReplicationTimeout(ReplicationTimeoutType.CONNECTION_RETRY, Integer.MAX_VALUE);
    env.replicationManagerStart(3, ReplicationManagerStartPolicy.REP_CLIENT);
  }
Beispiel #2
0
  /**
   * Re-opens the existing environment directory of {@code site}, starts it as
   * a replication client, waits on its event handler and then asserts that
   * replication start-up has completed.
   *
   * @param site index of the site to start; selects the ports and the
   *     {@code "dir" + site} home directory
   * @param useHB when {@code true}, heartbeat send and monitor timeouts are
   *     configured before replication is started
   * @throws Exception if the environment cannot be opened or replication
   *     cannot be started
   */
  public void joinExistingClient(int site, boolean useHB) throws Exception {
    EnvironmentConfig envConfig = makeBasicConfig();

    // This site's own listening address.
    int localPort = config.getMyPort(site);
    ReplicationManagerSiteConfig localSite =
        new ReplicationManagerSiteConfig("localhost", localPort);
    localSite.setLocalSite(true);
    localSite.setLegacy(true);
    envConfig.addReplicationManagerSite(localSite);

    // The other member of the group.
    int remotePort = config.getOtherPort(site);
    ReplicationManagerSiteConfig remoteSite =
        new ReplicationManagerSiteConfig("localhost", remotePort);
    remoteSite.setLegacy(true);
    envConfig.addReplicationManagerSite(remoteSite);

    // The handler is published before the environment is opened.
    MyEventHandler handler = new MyEventHandler();
    monitors[site] = handler;
    envConfig.setEventHandler(handler);

    // The directory must be left over from an earlier run of this site.
    File siteDir = new File(config.getBaseDir(), "dir" + site);
    assertTrue(siteDir.exists());

    Environment env = new Environment(siteDir, envConfig);
    env.setReplicationConfig(ReplicationConfig.STRICT_2SITE, false);

    if (useHB) {
      // The monitor timeout is twice the send interval.
      env.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_SEND, 3000000);
      env.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_MONITOR, 6000000);
    }

    envs[site] = env;
    // Short retry interval so that this client reconnects promptly.
    env.setReplicationTimeout(ReplicationTimeoutType.CONNECTION_RETRY, 1000000);
    env.replicationManagerStart(3, ReplicationManagerStartPolicy.REP_CLIENT);
    handler.await();

    boolean startupDone = env.getReplicationStats(StatsConfig.DEFAULT).getStartupComplete();
    assertTrue(startupDone);
  }
Beispiel #3
0
  /**
   * Worker body for one replication site.
   *
   * <p>Creates a fresh home directory named {@code baseDirName + threadNumber},
   * opens a transactional, replicated environment in it and starts the
   * replication manager: as master when no master exists yet
   * ({@code masterThreadIndex == -1}), otherwise as a client that uses the
   * existing master as its bootstrap helper. A client then polls the
   * replication statistics until start-up is reported complete or
   * {@code maxLoopWait} polls have elapsed, and asserts completion.
   */
  public void run() {
    String homedirName = baseDirName + threadNumber;
    TestUtils.removeDir(homedirName);

    homedir = new File(homedirName);
    try {
      // File.mkdir() signals ordinary failure by returning false rather than
      // throwing, so the result has to be checked explicitly.
      if (!homedir.mkdir()) {
        TestUtils.DEBUGOUT(
            2,
            "Warning: initialization had a problem creating a clean directory.\n" + homedirName);
      }
    } catch (Exception e) {
      TestUtils.DEBUGOUT(
          2, "Warning: initialization had a problem creating a clean directory.\n" + e);
    }

    TestUtils.DEBUGOUT(1, "Creating worker: " + threadNumber);

    envConfig = new EnvironmentConfig();
    envConfig.setErrorStream(TestUtils.getErrorStream());
    envConfig.setErrorPrefix("RepmgrElectionTest test(" + threadNumber + ")");
    envConfig.setAllowCreate(true);
    envConfig.setRunRecovery(true);
    envConfig.setThreaded(true);
    envConfig.setInitializeLocking(true);
    envConfig.setInitializeLogging(true);
    envConfig.setInitializeCache(true);
    envConfig.setTransactional(true);
    envConfig.setTxnNoSync(true);
    envConfig.setInitializeReplication(true);
    envConfig.setVerboseReplication(false);

    // Each worker listens on its own port, offset from basePort by its index.
    ReplicationManagerSiteConfig localConfig =
        new ReplicationManagerSiteConfig(address, basePort + threadNumber);
    localConfig.setLocalSite(true);
    envConfig.addReplicationManagerSite(localConfig);

    envConfig.setReplicationPriority(priorities[threadNumber]);
    envConfig.setEventHandler(this);
    envConfig.setReplicationManagerAckPolicy(ReplicationManagerAckPolicy.ALL);

    if (masterThreadIndex >= 0) {
      // If we already have the master, then set it as the bootstrap helper,
      // otherwise, set local site as new master.
      ReplicationManagerSiteConfig remoteConfig =
          new ReplicationManagerSiteConfig(address, basePort + masterThreadIndex);
      remoteConfig.setBootstrapHelper(true);
      envConfig.addReplicationManagerSite(remoteConfig);
    }

    try {
      dbenv = new Environment(homedir, envConfig);
    } catch (FileNotFoundException e) {
      fail("Unexpected FNFE in standard environment creation." + e);
    } catch (DatabaseException dbe) {
      fail("Unexpected database exception came from environment create." + dbe);
    }

    try {
      // If we do not have master, then set local site as new master.
      if (masterThreadIndex == -1) {
        dbenv.replicationManagerStart(NUM_WORKER_THREADS, ReplicationManagerStartPolicy.REP_MASTER);
      } else {
        dbenv.replicationManagerStart(NUM_WORKER_THREADS, ReplicationManagerStartPolicy.REP_CLIENT);
      }
    } catch (DatabaseException dbe) {
      fail("Unexpected database exception came from replicationManagerStart." + dbe);
    }

    TestUtils.DEBUGOUT(1, "Started replication site: " + threadNumber);
    lastSiteStarted = true;

    try {
      // Settle time grows with the worker index.
      java.lang.Thread.sleep(1000 * (1 + threadNumber));
    } catch (InterruptedException ignored) {
      // Deliberately ignored: an early wake-up only shortens the settle time.
      // The interrupt flag is not restored because that would make the polling
      // sleeps below throw immediately.
    }

    if (masterThreadIndex != -1) {
      // Wait for "Start-up done" for each client, then add next client.
      ReplicationStats rs = null;
      int i = 0;
      do {
        try {
          java.lang.Thread.sleep(2000);
        } catch (InterruptedException ignored) {
          // Deliberately ignored: the loop simply polls again.
        }

        try {
          rs = dbenv.getReplicationStats(StatsConfig.DEFAULT);
        } catch (DatabaseException dbe) {
          dbe.printStackTrace();
          fail("Unexpected database exception came from getReplicationStats." + dbe);
        }
      } while (!rs.getStartupComplete() && i++ < maxLoopWait);
      assertTrue(rs.getStartupComplete());
    }
  }
Beispiel #4
0
  /**
   * Configures and opens the replicated environment described by
   * {@code config}, starts the checkpoint and log-archive support threads and
   * finally starts the replication manager.
   *
   * @param config application configuration; stored in {@code appConfig}
   * @return 0 on success, or 1 if the environment home directory could not be
   *     found, in which case nothing further is configured or started
   * @throws DatabaseException if Berkeley DB reports an error while opening
   *     or configuring the environment
   */
  public int init(RepConfig config) throws DatabaseException {
    appConfig = config;
    EnvironmentConfig envConfig = new EnvironmentConfig();
    envConfig.setErrorStream(System.err);
    envConfig.setErrorPrefix(RepConfig.progname);

    envConfig.setReplicationManagerLocalSite(appConfig.getThisHost());
    for (RepRemoteHost host = appConfig.getFirstOtherHost();
        host != null;
        host = appConfig.getNextOtherHost()) {
      envConfig.replicationManagerAddRemoteSite(host.getAddress(), host.isPeer());
    }
    if (appConfig.totalSites > 0) {
      envConfig.setReplicationNumSites(appConfig.totalSites);
    }

    /*
     * Set replication group election priority for this environment.
     * An election first selects the site with the most recent log
     * records as the new master.  If multiple sites have the most
     * recent log records, the site with the highest priority value
     * is selected as master.
     */
    envConfig.setReplicationPriority(appConfig.priority);

    envConfig.setCacheSize(RepConfig.CACHESIZE);
    envConfig.setTxnNoSync(true);

    envConfig.setEventHandler(new RepQuoteEventHandler());

    /*
     * Set the policy that determines how master and client sites
     * handle acknowledgement of replication messages needed for
     * permanent records.  The default policy of "quorum" requires only
     * a quorum of electable peers sufficient to ensure a permanent
     * record remains durable if an election is held.  The "all" option
     * requires all clients to acknowledge a permanent replication
     * message instead.
     */
    envConfig.setReplicationManagerAckPolicy(appConfig.ackPolicy);

    /*
     * Set the threshold for the minimum and maximum time the client
     * waits before requesting retransmission of a missing message.
     * Base these values on the performance and load characteristics
     * of the master and client host platforms as well as the round
     * trip message time.
     */
    envConfig.setReplicationRequestMin(20000);
    envConfig.setReplicationRequestMax(500000);

    /*
     * Configure deadlock detection to ensure that any deadlocks
     * are broken by having one of the conflicting lock requests
     * rejected. DB_LOCK_DEFAULT uses the lock policy specified
     * at environment creation time or DB_LOCK_RANDOM if none was
     * specified.
     */
    envConfig.setLockDetectMode(LockDetectMode.DEFAULT);

    envConfig.setAllowCreate(true);
    envConfig.setRunRecovery(true);
    envConfig.setThreaded(true);
    envConfig.setInitializeReplication(true);
    envConfig.setInitializeLocking(true);
    envConfig.setInitializeLogging(true);
    envConfig.setInitializeCache(true);
    envConfig.setTransactional(true);
    envConfig.setVerboseReplication(appConfig.verbose);
    try {
      dbenv = new RepQuoteEnvironment(appConfig.getHome(), envConfig);
    } catch (FileNotFoundException e) {
      System.err.println("FileNotFound exception: " + e);
      System.err.println("Ensure that the environment directory is pre-created.");
      // The environment was not opened, so dbenv was not assigned by this
      // call.  Stop here instead of dereferencing it below.
      return 1;
    }

    if (appConfig.bulk) {
      dbenv.setReplicationConfig(ReplicationConfig.BULK, true);
    }

    /*
     * Configure heartbeat timeouts so that repmgr monitors the
     * health of the TCP connection.  Master sites broadcast a heartbeat
     * at the frequency specified by the DB_REP_HEARTBEAT_SEND timeout.
     * Client sites wait for message activity the length of the
     * DB_REP_HEARTBEAT_MONITOR timeout before concluding that the
     * connection to the master is lost.  The DB_REP_HEARTBEAT_MONITOR
     * timeout should be longer than the DB_REP_HEARTBEAT_SEND timeout.
     */
    dbenv.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_SEND, 5000000);
    dbenv.setReplicationTimeout(ReplicationTimeoutType.HEARTBEAT_MONITOR, 10000000);

    /* The following base replication features may also be useful to your
     * application. See Berkeley DB documentation for more details.
     *   - Master leases: Provide stricter consistency for data reads
     *     on a master site.
     *   - Timeouts: Customize the amount of time Berkeley DB waits
     *     for such things as an election to be concluded or a master
     *     lease to be granted.
     *   - Delayed client synchronization: Manage the master site's
     *     resources by spreading out resource-intensive client
     *     synchronizations.
     *   - Blocked client operations: Return immediately with an error
     *     instead of waiting indefinitely if a client operation is
     *     blocked by an ongoing client synchronization.
     *
     * The following repmgr features may also be useful to your
     * application.  See Berkeley DB documentation for more details.
     *  - Two-site strict majority rule - In a two-site replication
     *    group, require both sites to be available to elect a new
     *    master.
     *  - Timeouts - Customize the amount of time repmgr waits
     *    for such things as waiting for acknowledgements or attempting
     *    to reconnect to other sites.
     *  - Site list - return a list of sites currently known to repmgr.
     */

    /* Start checkpoint and log archive support threads. */
    ckpThr = new CheckpointThread(dbenv);
    ckpThr.start();
    lgaThr = new LogArchiveThread(dbenv, envConfig);
    lgaThr.start();

    /* Start replication manager. */
    dbenv.replicationManagerStart(3, appConfig.startPolicy);

    return 0;
  }
Beispiel #5
0
  /**
   * Checks that a master whose client connections are clogged neither blocks
   * a new transaction for the full ack timeout nor keeps its message
   * processing threads tied up once those connections are shut down.
   *
   * <p>Outline: start a master and fill a database until it spans at least 500
   * pages; instruct the "fiddler" on {@code mgrPort} to clog the paths
   * to two clients; start those clients; time one extra {@code put}; shut the
   * clogged paths down; start a third client and time its synchronization.
   *
   * @throws Exception on any environment, database or socket failure
   */
  @Test
  public void testDraining() throws Exception {
    EnvironmentConfig masterConfig = makeBasicConfig();
    masterConfig.setReplicationLimit(100000000);
    addLegacySites(masterConfig, masterPort, clientPort, client2Port, client3Port);

    Environment master = new Environment(mkdir("master"), masterConfig);
    setTimeouts(master);
    // Prevent connection retries, so that all connections
    // originate from clients
    master.setReplicationTimeout(ReplicationTimeoutType.CONNECTION_RETRY, Integer.MAX_VALUE);
    master.replicationManagerStart(2, ReplicationManagerStartPolicy.REP_MASTER);

    DatabaseConfig dc = new DatabaseConfig();
    dc.setTransactional(true);
    dc.setAllowCreate(true);
    dc.setType(DatabaseType.BTREE);
    dc.setPageSize(4096);
    Database db = master.openDatabase(null, "test.db", null, dc);

    DatabaseEntry key = new DatabaseEntry();
    DatabaseEntry value = new DatabaseEntry();
    value.setData(data);

    // Keep inserting until the btree reports at least 500 pages.
    for (int i = 0; ((BtreeStats) db.getStats(null, null)).getPageCount() < 500; i++) {
      String k = "The record number is: " + i;
      key.setData(k.getBytes());
      db.put(null, key, value);
    }

    // tell fiddler to stop reading once it sees a PAGE message
    Socket s = new Socket("localhost", mgrPort);
    try {
      OutputStreamWriter w = new OutputStreamWriter(s.getOutputStream());
      // One reader for the whole conversation: a second BufferedReader on the
      // same stream could miss bytes that the first one had already buffered.
      BufferedReader br = new BufferedReader(new InputStreamReader(s.getInputStream()));

      String path1 = "{" + masterPort + "," + clientPort + "}"; // looks like {6000,6001}
      w.write("{init," + path1 + ",page_clog}\r\n");
      w.flush();
      br.readLine(); // first reply line is discarded; only the second is checked
      assertEquals("ok", br.readLine());

      // create client
      //
      EnvironmentConfig ec = makeBasicConfig();
      addLegacySites(ec, clientPort, masterPort, client2Port, client3Port);
      Environment client = new Environment(mkdir("client"), ec);
      setTimeouts(client);
      client.replicationManagerStart(1, ReplicationManagerStartPolicy.REP_CLIENT);

      // wait til it gets stuck
      Thread.sleep(5000); // FIXME

      // Do the same for another client, because the master has 2
      // msg processing threads.  (It's no longer possible to
      // configure just 1.)
      String path2 = "{" + masterPort + "," + client2Port + "}";
      w.write("{init," + path2 + ",page_clog}\r\n");
      w.flush();
      br.readLine(); // first reply line is discarded; only the second is checked
      assertEquals("ok", br.readLine());

      ec = makeBasicConfig();
      addLegacySites(ec, client2Port, masterPort, clientPort, client3Port);
      Environment client2 = new Environment(mkdir("client2"), ec);
      setTimeouts(client2);
      client2.replicationManagerStart(1, ReplicationManagerStartPolicy.REP_CLIENT);

      // wait til it gets stuck
      Thread.sleep(5000);

      // With the connection stuck, the master cannot write out log
      // records for new "live" transactions.  Knowing we didn't
      // write the record, we should not bother waiting for an ack
      // that cannot possibly arrive; so we should simply return
      // quickly.  The duration should be very quick, but anything
      // less than the ack timeout indicates correct behavior (in
      // case this test runs on a slow, overloaded system).
      //
      long startTime = System.currentTimeMillis();
      key.setData("one extra record".getBytes());
      db.put(null, key, value);
      long duration = System.currentTimeMillis() - startTime;
      assertTrue("txn duration: " + duration, duration < 29000);
      System.out.println("txn duration: " + duration);
      db.close();

      // Tell fiddler to close the connections.  That should trigger
      // us to abandon the timeout.  Then create another client and
      // see that it can complete its internal init quickly.  Since
      // we have limited threads at the master, this demonstrates
      // that they were abandoned.
      //
      path1 = "{" + clientPort + "," + masterPort + "}"; // looks like {6001,6000}
      w.write("{" + path1 + ",shutdown}\r\n");
      w.flush();
      assertEquals("ok", br.readLine());
      path2 = "{" + client2Port + "," + masterPort + "}"; // looks like {6002,6000}
      w.write("{" + path2 + ",shutdown}\r\n");
      w.flush();
      assertEquals("ok", br.readLine());

      ec = makeBasicConfig();
      addLegacySites(ec, client3Port, masterPort, clientPort, client2Port);

      EventHandler clientMonitor = new EventHandler();
      ec.setEventHandler(clientMonitor);
      Environment client3 = new Environment(mkdir("client3"), ec);
      setTimeouts(client3);
      startTime = System.currentTimeMillis();
      client3.replicationManagerStart(2, ReplicationManagerStartPolicy.REP_CLIENT);
      clientMonitor.await();
      duration = System.currentTimeMillis() - startTime;
      assertTrue("sync duration: " + duration, duration < 20000); // 20 seconds should be plenty

      client3.close();
      master.close();

      w.write("shutdown\r\n");
      w.flush();
      assertEquals("ok", br.readLine());
    } finally {
      // Release the control connection even when an assertion above fails.
      s.close();
    }
  }

  /**
   * Registers the local site and then each remote site on {@code cfg}, all on
   * "localhost" and all flagged as legacy, in the order given.
   */
  private void addLegacySites(EnvironmentConfig cfg, int localPort, int... remotePorts) {
    ReplicationManagerSiteConfig local = new ReplicationManagerSiteConfig("localhost", localPort);
    local.setLocalSite(true);
    local.setLegacy(true);
    cfg.addReplicationManagerSite(local);

    for (int remotePort : remotePorts) {
      ReplicationManagerSiteConfig remote =
          new ReplicationManagerSiteConfig("localhost", remotePort);
      remote.setLegacy(true);
      cfg.addReplicationManagerSite(remote);
    }
  }