/**
   * Issue an asynchronous ZooKeeper create for the completion node of the snapshot identified by
   * the txnId. The node is created as a persistent node under {@code VoltZK.completed_snapshots}
   * and carries a pretty-printed JSON description of the snapshot, with {@code didSucceed}
   * initialized to false, {@code hostCount} initialized to -1 and an empty
   * {@code exportSequenceNumbers} object.
   *
   * <p>The create is not awaited here; the outcome is delivered to the returned callback.
   *
   * @param path Snapshot path recorded in the node payload
   * @param nonce Nonce of the snapshot
   * @param txnId Transaction ID of the snapshot, must be greater than 0
   * @param isTruncation Whether or not this is a truncation snapshot
   * @param truncReqId Optional unique ID fed back to the monitor for identification
   * @return the callback that was handed to the asynchronous ZooKeeper create call
   */
  public static ZKUtil.StringCallback createSnapshotCompletionNode(
      String path, String nonce, long txnId, boolean isTruncation, String truncReqId) {
    if (txnId <= 0) {
      VoltDB.crashGlobalVoltDB("Txnid must be greather than 0", true, null);
    }

    byte payload[] = null;
    try {
      JSONStringer json = new JSONStringer();
      json.object();
      json.key("txnId").value(txnId);
      json.key("isTruncation").value(isTruncation);
      json.key("didSucceed").value(false);
      json.key("hostCount").value(-1);
      json.key("path").value(path);
      json.key("nonce").value(nonce);
      json.key("truncReqId").value(truncReqId);
      json.key("exportSequenceNumbers").object().endObject();
      json.endObject();
      // Round-trip through JSONObject to get the indented (4 space) representation.
      final String prettyJson = new JSONObject(json.toString()).toString(4);
      payload = prettyJson.getBytes(Charsets.UTF_8);
    } catch (Exception e) {
      VoltDB.crashLocalVoltDB("Error serializing snapshot completion node JSON", true, e);
    }

    final ZKUtil.StringCallback createCallback = new ZKUtil.StringCallback();
    final String completionNodePath = VoltZK.completed_snapshots + "/" + txnId;
    VoltDB.instance()
        .getHostMessenger()
        .getZK()
        .create(
            completionNodePath,
            payload,
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT,
            createCallback,
            null);
    return createCallback;
  }
Exemple #2
0
  /**
   * Builds the mock instance and brings up the pieces it depends on.
   *
   * <p>In order, the constructor:
   *
   * <ol>
   *   <li>serializes the local metadata JSON (the "127.0.0.1" interface plus the four ports),
   *   <li>creates a catalog containing one cluster and one database,
   *   <li>starts the host messenger and creates the persistent ZooKeeper nodes,
   *   <li>publishes the local metadata as an ephemeral node under {@code VoltZK.cluster_metadata},
   *   <li>registers a stats agent mailbox and builds the site tracker.
   * </ol>
   *
   * @param clientPort client port advertised in the local metadata
   * @param adminPort admin port advertised in the local metadata
   * @param httpPort HTTP port advertised in the local metadata
   * @param drPort DR port advertised in the local metadata
   * @throws RuntimeException wrapping any exception raised during setup
   */
  public MockVoltDB(int clientPort, int adminPort, int httpPort, int drPort) {
    try {
      JSONObject obj = new JSONObject();
      JSONArray jsonArray = new JSONArray();
      jsonArray.put("127.0.0.1");
      obj.put("interfaces", jsonArray);
      obj.put("clientPort", clientPort);
      obj.put("adminPort", adminPort);
      obj.put("httpPort", httpPort);
      obj.put("drPort", drPort);
      m_localMetadata = obj.toString(4);

      m_catalog = new Catalog();
      m_catalog.execute("add / clusters " + m_clusterName);
      m_catalog.execute(
          "add "
              + m_catalog.getClusters().get(m_clusterName).getPath()
              + " databases "
              + m_databaseName);
      Cluster cluster = m_catalog.getClusters().get(m_clusterName);
      // Check before the first dereference; the assert is useless once cluster has been used.
      assert (cluster != null);
      // Set a sane default for TestMessaging (at least)
      cluster.setHeartbeattimeout(10000);

      try {
        m_hostMessenger.start();
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
      VoltZK.createPersistentZKNodes(m_hostMessenger.getZK());
      // Ephemeral: the metadata node is tied to this messenger's ZooKeeper session.
      m_hostMessenger
          .getZK()
          .create(
              VoltZK.cluster_metadata + "/" + m_hostMessenger.getHostId(),
              getLocalMetadata().getBytes("UTF-8"),
              Ids.OPEN_ACL_UNSAFE,
              CreateMode.EPHEMERAL);

      m_hostMessenger.generateMailboxId(
          m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
      m_statsAgent = new StatsAgent();
      m_statsAgent.registerMailbox(
          m_hostMessenger, m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID));
      // Every mailbox type gets an (initially empty) list so lookups never return null.
      for (MailboxType type : MailboxType.values()) {
        m_mailboxMap.put(type, new LinkedList<MailboxNodeContent>());
      }
      m_mailboxMap
          .get(MailboxType.StatsAgent)
          .add(
              new MailboxNodeContent(
                  m_hostMessenger.getHSIdForLocalSite(HostMessenger.STATS_SITE_ID), null));
      m_siteTracker = new SiteTracker(m_hostId, m_mailboxMap);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
Exemple #3
0
 /**
  * Stores the given list of live nodes in ZooKeeper at {@code VoltZK.lastKnownLiveNodes} as a
  * pretty-printed JSON object of the form {@code {"liveNodes": [...]}}. The node is created
  * first if it does not exist yet, and the data is written unconditionally (version -1).
  *
  * <p>Any failure crashes the local VoltDB instance.
  *
  * @param liveNodes the nodes to record
  */
 private void writeKnownLiveNodes(List<Integer> liveNodes) {
   try {
     final boolean nodeMissing = m_zk.exists(VoltZK.lastKnownLiveNodes, null) == null;
     if (nodeMissing) {
       // VoltZK.createPersistentZKNodes should have done this
       m_zk.create(VoltZK.lastKnownLiveNodes, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
     }

     JSONStringer json = new JSONStringer();
     json.object();
     json.key("liveNodes").array();
     for (Integer liveNode : liveNodes) {
       json.value(liveNode);
     }
     json.endArray();
     json.endObject();

     // Re-parse to obtain the indented form; the same text is logged and written.
     final String prettyJson = new JSONObject(json.toString()).toString(4);
     tmLog.debug("Writing live nodes to ZK: " + prettyJson);
     m_zk.setData(VoltZK.lastKnownLiveNodes, prettyJson.getBytes("UTF-8"), -1);
   } catch (Exception e) {
     VoltDB.crashLocalVoltDB(
         "Unable to update known live nodes at ZK path: " + VoltZK.lastKnownLiveNodes, true, e);
   }
 }
  /**
   * Records the number of participating hosts in the snapshot's completion node so that
   * SnapshotCompletionMonitor can use the node to tell whether the snapshot has finished.
   *
   * <p>Call this only after all participants have responded. Hosts may finish their part of the
   * snapshot before the coordinator gets here, in which case the stored host count has already
   * been decremented. For that reason the method does not overwrite the count: it adds
   * {@code participantCount + 1} to whatever is currently stored (the +1 cancels the initial
   * value of -1).
   *
   * <p>The update is a read-modify-write guarded by the node version; it is retried when
   * ZooKeeper reports a version conflict. If the completion node does not exist the method
   * returns without doing anything.
   *
   * @param txnId The snapshot txnId
   * @param participantCount The number of hosts participating in this snapshot
   */
  public static void logParticipatingHostCount(long txnId, int participantCount) {
    final String completionNodePath = VoltZK.completed_snapshots + "/" + txnId;
    final ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK();

    while (true) {
      final Stat nodeStat = new Stat();
      byte payload[] = null;
      try {
        payload = zk.getData(completionNodePath, false, nodeStat);
      } catch (KeeperException e) {
        if (e.code() == KeeperException.Code.NONODE) {
          // If snapshot creation failed for some reason, the node won't exist. ignore
          return;
        }
        VoltDB.crashLocalVoltDB("Failed to get snapshot completion node", true, e);
      } catch (InterruptedException e) {
        VoltDB.crashLocalVoltDB("Interrupted getting snapshot completion node", true, e);
      }
      if (payload == null) {
        VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null);
      }

      try {
        final JSONObject node = new JSONObject(new String(payload, Charsets.UTF_8));
        if (node.getLong("txnId") != txnId) {
          VoltDB.crashLocalVoltDB("TxnId should match", false, null);
        }

        // +1 because hostCount was initialized to -1
        final int updatedHostCount = node.getInt("hostCount") + participantCount + 1;
        node.put("hostCount", updatedHostCount);
        final byte updatedPayload[] = node.toString(4).getBytes(Charsets.UTF_8);
        zk.setData(completionNodePath, updatedPayload, nodeStat.getVersion());
      } catch (KeeperException.BadVersionException e) {
        // Somebody else changed the node since it was read; start over with fresh data.
        continue;
      } catch (Exception e) {
        VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e);
      }

      return;
    }
  }
  /**
   * Opens the snapshot file, writes its header and schema, and schedules the periodic sync task.
   *
   * <p>The header is laid out as: a 4 byte CRC, a 4 byte header length, a 1 byte completion flag,
   * four 4 byte version components, a 4 byte JSON length and the pretty-printed JSON metadata.
   * The table schema bytes follow the header. The CRC is computed over everything after the CRC
   * field with the completion flag set to 1; the flag is then written to the file as 0 because
   * the snapshot has not actually finished yet.
   *
   * <p>The constructor blocks until the header write has completed so that an unusable target
   * (full disk, bad path) is reported immediately. The file stream is closed on every failure
   * path after it has been opened.
   *
   * @param file file to write the snapshot data to
   * @param hostId host ID recorded in the JSON metadata
   * @param clusterName cluster name recorded in the JSON metadata
   * @param databaseName database name recorded in the JSON metadata
   * @param tableName table name; recorded upper-cased in the JSON metadata
   * @param numPartitions partition count, recorded only for non-replicated tables
   * @param isReplicated whether the table is replicated
   * @param partitionIds partition IDs, recorded only for non-replicated tables
   * @param schemaTable table whose schema bytes are appended after the header
   * @param txnId snapshot transaction ID recorded in the JSON metadata
   * @param timestamp snapshot timestamp recorded in the JSON metadata
   * @param version version components; the first four entries are written to the header
   * @throws IOException if the file cannot be opened, the metadata cannot be serialized, or the
   *     header write fails or is interrupted
   */
  public DefaultSnapshotDataTarget(
      final File file,
      final int hostId,
      final String clusterName,
      final String databaseName,
      final String tableName,
      final int numPartitions,
      final boolean isReplicated,
      final List<Integer> partitionIds,
      final VoltTable schemaTable,
      final long txnId,
      final long timestamp,
      int version[])
      throws IOException {
    String hostname = CoreUtils.getHostnameOrAddress();
    m_file = file;
    m_tableName = tableName;
    m_fos = new FileOutputStream(file);
    m_channel = m_fos.getChannel();
    m_needsFinalClose = !isReplicated;
    final FastSerializer fs = new FastSerializer();
    fs.writeInt(0); // CRC
    fs.writeInt(0); // Header length placeholder
    fs.writeByte(
        1); // Indicate the snapshot was not completed, set to true for the CRC calculation, false
    // later
    for (int ii = 0; ii < 4; ii++) {
      fs.writeInt(version[ii]); // version
    }
    JSONStringer stringer = new JSONStringer();
    byte jsonBytes[] = null;
    try {
      stringer.object();
      stringer.key("txnId").value(txnId);
      stringer.key("hostId").value(hostId);
      stringer.key("hostname").value(hostname);
      stringer.key("clusterName").value(clusterName);
      stringer.key("databaseName").value(databaseName);
      stringer.key("tableName").value(tableName.toUpperCase());
      stringer.key("isReplicated").value(isReplicated);
      stringer.key("isCompressed").value(true);
      stringer.key("checksumType").value("CRC32C");
      stringer.key("timestamp").value(timestamp);
      /*
       * The timestamp string is for human consumption, automated stuff should use
       * the actual timestamp
       */
      stringer.key("timestampString").value(SnapshotUtil.formatHumanReadableDate(timestamp));
      if (!isReplicated) {
        stringer.key("partitionIds").array();
        for (int partitionId : partitionIds) {
          stringer.value(partitionId);
        }
        stringer.endArray();

        stringer.key("numPartitions").value(numPartitions);
      }
      stringer.endObject();
      String jsonString = stringer.toString();
      JSONObject jsonObj = new JSONObject(jsonString);
      jsonString = jsonObj.toString(4);
      jsonBytes = jsonString.getBytes("UTF-8");
    } catch (Exception e) {
      // The stream is already open at this point; release it before giving up so the
      // file handle does not leak. A failure to close must not hide the real cause.
      try {
        m_fos.close();
      } catch (IOException ignored) {
        // Nothing more can be done; the serialization failure below is the one to report.
      }
      throw new IOException(e);
    }
    fs.writeInt(jsonBytes.length);
    fs.write(jsonBytes);

    // Back-patch the header length: everything that follows the length field itself.
    final BBContainer container = fs.getBBContainer();
    container.b.position(4);
    container.b.putInt(container.b.remaining() - 4);
    container.b.position(0);

    final byte schemaBytes[] = PrivateVoltTableFactory.getSchemaBytes(schemaTable);

    // The CRC covers header + schema, skipping the 4 byte CRC field at the front.
    final PureJavaCrc32 crc = new PureJavaCrc32();
    ByteBuffer aggregateBuffer = ByteBuffer.allocate(container.b.remaining() + schemaBytes.length);
    aggregateBuffer.put(container.b);
    aggregateBuffer.put(schemaBytes);
    aggregateBuffer.flip();
    crc.update(aggregateBuffer.array(), 4, aggregateBuffer.capacity() - 4);

    final int crcValue = (int) crc.getValue();
    aggregateBuffer.putInt(crcValue).position(8);
    aggregateBuffer.put((byte) 0).position(0); // Haven't actually finished writing file

    if (m_simulateFullDiskWritingHeader) {
      m_writeException = new IOException("Disk full");
      m_writeFailed = true;
      m_fos.close();
      throw m_writeException;
    }

    /*
     * Be completely sure the write succeeded. If it didn't
     * the disk is probably full or the path is bunk etc.
     */
    m_acceptOneWrite = true;
    ListenableFuture<?> writeFuture =
        write(Callables.returning((BBContainer) DBBPool.wrapBB(aggregateBuffer)), false);
    try {
      writeFuture.get();
    } catch (InterruptedException e) {
      m_fos.close();
      throw new java.io.InterruptedIOException();
    } catch (ExecutionException e) {
      m_fos.close();
      if (m_writeException != null) {
        throw m_writeException;
      }
      // The write task failed without recording an exception; throwing the null field
      // would surface as a NullPointerException, so report the underlying cause instead.
      throw new IOException(e.getCause() != null ? e.getCause() : e);
    }
    if (m_writeFailed) {
      m_fos.close();
      throw m_writeException;
    }

    m_syncTask =
        m_syncService.scheduleAtFixedRate(
            new Runnable() {
              @Override
              public void run() {
                // Only sync for at least 4 megabyte of data, enough to amortize the cost of seeking
                // on ye olden platters. Since we are appending to a file it's actually 2 seeks.
                while (m_bytesWrittenSinceLastSync.get() > (1024 * 1024 * 4)) {
                  final int bytesSinceLastSync = m_bytesWrittenSinceLastSync.getAndSet(0);
                  try {
                    m_channel.force(false);
                  } catch (IOException e) {
                    if (!(e instanceof java.nio.channels.AsynchronousCloseException)) {
                      SNAP_LOG.error("Error syncing snapshot", e);
                    } else {
                      SNAP_LOG.debug(
                          "Asynchronous close syncing snasphot data, presumably graceful", e);
                    }
                  }
                  // Hand the synced byte budget back, whether or not the force succeeded.
                  m_bytesAllowedBeforeSync.release(bytesSinceLastSync);
                }
              }
            },
            SNAPSHOT_SYNC_FREQUENCY,
            SNAPSHOT_SYNC_FREQUENCY,
            TimeUnit.MILLISECONDS);
  }
  /**
   * Compile and cache the statement and plan and return the final plan graph.
   *
   * <p>A new catalog statement named {@code stmt-N} is added to the procedure, classified as
   * SELECT, INSERT, UPDATE or DELETE from the leading keyword of the SQL text (anything else is
   * treated as a read-only SELECT), planned, and populated with the plan's parameters and output
   * columns. The JSON and DOT renderings of the root plan are written to the "statement-plans"
   * build directory on a best-effort basis. The compiled plan is remembered in
   * {@code m_currentPlan}.
   *
   * @param sql SQL text of the statement
   * @param paramCount number of parameters recorded on the catalog statement
   * @param joinOrder join order handed to the query planner
   * @param partitionParameter partitioning parameter; the statement is marked single-partition
   *     when this is non-null
   * @param inferSP forwarded to {@code PartitioningForStatement}
   * @param lockInSP forwarded to {@code PartitioningForStatement}
   * @return the root plan node of the root plan graph, followed by the root plan node of the sub
   *     plan graph when the plan has one
   */
  public List<AbstractPlanNode> compile(
      String sql,
      int paramCount,
      String joinOrder,
      Object partitionParameter,
      boolean inferSP,
      boolean lockInSP) {
    Statement catalogStmt = proc.getStatements().add("stmt-" + String.valueOf(compileCounter++));
    catalogStmt.setSqltext(sql);
    catalogStmt.setSinglepartition(partitionParameter != null);
    catalogStmt.setBatched(false);
    catalogStmt.setParamnum(paramCount);

    // determine the type of the query
    // Lower-case with a fixed locale: under e.g. a Turkish default locale "INSERT" would
    // lower-case to a dotless i and the statement would be misclassified as a SELECT.
    final String lowerSql = sql.toLowerCase(java.util.Locale.ROOT);
    QueryType qtype = QueryType.SELECT;
    catalogStmt.setReadonly(true);
    if (lowerSql.startsWith("insert")) {
      qtype = QueryType.INSERT;
      catalogStmt.setReadonly(false);
    } else if (lowerSql.startsWith("update")) {
      qtype = QueryType.UPDATE;
      catalogStmt.setReadonly(false);
    } else if (lowerSql.startsWith("delete")) {
      qtype = QueryType.DELETE;
      catalogStmt.setReadonly(false);
    }
    catalogStmt.setQuerytype(qtype.getValue());
    // name will look like "basename-stmt-#"
    String name = catalogStmt.getParent().getTypeName() + "-" + catalogStmt.getTypeName();

    DatabaseEstimates estimates = new DatabaseEstimates();
    TrivialCostModel costModel = new TrivialCostModel();
    PartitioningForStatement partitioning =
        new PartitioningForStatement(partitionParameter, inferSP, lockInSP);
    QueryPlanner planner =
        new QueryPlanner(
            catalogStmt.getSqltext(),
            catalogStmt.getTypeName(),
            catalogStmt.getParent().getTypeName(),
            catalog.getClusters().get("cluster"),
            db,
            partitioning,
            hsql,
            estimates,
            false,
            StatementCompiler.DEFAULT_MAX_JOIN_TABLES,
            costModel,
            null,
            joinOrder);

    planner.parse();
    CompiledPlan plan = planner.plan();
    assert (plan != null);

    // Input Parameters
    // We will need to update the system catalogs with this new information
    // If this is an adhoc query then there won't be any parameters
    for (int i = 0; i < plan.parameters.length; ++i) {
      StmtParameter catalogParam = catalogStmt.getParameters().add(String.valueOf(i));
      catalogParam.setJavatype(plan.parameters[i].getValue());
      catalogParam.setIndex(i);
    }

    // Output Columns
    int index = 0;
    for (SchemaColumn col : plan.columns.getColumns()) {
      Column catColumn = catalogStmt.getOutput_columns().add(String.valueOf(index));
      catColumn.setNullable(false);
      catColumn.setIndex(index);
      catColumn.setName(col.getColumnName());
      catColumn.setType(col.getType().getValue());
      catColumn.setSize(col.getSize());
      index++;
    }

    // The root plan graph always comes first; the sub plan graph is optional.
    List<PlanNodeList> nodeLists = new ArrayList<PlanNodeList>();
    nodeLists.add(new PlanNodeList(plan.rootPlanGraph));
    if (plan.subPlanGraph != null) {
      nodeLists.add(new PlanNodeList(plan.subPlanGraph));
    }

    // Store the list of parameters types and indexes in the plan node list.
    List<Pair<Integer, VoltType>> parameters = nodeLists.get(0).getParameters();
    for (int i = 0; i < plan.parameters.length; ++i) {
      Pair<Integer, VoltType> parameter = new Pair<Integer, VoltType>(i, plan.parameters[i]);
      parameters.add(parameter);
    }

    // Render the root plan as indented JSON. A plan that cannot be serialized is treated as
    // fatal: the stack trace is printed and the JVM exits.
    String json = null;
    try {
      JSONObject jobj = new JSONObject(nodeLists.get(0).toJSONString());
      json = jobj.toString(4);
    } catch (JSONException e2) {
      e2.printStackTrace();
      System.exit(-1);
    }

    // Dump the JSON and DOT renderings of the root plan for inspection. This is best effort:
    // a failure to write the files is reported but does not fail the compilation.
    try {
      BuildDirectoryUtils.writeFile("statement-plans", name + "_json.txt", json);
      BuildDirectoryUtils.writeFile(
          "statement-plans", name + ".dot", nodeLists.get(0).toDOTString("name"));
    } catch (Exception e) {
      e.printStackTrace();
    }

    List<AbstractPlanNode> plannodes = new ArrayList<AbstractPlanNode>();
    for (PlanNodeList nodeList : nodeLists) {
      plannodes.add(nodeList.getRootPlanNode());
    }

    m_currentPlan = plan;
    return plannodes;
  }