Exemplo n.º 1
0
/**
 * Coprocessor protocol for Phoenix DDL. Phoenix stores the table metadata in an HBase table named
 * SYSTEM.TABLE. Each table is represented by:
 *
 * <ul>
 *   <li>one row for the table
 *   <li>one row per column in the table
 * </ul>
 *
 * <p>Up to {@link #DEFAULT_MAX_META_DATA_VERSIONS} versions are kept. The time stamp of the
 * metadata must always be increasing. The timestamp of the key values in the data row corresponds
 * to the schema that it's using.
 *
 * <p>TODO: dynamically prune number of schema version kept based on whether or not the data table
 * still uses it (based on the min time stamp of the data table).
 *
 * @author jtaylor
 * @since 0.1
 */
public interface MetaDataProtocol extends CoprocessorProtocol {
  public static final int PHOENIX_MAJOR_VERSION = 2;
  public static final int PHOENIX_MINOR_VERSION = 1;
  public static final int PHOENIX_PATCH_NUMBER = 0;

  /** Major, minor and patch numbers combined by {@code MetaDataUtil.encodeVersion}. */
  public static final int PHOENIX_VERSION =
      MetaDataUtil.encodeVersion(
          PHOENIX_MAJOR_VERSION, PHOENIX_MINOR_VERSION, PHOENIX_PATCH_NUMBER);

  public static final long MIN_TABLE_TIMESTAMP = 0;
  // Increase MIN_SYSTEM_TABLE_TIMESTAMP by one for each SYSTEM.TABLE schema change.
  // For 1.0, 1.1, 1.2, and 1.2.1 we used MetaDataProtocol.MIN_TABLE_TIMESTAMP+1
  // For 2.0 and above, we use MetaDataProtocol.MIN_TABLE_TIMESTAMP+7 so that we can add the five
  // new columns to the existing system table (three new columns in 1.2.1 and two new columns in
  // 1.2)
  // For 3.0 and above, we use MIN_TABLE_TIMESTAMP + 8 so that we can add the tenant_id column
  // as the first column to the existing system table.
  public static final long MIN_SYSTEM_TABLE_TIMESTAMP = MIN_TABLE_TIMESTAMP + 8;
  public static final int DEFAULT_MAX_META_DATA_VERSIONS = 1000;

  // TODO: pare this down to minimum, as we don't need duplicates for both table and column errors,
  // nor should we need a different code for every type of error.
  // ENTITY_ALREADY_EXISTS, ENTITY_NOT_FOUND, NEWER_ENTITY_FOUND, ENTITY_NOT_IN_REGION,
  // CONCURRENT_MODIFICATION
  // ILLEGAL_MUTATION (+ sql code)
  /**
   * Outcome codes carried by {@link MetaDataMutationResult}.
   *
   * <p>The constants are serialized by ordinal (see {@link MetaDataMutationResult#write}), so new
   * values must only be appended and existing ones must not be reordered.
   */
  public enum MutationCode {
    TABLE_ALREADY_EXISTS,
    TABLE_NOT_FOUND,
    COLUMN_NOT_FOUND,
    COLUMN_ALREADY_EXISTS,
    CONCURRENT_TABLE_MUTATION,
    TABLE_NOT_IN_REGION,
    NEWER_TABLE_FOUND,
    UNALLOWED_TABLE_MUTATION,
    NO_PK_COLUMNS,
    PARENT_TABLE_NOT_FOUND
  }

  /**
   * Result returned by every metadata operation of this protocol: a {@link MutationCode}, the
   * mutation time, and optionally a table, a column/family name pair and a list of table names to
   * delete.
   *
   * <p>Wire format, in order: vint code ordinal, long mutation time, boolean "has table" followed
   * by the table when true, byte array column name (empty when absent), byte array family name
   * (present only when the column name is non-empty), boolean "has tables to delete" followed by
   * an int count and that many byte arrays when true.
   */
  public static class MetaDataMutationResult implements Writable {
    private MutationCode returnCode;
    private long mutationTime;
    private PTable table;
    private List<byte[]> tableNamesToDelete;
    private byte[] columnName;
    private byte[] familyName;

    /** No-arg constructor; fields are expected to be populated through {@link #readFields}. */
    public MetaDataMutationResult() {}

    /**
     * Creates a result that additionally records the name and family name of {@code column}.
     *
     * @param returnCode outcome of the operation
     * @param currentTime value reported by {@link #getMutationTime()}
     * @param table table to return, may be null
     * @param column column whose name and family name are recorded; ignored when null
     */
    public MetaDataMutationResult(
        MutationCode returnCode, long currentTime, PTable table, PColumn column) {
      this(returnCode, currentTime, table);
      if (column != null) {
        this.columnName = column.getName().getBytes();
        // NOTE(review): assumes column.getFamilyName() is never null here — confirm for columns
        // that have no column family.
        this.familyName = column.getFamilyName().getBytes();
      }
    }

    /**
     * Creates a result with an empty list of table names to delete.
     *
     * @param returnCode outcome of the operation
     * @param currentTime value reported by {@link #getMutationTime()}
     * @param table table to return, may be null
     */
    public MetaDataMutationResult(MutationCode returnCode, long currentTime, PTable table) {
      this(returnCode, currentTime, table, Collections.<byte[]>emptyList());
    }

    /**
     * Creates a result carrying a list of table names to delete.
     *
     * @param returnCode outcome of the operation
     * @param currentTime value reported by {@link #getMutationTime()}
     * @param table table to return, may be null
     * @param tableNamesToDelete table names to report; stored by reference, not copied
     */
    public MetaDataMutationResult(
        MutationCode returnCode, long currentTime, PTable table, List<byte[]> tableNamesToDelete) {
      this.returnCode = returnCode;
      this.mutationTime = currentTime;
      this.table = table;
      this.tableNamesToDelete = tableNamesToDelete;
    }

    public MutationCode getMutationCode() {
      return returnCode;
    }

    public long getMutationTime() {
      return mutationTime;
    }

    /** @return the table carried by this result, or null if none was set or read */
    public PTable getTable() {
      return table;
    }

    /**
     * @return the table names to delete; null on an instance populated by {@link #readFields}
     *     when none were sent
     */
    public List<byte[]> getTableNamesToDelete() {
      return tableNamesToDelete;
    }

    /**
     * @return the column name; a zero-length array on an instance populated by {@link
     *     #readFields} when no column was sent, null if never set
     */
    public byte[] getColumnName() {
      return columnName;
    }

    /** @return the family name, or null if none was set or read */
    public byte[] getFamilyName() {
      return familyName;
    }

    /**
     * Populates this result from {@code input}, replacing any previously held state.
     *
     * @throws IOException if the stream cannot be read or carries an unknown code ordinal
     */
    @Override
    public void readFields(DataInput input) throws IOException {
      int codeOrdinal = WritableUtils.readVInt(input);
      MutationCode[] codes = MutationCode.values();
      if (codeOrdinal < 0 || codeOrdinal >= codes.length) {
        // Surface a corrupt or newer-than-known code as an I/O failure rather than an
        // ArrayIndexOutOfBoundsException.
        throw new IOException("Unknown MutationCode ordinal " + codeOrdinal);
      }
      this.returnCode = codes[codeOrdinal];
      this.mutationTime = input.readLong();

      // Clear the optional fields up front so that a reused instance never exposes values left
      // over from an earlier read or from construction.
      this.table = null;
      this.familyName = null;
      this.tableNamesToDelete = null;

      boolean hasTable = input.readBoolean();
      if (hasTable) {
        this.table = new PTableImpl();
        this.table.readFields(input);
      }
      columnName = Bytes.readByteArray(input);
      // The family name is only on the wire when a non-empty column name precedes it.
      if (columnName.length > 0) {
        familyName = Bytes.readByteArray(input);
      }
      boolean hasTablesToDelete = input.readBoolean();
      if (hasTablesToDelete) {
        int count = input.readInt();
        tableNamesToDelete = Lists.newArrayListWithExpectedSize(count);
        for (int i = 0; i < count; i++) {
          byte[] tableName = Bytes.readByteArray(input);
          tableNamesToDelete.add(tableName);
        }
      }
    }

    /**
     * Serializes this result in the format described on the class.
     *
     * @throws IOException if the stream cannot be written
     */
    @Override
    public void write(DataOutput output) throws IOException {
      WritableUtils.writeVInt(output, returnCode.ordinal());
      output.writeLong(mutationTime);
      output.writeBoolean(table != null);
      if (table != null) {
        table.write(output);
      }
      // readFields() consumes the family name only after a non-empty column name, so the same
      // condition must be used here. Testing for null alone would emit an unread family name
      // when columnName is a zero-length array (which is what readFields() leaves behind when no
      // column was sent) and corrupt everything that follows in the stream.
      boolean hasColumn = columnName != null && columnName.length > 0;
      Bytes.writeByteArray(output, hasColumn ? columnName : ByteUtil.EMPTY_BYTE_ARRAY);
      if (hasColumn) {
        Bytes.writeByteArray(output, familyName == null ? ByteUtil.EMPTY_BYTE_ARRAY : familyName);
      }
      if (tableNamesToDelete != null && !tableNamesToDelete.isEmpty()) {
        output.writeBoolean(true);
        output.writeInt(tableNamesToDelete.size());
        for (byte[] tableName : tableNamesToDelete) {
          Bytes.writeByteArray(output, tableName);
        }
      } else {
        output.writeBoolean(false);
      }
    }
  }

  /**
   * Get the latest Phoenix table at or before the given clientTimestamp. If the client already has
   * the latest (based on the tableTimestamp), then no table is returned.
   *
   * @param tenantId the tenant id
   * @param schemaName the schema name
   * @param tableName the table name
   * @param tableTimestamp timestamp of the table version the client already has
   * @param clientTimestamp upper bound for the table version to look up
   * @return MetaDataMutationResult
   * @throws IOException
   */
  MetaDataMutationResult getTable(
      byte[] tenantId,
      byte[] schemaName,
      byte[] tableName,
      long tableTimestamp,
      long clientTimestamp)
      throws IOException;

  /**
   * Create a new Phoenix table
   *
   * @param tableMetadata mutations describing the table metadata
   * @return MetaDataMutationResult
   * @throws IOException
   */
  MetaDataMutationResult createTable(List<Mutation> tableMetadata) throws IOException;

  /**
   * Drop an existing Phoenix table
   *
   * @param tableMetadata mutations describing the table metadata
   * @param tableType the table type, as a string
   * @return MetaDataMutationResult
   * @throws IOException
   */
  MetaDataMutationResult dropTable(List<Mutation> tableMetadata, String tableType)
      throws IOException;

  /**
   * Add a column to an existing Phoenix table
   *
   * @param tableMetadata mutations describing the table metadata
   * @return MetaDataMutationResult
   * @throws IOException
   */
  MetaDataMutationResult addColumn(List<Mutation> tableMetadata) throws IOException;

  /**
   * Drop a column from an existing Phoenix table
   *
   * @param tableMetadata mutations describing the table metadata
   * @return MetaDataMutationResult
   * @throws IOException
   */
  MetaDataMutationResult dropColumn(List<Mutation> tableMetadata) throws IOException;

  /**
   * Presumably updates the state of an index from the given metadata mutations (inferred from the
   * method name; the original declaration is undocumented).
   *
   * @param tableMetadata mutations describing the table metadata
   * @return MetaDataMutationResult
   * @throws IOException
   */
  MetaDataMutationResult updateIndexState(List<Mutation> tableMetadata) throws IOException;

  /**
   * Clears the server-side cache of table meta data. Used between test runs to ensure no side
   * effects.
   */
  void clearCache();

  /**
   * Get the version of the server-side HBase and phoenix.jar. Used when initially connecting to a
   * cluster to ensure that the client and server jars are compatible.
   */
  long getVersion();
}
  /**
   * Uses the preAppend hook to provide checkAndPut/checkAndDelete-style behavior for sequence
   * metadata operations, as we need the ability to a) set the TimeRange for the Get being done and
   * b) return something back to the client to indicate success/failure.
   *
   * <p>An {@link Append} without the {@code OPERATION_ATTRIB} attribute is left alone (null is
   * returned and nothing is bypassed). Otherwise the regular append is bypassed and, under the row
   * lock, the existing cell is looked up and the operation is turned into a Put or Delete that is
   * applied through {@code region.batchMutate}.
   *
   * @param e observer context; bypassed and completed once a valid operation is recognized
   * @param append the append carrying the operation attribute, optional max-timestamp attribute
   *     and the cells to write
   * @return null when the append is not a sequence operation; otherwise a single-cell Result
   *     whose timestamp and value describe the outcome
   * @throws IOException if the operation attribute is malformed or the operation fails
   */
  @SuppressWarnings("deprecation")
  @Override
  public Result preAppend(
      final ObserverContext<RegionCoprocessorEnvironment> e, final Append append)
      throws IOException {
    byte[] opBuf = append.getAttribute(OPERATION_ATTRIB);
    if (opBuf == null) {
      return null;
    }
    // Validate the client-supplied op code before indexing: an empty, negative or out-of-range
    // value would otherwise escape this hook as an ArrayIndexOutOfBoundsException.
    Op[] knownOps = Op.values();
    if (opBuf.length == 0 || opBuf[0] < 0 || opBuf[0] >= knownOps.length) {
      throw new IOException(
          "Invalid " + OPERATION_ATTRIB + " attribute: " + Bytes.toStringBinary(opBuf));
    }
    Op op = knownOps[opBuf[0]];

    long clientTimestamp = HConstants.LATEST_TIMESTAMP;
    byte[] clientTimestampBuf = append.getAttribute(MAX_TIMERANGE_ATTRIB);
    if (clientTimestampBuf != null) {
      clientTimestamp = Bytes.toLong(clientTimestampBuf);
    }
    boolean hadClientTimestamp = (clientTimestamp != HConstants.LATEST_TIMESTAMP);
    if (hadClientTimestamp) {
      // Prevent race condition of creating two sequences at the same timestamp
      // by looking for a sequence at or after the timestamp at which it'll be
      // created.
      // NOTE(review): the increment also shifts the timestamp used below for the Put and for
      // the error result, not only the upper bound of the lookup — confirm that is intended.
      if (op == Op.CREATE_SEQUENCE) {
        clientTimestamp++;
      }
    } else {
      // No timestamp from the client: stamp the operation with the server's current time.
      clientTimestamp = EnvironmentEdgeManager.currentTimeMillis();
      clientTimestampBuf = Bytes.toBytes(clientTimestamp);
    }

    RegionCoprocessorEnvironment env = e.getEnvironment();
    // We need to set this to prevent region.append from being called
    e.bypass();
    e.complete();
    HRegion region = env.getRegion();
    byte[] row = append.getRow();
    region.startRegionOperation();
    try {
      Integer lid = region.getLock(null, row, true);
      try {
        // Only the first cell of the append is used to pick the column to look up.
        KeyValue keyValue = append.getFamilyMap().values().iterator().next().iterator().next();
        byte[] family = keyValue.getFamily();
        byte[] qualifier = keyValue.getQualifier();

        Get get = new Get(row);
        get.setTimeRange(MetaDataProtocol.MIN_TABLE_TIMESTAMP, clientTimestamp);
        get.addColumn(family, qualifier);
        Result result = region.get(get);
        if (result.isEmpty()) {
          // Dropping or resetting requires an existing sequence.
          if (op == Op.DROP_SEQUENCE || op == Op.RESET_SEQUENCE) {
            return getErrorResult(
                row, clientTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode());
          }
        } else {
          // Creating requires that no sequence exists yet.
          if (op == Op.CREATE_SEQUENCE) {
            return getErrorResult(
                row, clientTimestamp, SQLExceptionCode.SEQUENCE_ALREADY_EXIST.getErrorCode());
          }
        }
        Mutation m;
        switch (op) {
          case RESET_SEQUENCE:
            // result is known to be non-empty here (the empty case returned above).
            KeyValue currentValueKV = result.raw()[0];
            long expectedValue =
                PDataType.LONG
                    .getCodec()
                    .decodeLong(append.getAttribute(CURRENT_VALUE_ATTRIB), 0, null);
            long value =
                PDataType.LONG
                    .getCodec()
                    .decodeLong(currentValueKV.getBuffer(), currentValueKV.getValueOffset(), null);
            // Timestamp should match exactly, or we may have the wrong sequence
            // NOTE(review): the Get above is bounded above by clientTimestamp; if that bound is
            // exclusive (as HBase documents for Get#setTimeRange), a cell returned here can
            // never carry exactly clientTimestamp — confirm this check can succeed.
            if (expectedValue != value || currentValueKV.getTimestamp() != clientTimestamp) {
              // Mismatch: report the timestamp of the cell actually found, with an empty value.
              return new Result(
                  Collections.singletonList(
                      KeyValueUtil.newKeyValue(
                          row,
                          PhoenixDatabaseMetaData.SEQUENCE_FAMILY_BYTES,
                          QueryConstants.EMPTY_COLUMN_BYTES,
                          currentValueKV.getTimestamp(),
                          ByteUtil.EMPTY_BYTE_ARRAY)));
            }
            m = new Put(row, currentValueKV.getTimestamp());
            m.getFamilyMap().putAll(append.getFamilyMap());
            break;
          case DROP_SEQUENCE:
            m = new Delete(row, clientTimestamp, null);
            break;
          case CREATE_SEQUENCE:
            m = new Put(row, clientTimestamp);
            m.getFamilyMap().putAll(append.getFamilyMap());
            break;
          default:
            // Fail with a descriptive error instead of a NullPointerException further down; it
            // is converted to an IOException by the catch block below like any other failure.
            throw new IllegalStateException("Unsupported sequence operation " + op);
        }
        if (!hadClientTimestamp) {
          // Replace the "latest" placeholder timestamp on each cell with the server time chosen
          // above.
          for (List<KeyValue> kvs : m.getFamilyMap().values()) {
            for (KeyValue kv : kvs) {
              kv.updateLatestStamp(clientTimestampBuf);
            }
          }
        }
        @SuppressWarnings("unchecked")
        Pair<Mutation, Integer>[] mutations = new Pair[1];
        mutations[0] = new Pair<Mutation, Integer>(m, lid);
        region.batchMutate(mutations);
        long serverTimestamp = MetaDataUtil.getClientTimeStamp(m);
        // Return result with single KeyValue. The only piece of information
        // the client cares about is the timestamp, which is the timestamp of
        // when the mutation was actually performed (useful when the client did not
        // supply a timestamp and the server chose one).
        return new Result(
            Collections.singletonList(
                KeyValueUtil.newKeyValue(
                    row,
                    PhoenixDatabaseMetaData.SEQUENCE_FAMILY_BYTES,
                    QueryConstants.EMPTY_COLUMN_BYTES,
                    serverTimestamp,
                    SUCCESS_VALUE)));
      } finally {
        region.releaseRowLock(lid);
      }
    } catch (Throwable t) {
      ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t);
      return null; // Impossible
    } finally {
      region.closeRegionOperation();
    }
  }