/**
 * Checks whether every region of the user table is colocated with the corresponding region of
 * its index table: both tables must have the same number of regions, and for each position the
 * same start key must be hosted on the same server.
 *
 * @param master master used to look up region locations
 * @param tableName the user table name
 * @param indexTableName the index table name
 * @return true if region count, start keys and hosting servers all match pairwise
 * @throws IOException if the meta lookup fails
 * @throws InterruptedException if the lookup is interrupted
 */
public static boolean checkForColocation(HMaster master, String tableName, String indexTableName)
    throws IOException, InterruptedException {
  List<Pair<byte[], ServerName>> uTableStartKeysAndLocations =
      getStartKeysAndLocations(master, tableName);
  List<Pair<byte[], ServerName>> iTableStartKeysAndLocations =
      getStartKeysAndLocations(master, indexTableName);
  // Differing region counts can never be colocated.
  if (uTableStartKeysAndLocations.size() != iTableStartKeysAndLocations.size()) {
    return false;
  }
  for (int i = 0; i < uTableStartKeysAndLocations.size(); i++) {
    Pair<byte[], ServerName> uStartKeyAndLocation = uTableStartKeysAndLocations.get(i);
    Pair<byte[], ServerName> iStartKeyAndLocation = iTableStartKeysAndLocations.get(i);
    // Bail out on the first position whose start key or hosting server differs; the
    // original kept scanning after a mismatch even though the result could not change.
    if (Bytes.compareTo(uStartKeyAndLocation.getFirst(), iStartKeyAndLocation.getFirst()) != 0
        || !uStartKeyAndLocation.getSecond().equals(iStartKeyAndLocation.getSecond())) {
      return false;
    }
  }
  return true;
}
@Override public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh) throws StorageException { final long delTS = System.currentTimeMillis(); final long putTS = delTS + 1; Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS); List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation // convert sorted commands into representation required for 'batch' operation for (Pair<Put, Delete> commands : commandsPerKey.values()) { if (commands.getFirst() != null) batch.add(commands.getFirst()); if (commands.getSecond() != null) batch.add(commands.getSecond()); } try { HTableInterface table = null; try { table = connectionPool.getTable(tableName); table.batch(batch); table.flushCommits(); } finally { IOUtils.closeQuietly(table); } } catch (IOException e) { throw new TemporaryStorageException(e); } catch (InterruptedException e) { throw new TemporaryStorageException(e); } waitUntil(putTS); }
private void addANDColsToFinalList(FilterList filterList) { for (Entry<Column, Pair<Value, Value>> entry : colWithOperators.entrySet()) { Pair<Value, Value> value = entry.getValue(); if (value.getFirst() != null && value.getSecond() != null) { // Here we are introducing a new Filter SingleColumnRangeFilter rangeFltr = new SingleColumnRangeFilter( entry.getKey().getFamily(), entry.getKey().getQualifier(), entry.getKey().getValuePartition(), value.getFirst().getValue(), value.getFirst().getOperator(), value.getSecond().getValue(), value.getSecond().getOperator()); filterList.addFilter(rangeFltr); } else if (value.getFirst() != null) { if (value.getFirst().getOperator() == CompareOp.EQUAL) { filterList.addFilter(value.getFirst().getFilter()); } else { SingleColumnRangeFilter rangeFltr = new SingleColumnRangeFilter( entry.getKey().getFamily(), entry.getKey().getQualifier(), entry.getKey().getValuePartition(), value.getFirst().getValue(), value.getFirst().getOperator(), null, null); filterList.addFilter(rangeFltr); } } } }
/**
 * Returns a shallow copy of the current cell's value: a slice positioned over the value bytes in
 * the underlying buffer, so no bytes are copied and the backing storage is shared.
 */
@Override
public ByteBuffer getValueShallowCopy() {
  // Resolve the value region into (buffer, offset) via the reusable scratch pair.
  currentBuffer.asSubByteBuffer(current.valueOffset, current.valueLength, tmpPair);
  ByteBuffer valueView = tmpPair.getFirst().duplicate();
  int valueStart = tmpPair.getSecond();
  valueView.position(valueStart);
  valueView.limit(valueStart + current.valueLength);
  return valueView.slice();
}
/**
 * Convert Titan internal Mutation representation into HBase native commands.
 *
 * @param mutations Mutations to convert into HBase commands.
 * @param putTimestamp The timestamp to use for Put commands.
 * @param delTimestamp The timestamp to use for Delete commands.
 * @return Commands sorted by key converted from Titan internal representation.
 */
private static Map<ByteBuffer, Pair<Put, Delete>> convertToCommands(
    Map<String, Map<ByteBuffer, KCVMutation>> mutations,
    final long putTimestamp,
    final long delTimestamp) {
  Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey =
      new HashMap<ByteBuffer, Pair<Put, Delete>>();
  for (Map.Entry<String, Map<ByteBuffer, KCVMutation>> entry : mutations.entrySet()) {
    // Outer map key is the column-family name.
    // NOTE(review): getBytes() uses the platform default charset — presumably CF names are
    // ASCII-only so this is safe; confirm and consider an explicit charset.
    byte[] cfName = entry.getKey().getBytes();
    for (Map.Entry<ByteBuffer, KCVMutation> m : entry.getValue().entrySet()) {
      ByteBuffer key = m.getKey();
      KCVMutation mutation = m.getValue();
      // Lazily create one (Put, Delete) pair per row key; the same pair accumulates
      // mutations across all column families for that key.
      Pair<Put, Delete> commands = commandsPerKey.get(key);
      if (commands == null) {
        commands = new Pair<Put, Delete>();
        commandsPerKey.put(key, commands);
      }
      if (mutation.hasDeletions()) {
        if (commands.getSecond() == null)
          commands.setSecond(new Delete(ByteBufferUtil.getArray(key), delTimestamp, null));
        for (ByteBuffer b : mutation.getDeletions()) {
          commands.getSecond().deleteColumns(cfName, ByteBufferUtil.getArray(b), delTimestamp);
        }
      }
      if (mutation.hasAdditions()) {
        if (commands.getFirst() == null)
          commands.setFirst(new Put(ByteBufferUtil.getArray(key), putTimestamp));
        for (Entry e : mutation.getAdditions()) {
          commands
              .getFirst()
              .add(
                  cfName,
                  ByteBufferUtil.getArray(e.getColumn()),
                  putTimestamp,
                  ByteBufferUtil.getArray(e.getValue()));
        }
      }
    }
  }
  return commandsPerKey;
}
/**
 * Adapts an HBase FuzzyRowFilter into a Bigtable {@code RowFilter} by interleaving one row
 * filter per (fuzzy info, match mask) pair. An empty pair list matches all values.
 *
 * @throws IOException if pair extraction fails
 */
@Override
public RowFilter adapt(FilterAdapterContext context, FuzzyRowFilter filter) throws IOException {
  List<Pair<byte[], byte[]>> fuzzyPairs = extractFuzzyRowFilterPairs(filter);
  if (fuzzyPairs.isEmpty()) {
    return ALL_VALUES_FILTER;
  }
  Interleave.Builder interleave = Interleave.newBuilder();
  for (Pair<byte[], byte[]> fuzzyPair : fuzzyPairs) {
    byte[] fuzzyInfo = fuzzyPair.getFirst();
    byte[] matchMask = fuzzyPair.getSecond();
    // The mask must annotate every byte of the fuzzy info, hence equal lengths.
    Preconditions.checkArgument(
        fuzzyInfo.length == matchMask.length,
        "Fuzzy info and match mask must have the same length");
    interleave.addFilters(createSingleRowFilter(fuzzyInfo, matchMask));
  }
  return RowFilter.newBuilder().setInterleave(interleave).build();
}
/**
 * Replays the given WAL entries to the target region replica, unless the located region no
 * longer matches the region the entries were recorded for (in which case they are skipped and
 * counted). Returns the server's response, or an empty response when nothing was replayed.
 *
 * @param entries WAL entries to replay
 * @param timeout RPC call timeout in milliseconds
 * @throws IOException if the replay RPC fails
 */
private ReplicateWALEntryResponse replayToServer(List<Entry> entries, int timeout)
    throws IOException {
  // check whether we should still replay this entry. If the regions are changed, or the
  // entry is not coming form the primary region, filter it out because we do not need it.
  // Regions can change because of (1) region split (2) region merge (3) table recreated
  boolean skip = false;
  if (!Bytes.equals(
      location.getRegionInfo().getEncodedNameAsBytes(), initialEncodedRegionName)) {
    skip = true;
  }
  if (!entries.isEmpty() && !skip) {
    Entry[] entriesArray = new Entry[entries.size()];
    entriesArray = entries.toArray(entriesArray);

    // set the region name for the target region replica
    Pair<AdminProtos.ReplicateWALEntryRequest, CellScanner> p =
        ReplicationProtbufUtil.buildReplicateWALEntryRequest(
            entriesArray, location.getRegionInfo().getEncodedNameAsBytes(), null, null, null);
    try {
      // Cells travel out-of-band via the controller's CellScanner, not in the protobuf.
      PayloadCarryingRpcController controller =
          rpcControllerFactory.newController(p.getSecond());
      controller.setCallTimeout(timeout);
      controller.setPriority(tableName);
      return stub.replay(controller, p.getFirst());
    } catch (ServiceException se) {
      throw ProtobufUtil.getRemoteException(se);
    }
  }

  if (skip) {
    if (LOG.isTraceEnabled()) {
      LOG.trace(
          "Skipping "
              + entries.size()
              + " entries in table "
              + tableName
              + " because located region "
              + location.getRegionInfo().getEncodedName()
              + " is different than the original region "
              + Bytes.toStringBinary(initialEncodedRegionName)
              + " from WALEdit");
      for (Entry entry : entries) {
        LOG.trace("Skipping : " + entry);
      }
    }
    // Track skipped entries for metrics/diagnostics.
    skippedEntries.addAndGet(entries.size());
  }
  return ReplicateWALEntryResponse.newBuilder().build();
}
/** * Checks if the given region has merge qualifier in hbase:meta * * @param services * @param regionName name of specified region * @return true if the given region has merge qualifier in META.(It will be cleaned by * CatalogJanitor) * @throws IOException */ boolean hasMergeQualifierInMeta(final RegionServerServices services, final byte[] regionName) throws IOException { if (services == null) return false; // Get merge regions if it is a merged region and already has merge // qualifier Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaTableAccessor.getRegionsFromMergeQualifier(services.getConnection(), regionName); if (mergeRegions != null && (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) { // It has merge qualifier return true; } return false; }
@Override public SingleByteBuff put(int offset, ByteBuff src, int srcOffset, int length) { if (src instanceof SingleByteBuff) { ByteBufferUtils.copyFromBufferToBuffer( ((SingleByteBuff) src).buf, this.buf, srcOffset, offset, length); } else { // TODO we can do some optimization here? Call to asSubByteBuffer might // create a copy. Pair<ByteBuffer, Integer> pair = new Pair<ByteBuffer, Integer>(); src.asSubByteBuffer(srcOffset, length, pair); ByteBufferUtils.copyFromBufferToBuffer( pair.getFirst(), this.buf, pair.getSecond(), offset, length); } return this; }
/**
 * Looks up every region of the given table in meta and returns, for each region, its start key
 * paired with the server currently hosting it, in meta order.
 *
 * @throws IOException if the meta scan fails
 * @throws InterruptedException if the lookup is interrupted
 */
public static List<Pair<byte[], ServerName>> getStartKeysAndLocations(
    HMaster master, String tableName) throws IOException, InterruptedException {
  List<Pair<HRegionInfo, ServerName>> regionsWithServers =
      MetaReader.getTableRegionsAndLocations(
          master.getCatalogTracker(), TableName.valueOf(tableName));
  List<Pair<byte[], ServerName>> startKeysWithServers =
      new ArrayList<Pair<byte[], ServerName>>(regionsWithServers.size());
  for (Pair<HRegionInfo, ServerName> regionWithServer : regionsWithServers) {
    // Project each (region, server) down to (startKey, server).
    startKeysWithServers.add(
        new Pair<byte[], ServerName>(
            regionWithServer.getFirst().getStartKey(), regionWithServer.getSecond()));
  }
  return startKeysWithServers;
}
/**
 * Builds a mocked meta GetResponse for the requested row. If the exact row is absent and the
 * request asks for the closest row before, falls back to the last entry of the strict head map.
 *
 * @param meta sorted mock of hbase:meta keyed by region row bytes
 * @param request the Get request whose row is looked up
 * @return a GetResponse carrying region-info, server and start-code cells where available
 */
static GetResponse doMetaGetResponse(
    final SortedMap<byte[], Pair<HRegionInfo, ServerName>> meta, final GetRequest request) {
  ClientProtos.Result.Builder resultBuilder = ClientProtos.Result.newBuilder();
  ByteString row = request.getGet().getRow();
  Pair<HRegionInfo, ServerName> p = meta.get(row.toByteArray());
  if (p == null && request.getGet().getClosestRowBefore()) {
    byte[] bytes = row.toByteArray();
    SortedMap<byte[], Pair<HRegionInfo, ServerName>> head =
        bytes != null ? meta.headMap(bytes) : meta;
    // FIX(review): guard against an empty head map — lastKey() throws
    // NoSuchElementException on an empty SortedMap, which the old code did not handle.
    p = (head == null || head.isEmpty()) ? null : head.get(head.lastKey());
  }
  if (p != null) {
    resultBuilder.addCell(getRegionInfo(row, p.getFirst()));
    resultBuilder.addCell(getServer(row, p.getSecond()));
  }
  // Start code is always present, even when no region matched.
  resultBuilder.addCell(getStartCode(row));
  GetResponse.Builder builder = GetResponse.newBuilder();
  builder.setResult(resultBuilder.build());
  return builder.build();
}
/**
 * Reads the global memstore sizing configuration and derives the upper limits and lower water
 * marks for both the overall (possibly off-heap) memstore limit and the on-heap limit.
 *
 * @param conf configuration to read memstore sizing from
 */
public RegionServerAccounting(Configuration conf) {
  this.conf = conf;
  // The global limit comes back together with its memory type (heap vs off-heap).
  Pair<Long, MemoryType> globalMemstoreSizePair = MemorySizeUtil.getGlobalMemstoreSize(conf);
  this.globalMemStoreLimit = globalMemstoreSizePair.getFirst();
  this.memType = globalMemstoreSizePair.getSecond();
  this.globalMemStoreLimitLowMarkPercent =
      MemorySizeUtil.getGlobalMemStoreHeapLowerMark(conf, this.memType == MemoryType.HEAP);
  // When off heap memstore in use we configure the global off heap space for memstore as bytes
  // not as % of max memory size. In such case, the lower water mark should be specified using the
  // key "hbase.regionserver.global.memstore.size.lower.limit" which says % of the global upper
  // bound and defaults to 95%. In on heap case also specifying this way is ideal. But in the past
  // we used to take lower bound also as the % of xmx (38% as default). For backward compatibility
  // for this deprecated config,we will fall back to read that config when new one is missing.
  // Only for on heap case, do this fallback mechanism. For off heap it makes no sense. // TODO
  // When to get rid of the deprecated config? ie
  // "hbase.regionserver.global.memstore.lowerLimit". Can get rid of this boolean passing then.
  this.globalMemStoreLimitLowMark =
      (long) (this.globalMemStoreLimit * this.globalMemStoreLimitLowMarkPercent);
  this.globalOnHeapMemstoreLimit = MemorySizeUtil.getOnheapGlobalMemstoreSize(conf);
  // On-heap low mark reuses the same percentage as the global low mark.
  this.globalOnHeapMemstoreLimitLowMark =
      (long) (this.globalOnHeapMemstoreLimit * this.globalMemStoreLimitLowMarkPercent);
}
/** * Check if the server should be considered as bad. Clean the old entries of the list. * * @return true if the server is in the failed servers list */ public synchronized boolean isFailedServer(final InetSocketAddress address) { if (failedServers.isEmpty()) { return false; } final String lookup = address.toString(); final long now = EnvironmentEdgeManager.currentTimeMillis(); // iterate, looking for the search entry and cleaning expired entries Iterator<Pair<Long, String>> it = failedServers.iterator(); while (it.hasNext()) { Pair<Long, String> cur = it.next(); if (cur.getFirst() < now) { it.remove(); } else { if (lookup.equals(cur.getSecond())) { return true; } } } return false; }
private List<CompactionRequest> requestCompactionInternal( final HRegion r, final String why, int p, List<Pair<CompactionRequest, Store>> requests, boolean selectNow) throws IOException { // not a special compaction request, so make our own list List<CompactionRequest> ret = null; if (requests == null) { ret = selectNow ? new ArrayList<CompactionRequest>(r.getStores().size()) : null; for (Store s : r.getStores().values()) { CompactionRequest cr = requestCompactionInternal(r, s, why, p, null, selectNow); if (selectNow) ret.add(cr); } } else { Preconditions.checkArgument(selectNow); // only system requests have selectNow == false ret = new ArrayList<CompactionRequest>(requests.size()); for (Pair<CompactionRequest, Store> pair : requests) { ret.add(requestCompaction(r, pair.getSecond(), why, p, pair.getFirst())); } } return ret; }
/**
 * Initialize PhoenixPigConfiguration if it is null. Called by {@link #setLocation} and {@link
 * #getSchema}
 *
 * @param location a "table" or "query" scheme location string
 * @param configuration Hadoop configuration to wrap
 * @throws PigException if the location cannot be parsed
 */
private void initializePhoenixPigConfiguration(
    final String location, final Configuration configuration) throws PigException {
  if (this.config != null) {
    return;
  }
  this.config = new PhoenixPigConfiguration(configuration);
  this.config.setServerName(this.zkQuorum);
  Pair<String, String> pair = null;
  try {
    if (location.startsWith(PHOENIX_TABLE_NAME_SCHEME)) {
      String tableSchema = location.substring(PHOENIX_TABLE_NAME_SCHEME.length());
      final TableSchemaParserFunction parseFunction = new TableSchemaParserFunction();
      pair = parseFunction.apply(tableSchema);
      this.config.setSchemaType(SchemaType.TABLE);
    } else if (location.startsWith(PHOENIX_QUERY_SCHEME)) {
      this.selectQuery = location.substring(PHOENIX_QUERY_SCHEME.length());
      final QuerySchemaParserFunction queryParseFunction =
          new QuerySchemaParserFunction(this.config);
      pair = queryParseFunction.apply(this.selectQuery);
      config.setSelectStatement(this.selectQuery);
      this.config.setSchemaType(SchemaType.QUERY);
    }
    // FIX(review): a location matching neither scheme previously fell through to an NPE on
    // pair.getFirst(); treat it as a usage error instead.
    if (pair == null) {
      printUsage(location);
      return;
    }
    this.tableName = pair.getFirst();
    final String selectedColumns = pair.getSecond();
    if (isEmpty(this.tableName) && isEmpty(this.selectQuery)) {
      printUsage(location);
    }
    this.config.setTableName(this.tableName);
    if (!isEmpty(selectedColumns)) {
      this.config.setSelectColumns(selectedColumns);
    }
  } catch (IllegalArgumentException iae) {
    // Parser functions reject malformed locations with IAE; surface usage help instead.
    printUsage(location);
  }
}
// Review note: replica-aware scanner call. Submits the scan to the currently preferred
// replica first; if it does not answer within timeBeforeReplicas, fans the request out to
// all remaining replicas and returns whichever answers first, cancelling the rest.
@Override
public Result[] call(int timeout) throws IOException {
  // If the active replica callable was closed somewhere, invoke the RPC to
  // really close it. In the case of regular scanners, this applies. We make couple
  // of RPCs to a RegionServer, and when that region is exhausted, we set
  // the closed flag. Then an RPC is required to actually close the scanner.
  if (currentScannerCallable != null && currentScannerCallable.closed) {
    // For closing we target that exact scanner (and not do replica fallback like in
    // the case of normal reads)
    if (LOG.isDebugEnabled()) {
      LOG.debug("Closing scanner " + currentScannerCallable.scannerId);
    }
    Result[] r = currentScannerCallable.call(timeout);
    currentScannerCallable = null;
    return r;
  }
  // We need to do the following:
  // 1. When a scan goes out to a certain replica (default or not), we need to
  // continue to hit that until there is a failure. So store the last successfully invoked
  // replica
  // 2. We should close the "losing" scanners (scanners other than the ones we hear back
  // from first)
  //
  RegionLocations rl =
      RpcRetryingCallerWithReadReplicas.getRegionLocations(
          true,
          RegionReplicaUtil.DEFAULT_REPLICA_ID,
          cConnection,
          tableName,
          currentScannerCallable.getRow());

  // allocate a boundedcompletion pool of some multiple of number of replicas.
  // We want to accomodate some RPCs for redundant replica scans (but are still in progress)
  BoundedCompletionService<Pair<Result[], ScannerCallable>> cs =
      new BoundedCompletionService<Pair<Result[], ScannerCallable>>(pool, rl.size() * 5);

  List<ExecutionException> exceptions = null;
  int submitted = 0, completed = 0;
  AtomicBoolean done = new AtomicBoolean(false);
  replicaSwitched.set(false);
  // submit call for the primary replica.
  submitted += addCallsForCurrentReplica(cs, rl);
  try {
    // wait for the timeout to see whether the primary responds back
    Future<Pair<Result[], ScannerCallable>> f =
        cs.poll(timeBeforeReplicas, TimeUnit.MICROSECONDS); // Yes, microseconds
    if (f != null) {
      // Primary answered in time: record which replica served us and return its result.
      Pair<Result[], ScannerCallable> r = f.get();
      if (r != null && r.getSecond() != null) {
        updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
      }
      return r == null ? null : r.getFirst(); // great we got a response
    }
  } catch (ExecutionException e) {
    // the primary call failed with RetriesExhaustedException or DoNotRetryIOException
    // but the secondaries might still succeed. Continue on the replica RPCs.
    exceptions = new ArrayList<ExecutionException>(rl.size());
    exceptions.add(e);
    completed++;
  } catch (CancellationException e) {
    throw new InterruptedIOException(e.getMessage());
  } catch (InterruptedException e) {
    throw new InterruptedIOException(e.getMessage());
  }
  // submit call for the all of the secondaries at once
  // TODO: this may be an overkill for large region replication
  submitted += addCallsForOtherReplicas(cs, rl, 0, rl.size() - 1);
  try {
    // Take results as they complete; the first successful replica wins.
    while (completed < submitted) {
      try {
        Future<Pair<Result[], ScannerCallable>> f = cs.take();
        Pair<Result[], ScannerCallable> r = f.get();
        if (r != null && r.getSecond() != null) {
          updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
        }
        return r == null ? null : r.getFirst(); // great we got an answer
      } catch (ExecutionException e) {
        // if not cancel or interrupt, wait until all RPC's are done
        // one of the tasks failed. Save the exception for later.
        if (exceptions == null) exceptions = new ArrayList<ExecutionException>(rl.size());
        exceptions.add(e);
        completed++;
      }
    }
  } catch (CancellationException e) {
    throw new InterruptedIOException(e.getMessage());
  } catch (InterruptedException e) {
    throw new InterruptedIOException(e.getMessage());
  } finally {
    // We get there because we were interrupted or because one or more of the
    // calls succeeded or failed. In all case, we stop all our tasks.
    cs.cancelAll(true);
  }

  if (exceptions != null && !exceptions.isEmpty()) {
    RpcRetryingCallerWithReadReplicas.throwEnrichedException(
        exceptions.get(0), retries); // just rethrow the first exception for now.
  }
  return null; // unreachable
}
/**
 * Maps an RPC request to a dispatch priority. Annotation-declared priorities win outright;
 * "multi" reads its priority from the request header; requests that reference a meta region
 * (directly via a region specifier, or indirectly via an open scanner) are promoted to
 * HIGH_QOS; everything else gets NORMAL_QOS.
 */
@Override
public Integer apply(Pair<RequestHeader, Message> headerAndParam) {
  RequestHeader header = headerAndParam.getFirst();
  String methodName = header.getMethodName();
  // Priorities declared via annotation take precedence over everything else.
  Integer priorityByAnnotation = annotatedQos.get(methodName);
  if (priorityByAnnotation != null) {
    return priorityByAnnotation;
  }
  Message param = headerAndParam.getSecond();
  if (param == null) {
    return HConstants.NORMAL_QOS;
  }
  if (methodName.equalsIgnoreCase("multi") && param instanceof MultiRequest) {
    // The multi call has its priority set in the header. All calls should work this way but
    // only this one has been converted so far. No priority == NORMAL_QOS.
    return header.hasPriority() ? header.getPriority() : HConstants.NORMAL_QOS;
  }
  String cls = param.getClass().getName();
  Class<? extends Message> rpcArgClass = argumentToClassMap.get(cls);
  RegionSpecifier regionSpecifier = null;
  // check whether the request has reference to meta region or now.
  try {
    // Check if the param has a region specifier; the pb methods are hasRegion and getRegion if
    // hasRegion returns true. Not all listed methods have region specifier each time. For
    // example, the ScanRequest has it on setup but thereafter relies on the scannerid rather than
    // send the region over every time.
    Method hasRegion = methodMap.get("hasRegion").get(rpcArgClass);
    if (hasRegion != null && (Boolean) hasRegion.invoke(param, (Object[]) null)) {
      Method getRegion = methodMap.get("getRegion").get(rpcArgClass);
      regionSpecifier = (RegionSpecifier) getRegion.invoke(param, (Object[]) null);
      HRegion region = hRegionServer.getRegion(regionSpecifier);
      if (region.getRegionInfo().isMetaTable()) {
        if (LOG.isTraceEnabled()) {
          LOG.trace("High priority because region=" + region.getRegionNameAsString());
        }
        return HConstants.HIGH_QOS;
      }
    }
  } catch (Exception ex) {
    // Not good throwing an exception out of here, a runtime anyways. Let the query go into the
    // server and have it throw the exception if still an issue. Just mark it normal priority.
    if (LOG.isTraceEnabled()) LOG.trace("Marking normal priority after getting exception=" + ex);
    return HConstants.NORMAL_QOS;
  }
  if (methodName.equals("scan")) { // scanner methods...
    ScanRequest request = (ScanRequest) param;
    if (!request.hasScannerId()) {
      return HConstants.NORMAL_QOS;
    }
    // An already-open scanner over a meta region also gets high priority.
    RegionScanner scanner = hRegionServer.getScanner(request.getScannerId());
    if (scanner != null && scanner.getRegionInfo().isMetaRegion()) {
      if (LOG.isTraceEnabled()) {
        // Scanner requests are small in size so TextFormat version should not overwhelm log.
        LOG.trace("High priority scanner request " + TextFormat.shortDebugString(request));
      }
      return HConstants.HIGH_QOS;
    }
  }
  return HConstants.NORMAL_QOS;
}
/**
 * Creates a table (or view) from the parsed CREATE TABLE statement: validates column and
 * primary-key definitions, splits the supplied properties into table-level vs column-family
 * properties, writes the catalog metadata rows, and issues the createTable call to the query
 * services. For USER tables, also kicks off post-DDL data cleanup.
 *
 * @param statement the parsed CREATE TABLE/VIEW statement
 * @param splits optional pre-split points for the underlying HBase table
 * @return the resulting MutationState (possibly from post-DDL cleanup for USER tables)
 * @throws SQLException on validation failure or catalog conflicts
 */
public MutationState createTable(CreateTableStatement statement, byte[][] splits)
    throws SQLException {
  PTableType tableType = statement.getTableType();
  boolean isView = tableType == PTableType.VIEW;
  // Views may not carry any HBase table/family configuration.
  if (isView && !statement.getProps().isEmpty()) {
    throw new SQLExceptionInfo.Builder(SQLExceptionCode.VIEW_WITH_TABLE_CONFIG)
        .build()
        .buildException();
  }
  connection.rollback();
  boolean wasAutoCommit = connection.getAutoCommit();
  try {
    connection.setAutoCommit(false);
    TableName tableNameNode = statement.getTableName();
    String schemaName = tableNameNode.getSchemaName();
    String tableName = tableNameNode.getTableName();

    PrimaryKeyConstraint pkConstraint = statement.getPrimaryKeyConstraint();
    String pkName = null;
    Set<String> pkColumns = Collections.<String>emptySet();
    Iterator<String> pkColumnsIterator = Iterators.emptyIterator();
    if (pkConstraint != null) {
      pkColumns = pkConstraint.getColumnNames();
      pkColumnsIterator = pkColumns.iterator();
      pkName = pkConstraint.getName();
    }

    List<ColumnDef> colDefs = statement.getColumnDefs();
    List<PColumn> columns = Lists.newArrayListWithExpectedSize(colDefs.size());
    PreparedStatement colUpsert = connection.prepareStatement(INSERT_COLUMN);
    int columnOrdinal = 0;
    Map<String, PName> familyNames = Maps.newLinkedHashMap();
    boolean isPK = false;
    for (ColumnDef colDef : colDefs) {
      if (colDef.isPK()) {
        // Only one column may be flagged PK inline; composite keys use the PK constraint.
        if (isPK) {
          throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_ALREADY_EXISTS)
              .setColumnName(colDef.getColumnDefName().getColumnName().getName())
              .build()
              .buildException();
        }
        isPK = true;
      }
      PColumn column = newColumn(columnOrdinal++, colDef, pkConstraint);
      if (SchemaUtil.isPKColumn(column)) {
        // TODO: remove this constraint?
        // PK columns must be declared in the same order as listed in the PK constraint.
        if (!pkColumns.isEmpty()
            && !column.getName().getString().equals(pkColumnsIterator.next())) {
          throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_OUT_OF_ORDER)
              .setSchemaName(schemaName)
              .setTableName(tableName)
              .setColumnName(column.getName().getString())
              .build()
              .buildException();
        }
      }
      columns.add(column);
      // BINARY is only allowed when it is the sole column (variable length in the row key).
      if (colDef.getDataType() == PDataType.BINARY && colDefs.size() > 1) {
        throw new SQLExceptionInfo.Builder(SQLExceptionCode.BINARY_IN_ROW_KEY)
            .setSchemaName(schemaName)
            .setTableName(tableName)
            .setColumnName(column.getName().getString())
            .build()
            .buildException();
      }
      if (column.getFamilyName() != null) {
        familyNames.put(column.getFamilyName().getString(), column.getFamilyName());
      }
    }
    // A table must have a primary key, either inline or via the constraint.
    if (!isPK && pkColumns.isEmpty()) {
      throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_MISSING)
          .setSchemaName(schemaName)
          .setTableName(tableName)
          .build()
          .buildException();
    }

    List<Pair<byte[], Map<String, Object>>> familyPropList =
        Lists.newArrayListWithExpectedSize(familyNames.size());
    Map<String, Object> commonFamilyProps = Collections.emptyMap();
    Map<String, Object> tableProps = Collections.emptyMap();
    if (!statement.getProps().isEmpty()) {
      if (statement.isView()) {
        throw new SQLExceptionInfo.Builder(SQLExceptionCode.VIEW_WITH_PROPERTIES)
            .build()
            .buildException();
      }
      // Properties may only target declared families (or the catch-all "all families" key).
      for (String familyName : statement.getProps().keySet()) {
        if (!familyName.equals(QueryConstants.ALL_FAMILY_PROPERTIES_KEY)) {
          if (familyNames.get(familyName) == null) {
            throw new SQLExceptionInfo.Builder(SQLExceptionCode.PROPERTIES_FOR_FAMILY)
                .setFamilyName(familyName)
                .build()
                .buildException();
          }
        }
      }
      commonFamilyProps = Maps.newHashMapWithExpectedSize(statement.getProps().size());
      tableProps = Maps.newHashMapWithExpectedSize(statement.getProps().size());

      Collection<Pair<String, Object>> props =
          statement.getProps().get(QueryConstants.ALL_FAMILY_PROPERTIES_KEY);
      // Somewhat hacky way of determining if property is for HColumnDescriptor or
      // HTableDescriptor
      HColumnDescriptor defaultDescriptor =
          new HColumnDescriptor(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES);
      for (Pair<String, Object> prop : props) {
        if (defaultDescriptor.getValue(prop.getFirst()) != null) {
          commonFamilyProps.put(prop.getFirst(), prop.getSecond());
        } else {
          tableProps.put(prop.getFirst(), prop.getSecond());
        }
      }
    }
    for (PName familyName : familyNames.values()) {
      Collection<Pair<String, Object>> props = statement.getProps().get(familyName.getString());
      if (props.isEmpty()) {
        familyPropList.add(
            new Pair<byte[], Map<String, Object>>(familyName.getBytes(), commonFamilyProps));
      } else {
        // Family-specific properties override the common (all-families) ones.
        Map<String, Object> combinedFamilyProps =
            Maps.newHashMapWithExpectedSize(props.size() + commonFamilyProps.size());
        combinedFamilyProps.putAll(commonFamilyProps);
        for (Pair<String, Object> prop : props) {
          combinedFamilyProps.put(prop.getFirst(), prop.getSecond());
        }
        familyPropList.add(
            new Pair<byte[], Map<String, Object>>(familyName.getBytes(), combinedFamilyProps));
      }
    }

    // Bootstrapping for our SYSTEM.TABLE that creates itself before it exists
    if (tableType == PTableType.SYSTEM) {
      PTable table =
          new PTableImpl(
              new PNameImpl(tableName),
              tableType,
              MetaDataProtocol.MIN_TABLE_TIMESTAMP,
              0,
              QueryConstants.SYSTEM_TABLE_PK_NAME,
              null,
              columns);
      connection.addTable(schemaName, table);
    }
    for (PColumn column : columns) {
      addColumnMutation(schemaName, tableName, column, colUpsert);
    }

    Integer saltBucketNum = (Integer) tableProps.remove(PhoenixDatabaseMetaData.SALT_BUCKETS);
    if (saltBucketNum != null
        && (saltBucketNum <= 0 || saltBucketNum > SaltingUtil.MAX_BUCKET_NUM)) {
      throw new SQLExceptionInfo.Builder(SQLExceptionCode.INVALID_BUCKET_NUM)
          .build()
          .buildException();
    }

    // Write the table header row into the catalog.
    PreparedStatement tableUpsert = connection.prepareStatement(CREATE_TABLE);
    tableUpsert.setString(1, schemaName);
    tableUpsert.setString(2, tableName);
    tableUpsert.setString(3, tableType.getSerializedValue());
    tableUpsert.setInt(4, 0);
    tableUpsert.setInt(5, columnOrdinal);
    if (saltBucketNum != null) {
      tableUpsert.setInt(6, saltBucketNum);
    } else {
      tableUpsert.setNull(6, Types.INTEGER);
    }
    tableUpsert.setString(7, pkName);
    tableUpsert.execute();

    // Collect the pending catalog mutations and clear them from the connection.
    final List<Mutation> tableMetaData = connection.getMutationState().toMutations();
    connection.rollback();

    MetaDataMutationResult result =
        connection
            .getQueryServices()
            .createTable(tableMetaData, isView, tableProps, familyPropList, splits);
    MutationCode code = result.getMutationCode();
    switch (code) {
      case TABLE_ALREADY_EXISTS:
        connection.addTable(schemaName, result.getTable());
        if (!statement.ifNotExists()) {
          throw new TableAlreadyExistsException(schemaName, tableName);
        }
        break;
      case NEWER_TABLE_FOUND:
        // TODO: add table if in result?
        throw new NewerTableAlreadyExistsException(schemaName, tableName);
      case UNALLOWED_TABLE_MUTATION:
        throw new SQLExceptionInfo.Builder(SQLExceptionCode.CANNOT_MUTATE_TABLE)
            .setSchemaName(schemaName)
            .setTableName(tableName)
            .build()
            .buildException();
      default:
        PTable table =
            new PTableImpl(
                new PNameImpl(tableName),
                tableType,
                result.getMutationTime(),
                0,
                pkName,
                saltBucketNum,
                columns);
        connection.addTable(schemaName, table);
        if (tableType == PTableType.USER) {
          connection.setAutoCommit(true);
          // Delete everything in the column. You'll still be able to do queries at earlier
          // timestamps
          Long scn = connection.getSCN();
          long ts = (scn == null ? result.getMutationTime() : scn);
          PSchema schema =
              new PSchemaImpl(
                  schemaName,
                  ImmutableMap.<String, PTable>of(table.getName().getString(), table));
          TableRef tableRef = new TableRef(null, table, schema, ts);
          byte[] emptyCF = SchemaUtil.getEmptyColumnFamily(table.getColumnFamilies());
          MutationPlan plan =
              new PostDDLCompiler(connection).compile(tableRef, emptyCF, null, ts);
          return connection.getQueryServices().updateData(plan);
        }
        break;
    }
    return new MutationState(0, connection);
  } finally {
    // Always restore the caller's auto-commit setting.
    connection.setAutoCommit(wasAutoCommit);
  }
}
private void handleScvf(SingleColumnValueFilter scvf) { ValuePartition vp = null; if (scvf instanceof SingleColumnValuePartitionFilter) { vp = ((SingleColumnValuePartitionFilter) scvf).getValuePartition(); } Column column = new Column(scvf.getFamily(), scvf.getQualifier(), vp); Pair<Value, Value> pair = colWithOperators.get(column); if (pair == null) { pair = new Pair<Value, Value>(); // The first operator should be set here pair.setFirst(new Value(scvf.getOperator(), scvf.getComparator().getValue(), scvf)); colWithOperators.put(column, pair); } else { if (pair.getFirst() != null && pair.getSecond() == null) { // TODO As Anoop said we may have to check the Value type also.. // We can not compare and validate this way. btw "a" and "K". // Only in case of Numeric col type we can have this check. byte[] curBoundValue = scvf.getComparator().getValue(); byte[] prevBoundValue = pair.getFirst().getValue(); int result = Bytes.compareTo(prevBoundValue, curBoundValue); CompareOp curBoundOperator = scvf.getOperator(); CompareOp prevBoundOperator = pair.getFirst().getOperator(); switch (curBoundOperator) { case GREATER: case GREATER_OR_EQUAL: if (prevBoundOperator == CompareOp.GREATER || prevBoundOperator == CompareOp.GREATER_OR_EQUAL) { LOG.warn("Wrong usage. It should be < > || > <. 
Cannot be > >"); if (result > 1) { pair.setFirst(new Value(curBoundOperator, curBoundValue, scvf)); } pair.setSecond(null); } else if (prevBoundOperator == CompareOp.LESS || prevBoundOperator == CompareOp.LESS_OR_EQUAL) { if (result < 1) { LOG.warn("Possible wrong usage as there cannot be a value < 10 and > 20"); pair.setFirst(null); pair.setSecond(null); } else { pair.setSecond(new Value(curBoundOperator, curBoundValue, scvf)); } } else if (prevBoundOperator == CompareOp.EQUAL) { LOG.warn("Use the equal operator and ignore the current one"); pair.setSecond(null); } break; case LESS: case LESS_OR_EQUAL: if (prevBoundOperator == CompareOp.LESS || prevBoundOperator == CompareOp.LESS_OR_EQUAL) { LOG.warn("Wrong usage. It should be < > || > <. Cannot be > >"); if (result < 1) { pair.setFirst(new Value(curBoundOperator, curBoundValue, scvf)); } pair.setSecond(null); } else if (prevBoundOperator == CompareOp.GREATER || prevBoundOperator == CompareOp.GREATER_OR_EQUAL) { if (result > 1) { LOG.warn("Possible wrong usage as there cannot be a value < 10 and > 20"); pair.setFirst(null); pair.setSecond(null); } else { pair.setSecond(new Value(curBoundOperator, curBoundValue, scvf)); } } else if (prevBoundOperator == CompareOp.EQUAL) { LOG.warn("Use the EQUAL operator only and ignore the current one."); pair.setSecond(null); } break; case EQUAL: // For equal condition give priority to equals only.. // If the prevOperator is also == and the current is also == // take the second one.(Currently) if (prevBoundOperator == CompareOp.LESS || prevBoundOperator == CompareOp.LESS_OR_EQUAL || prevBoundOperator == CompareOp.EQUAL || prevBoundOperator == CompareOp.GREATER || prevBoundOperator == CompareOp.GREATER_OR_EQUAL) { pair.setFirst(new Value(curBoundOperator, curBoundValue, scvf)); pair.setSecond(null); } break; case NOT_EQUAL: case NO_OP: // Need to check this break; } } else { LOG.warn( "Am getting an extra comparison coming for the same col family." 
+ "I cannot have 3 conditions on the same column"); pair.setFirst(null); pair.setSecond(null); } } }
/**
 * Exercises {@code RpcServer.allocateByteBuffToReadInto} against a pool of ten 6 KB direct
 * buffers, checking for each request size whether the buffer comes from the pool (direct) or
 * is created on heap, how many pool buffers are consumed, and that the returned
 * {@code CallCleanup} returns pooled buffers to the pool.
 */
@Test
public void testAllocateByteBuffToReadInto() throws Exception {
  int maxBuffersInPool = 10;
  ByteBufferPool pool = new ByteBufferPool(6 * 1024, maxBuffersInPool);
  initPoolWithAllBuffers(pool, maxBuffersInPool);
  ByteBuff buff = null;
  Pair<ByteBuff, CallCleanup> pair;
  // When the request size is less than 1/6th of the pool buffer size. We should use on demand
  // created on heap Buffer; nothing is taken from the pool and no cleanup is needed.
  pair =
      RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool), 200);
  buff = pair.getFirst();
  assertTrue(buff.hasArray()); // heap-backed, i.e. not from the direct-buffer pool
  assertEquals(maxBuffersInPool, pool.getQueueSize()); // pool untouched
  assertNull(pair.getSecond());
  // When the request size is > 1/6th of the pool buffer size: one pool buffer is used.
  pair =
      RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool), 1024);
  buff = pair.getFirst();
  assertFalse(buff.hasArray()); // direct buffer taken from the pool
  assertEquals(maxBuffersInPool - 1, pool.getQueueSize());
  assertNotNull(pair.getSecond());
  pair.getSecond().run(); // CallCleanup#run should put back the BB to pool.
  assertEquals(maxBuffersInPool, pool.getQueueSize());
  // Request size> pool buffer size: expect a MultiByteBuff made of two pool buffers,
  // the second only partially used (limit 1024).
  pair =
      RpcServer.allocateByteBuffToReadInto(
          pool, RpcServer.getMinSizeForReservoirUse(pool), 7 * 1024);
  buff = pair.getFirst();
  assertFalse(buff.hasArray());
  assertTrue(buff instanceof MultiByteBuff);
  ByteBuffer[] bbs = ((MultiByteBuff) buff).getEnclosingByteBuffers();
  assertEquals(2, bbs.length);
  assertTrue(bbs[0].isDirect());
  assertTrue(bbs[1].isDirect());
  assertEquals(6 * 1024, bbs[0].limit());
  assertEquals(1024, bbs[1].limit());
  assertEquals(maxBuffersInPool - 2, pool.getQueueSize());
  assertNotNull(pair.getSecond());
  pair.getSecond().run(); // CallCleanup#run should put back the BB to pool.
  assertEquals(maxBuffersInPool, pool.getQueueSize());
  // Request slightly over one pool buffer: one direct pool buffer plus a small on-heap
  // remainder buffer (200 bytes).
  pair =
      RpcServer.allocateByteBuffToReadInto(
          pool, RpcServer.getMinSizeForReservoirUse(pool), 6 * 1024 + 200);
  buff = pair.getFirst();
  assertFalse(buff.hasArray());
  assertTrue(buff instanceof MultiByteBuff);
  bbs = ((MultiByteBuff) buff).getEnclosingByteBuffers();
  assertEquals(2, bbs.length);
  assertTrue(bbs[0].isDirect());
  assertFalse(bbs[1].isDirect()); // remainder came from heap, not the pool
  assertEquals(6 * 1024, bbs[0].limit());
  assertEquals(200, bbs[1].limit());
  assertEquals(maxBuffersInPool - 1, pool.getQueueSize());
  assertNotNull(pair.getSecond());
  pair.getSecond().run(); // CallCleanup#run should put back the BB to pool.
  assertEquals(maxBuffersInPool, pool.getQueueSize());
  // Drain the pool down to a single remaining buffer, then ask for more than it can supply:
  // the allocation should use the one pool buffer and fall back to heap for the rest.
  ByteBuffer[] buffers = new ByteBuffer[maxBuffersInPool - 1];
  for (int i = 0; i < maxBuffersInPool - 1; i++) {
    buffers[i] = pool.getBuffer();
  }
  pair =
      RpcServer.allocateByteBuffToReadInto(
          pool, RpcServer.getMinSizeForReservoirUse(pool), 20 * 1024);
  buff = pair.getFirst();
  assertFalse(buff.hasArray());
  assertTrue(buff instanceof MultiByteBuff);
  bbs = ((MultiByteBuff) buff).getEnclosingByteBuffers();
  assertEquals(2, bbs.length);
  assertTrue(bbs[0].isDirect());
  assertFalse(bbs[1].isDirect());
  assertEquals(6 * 1024, bbs[0].limit());
  assertEquals(14 * 1024, bbs[1].limit()); // 20K request minus the single 6K pool buffer
  assertEquals(0, pool.getQueueSize());
  assertNotNull(pair.getSecond());
  pair.getSecond().run(); // CallCleanup#run should put back the BB to pool.
  assertEquals(1, pool.getQueueSize());
  // Empty the pool completely: allocation must fall back to a single on-heap buffer with
  // no cleanup.
  pool.getBuffer();
  pair =
      RpcServer.allocateByteBuffToReadInto(
          pool, RpcServer.getMinSizeForReservoirUse(pool), 7 * 1024);
  buff = pair.getFirst();
  assertTrue(buff.hasArray());
  assertTrue(buff instanceof SingleByteBuff);
  assertEquals(7 * 1024, ((SingleByteBuff) buff).getEnclosingByteBuffer().limit());
  assertNull(pair.getSecond());
}
/**
 * Runs the split-transaction steps that precede the point of no return (PONR): gets the
 * master's OK, marks the region as splitting, creates the splits dir, closes and offlines the
 * parent, splits the store files and creates both daughter regions.
 *
 * @param server server hosting the split (master is asked for permission via region state
 *     transition); may be null in tests
 * @param services region-server services used to report state and offline the parent; may be
 *     null in tests
 * @param testing true when invoked from tests; skips removing the parent from online regions
 * @return the two daughter regions (A first, B second)
 * @throws IOException if the master rejects the split, the parent close fails or was closed
 *     concurrently, or daughter creation fails
 */
@VisibleForTesting
public PairOfSameType<Region> stepsBeforePONR(
    final Server server, final RegionServerServices services, boolean testing)
    throws IOException {
  // Ask the master for permission before touching anything on disk.
  if (services != null
      && !services.reportRegionStateTransition(
          TransitionCode.READY_TO_SPLIT, parent.getRegionInfo(), hri_a, hri_b)) {
    throw new IOException(
        "Failed to get ok from master to split "
            + parent.getRegionInfo().getRegionNameAsString());
  }
  transition(SplitTransactionPhase.SET_SPLITTING);
  this.parent.getRegionFileSystem().createSplitsDir();
  transition(SplitTransactionPhase.CREATE_SPLIT_DIR);
  Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
  Exception exceptionToThrow = null;
  try {
    // Close the parent; on success this yields the store files we will split.
    hstoreFilesToSplit = this.parent.close(false);
  } catch (Exception e) {
    exceptionToThrow = e;
  }
  if (exceptionToThrow == null && hstoreFilesToSplit == null) {
    // The region was closed by a concurrent thread. We can't continue
    // with the split, instead we must just abandon the split. If we
    // reopen or split this could cause problems because the region has
    // probably already been moved to a different server, or is in the
    // process of moving to a different server.
    exceptionToThrow = closedByOtherException;
  }
  if (exceptionToThrow != closedByOtherException) {
    // Only record the phase when WE closed the parent (or close threw); a concurrent close
    // means we never owned the closed state.
    transition(SplitTransactionPhase.CLOSED_PARENT_REGION);
  }
  if (exceptionToThrow != null) {
    if (exceptionToThrow instanceof IOException) throw (IOException) exceptionToThrow;
    throw new IOException(exceptionToThrow);
  }
  if (!testing) {
    services.removeFromOnlineRegions(this.parent, null);
  }
  transition(SplitTransactionPhase.OFFLINED_PARENT);
  // TODO: If splitStoreFiles were multithreaded would we complete steps in
  // less elapsed time? St.Ack 20100920
  //
  // splitStoreFiles creates daughter region dirs under the parent splits dir
  // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
  // clean this up.
  Pair<Integer, Integer> expectedReferences = splitStoreFiles(hstoreFilesToSplit);
  // Log to the journal that we are creating region A, the first daughter
  // region. We could fail halfway through. If we do, we could have left
  // stuff in fs that needs cleanup -- a storefile or two. Thats why we
  // add entry to journal BEFORE rather than AFTER the change.
  transition(SplitTransactionPhase.STARTED_REGION_A_CREATION);
  // Verify the reference-file count both in the splits dir and after the daughter dir is
  // materialized under the table dir.
  assertReferenceFileCount(
      expectedReferences.getFirst(),
      this.parent.getRegionFileSystem().getSplitsDir(this.hri_a));
  HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
  assertReferenceFileCount(
      expectedReferences.getFirst(),
      new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_a.getEncodedName()));
  // Ditto
  transition(SplitTransactionPhase.STARTED_REGION_B_CREATION);
  assertReferenceFileCount(
      expectedReferences.getSecond(),
      this.parent.getRegionFileSystem().getSplitsDir(this.hri_b));
  HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
  assertReferenceFileCount(
      expectedReferences.getSecond(),
      new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_b.getEncodedName()));
  return new PairOfSameType<Region>(a, b);
}
/**
 * Looks up where the given region is currently hosted.
 *
 * @param catalogTracker tracker used to query the catalog
 * @param regionName name of the region whose location is wanted
 * @return the hosting {@link ServerName}, or null when the region or its location is unknown
 * @throws IOException if the catalog lookup fails
 */
static ServerName readRegionLocation(CatalogTracker catalogTracker, byte[] regionName)
    throws IOException {
  Pair<HRegionInfo, ServerName> regionAndLocation = getRegion(catalogTracker, regionName);
  if (regionAndLocation == null) {
    return null;
  }
  // getSecond() may itself be null when no location is recorded; that is the null we return.
  return regionAndLocation.getSecond();
}
/** * Creates reference files for top and bottom half of the * * @param hstoreFilesToSplit map of store files to create half file references for. * @return the number of reference files that were created. * @throws IOException */ private Pair<Integer, Integer> splitStoreFiles( final Map<byte[], List<StoreFile>> hstoreFilesToSplit) throws IOException { if (hstoreFilesToSplit == null) { // Could be null because close didn't succeed -- for now consider it fatal throw new IOException("Close returned empty list of StoreFiles"); } // The following code sets up a thread pool executor with as many slots as // there's files to split. It then fires up everything, waits for // completion and finally checks for any exception int nbFiles = 0; for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) { nbFiles += entry.getValue().size(); } if (nbFiles == 0) { // no file needs to be splitted. return new Pair<Integer, Integer>(0, 0); } // Default max #threads to use is the smaller of table's configured number of blocking store // files or the available number of logical cores. int defMaxThreads = Math.min( parent.conf.getInt( HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT), Runtime.getRuntime().availableProcessors()); // Max #threads is the smaller of the number of storefiles or the default max determined above. int maxThreads = Math.min(parent.conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles); LOG.info( "Preparing to split " + nbFiles + " storefiles for region " + this.parent + " using " + maxThreads + " threads"); ThreadFactoryBuilder builder = new ThreadFactoryBuilder(); builder.setNameFormat("StoreFileSplitter-%1$d"); ThreadFactory factory = builder.build(); ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, factory); List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); // Split each store file. 
for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesToSplit.entrySet()) { for (StoreFile sf : entry.getValue()) { StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf); futures.add(threadPool.submit(sfs)); } } // Shutdown the pool threadPool.shutdown(); // Wait for all the tasks to finish try { boolean stillRunning = !threadPool.awaitTermination(this.fileSplitTimeout, TimeUnit.MILLISECONDS); if (stillRunning) { threadPool.shutdownNow(); // wait for the thread to shutdown completely. while (!threadPool.isTerminated()) { Thread.sleep(50); } throw new IOException( "Took too long to split the" + " files and create the references, aborting split"); } } catch (InterruptedException e) { throw (InterruptedIOException) new InterruptedIOException().initCause(e); } int created_a = 0; int created_b = 0; // Look for any exception for (Future<Pair<Path, Path>> future : futures) { try { Pair<Path, Path> p = future.get(); created_a += p.getFirst() != null ? 1 : 0; created_b += p.getSecond() != null ? 1 : 0; } catch (InterruptedException e) { throw (InterruptedIOException) new InterruptedIOException().initCause(e); } catch (ExecutionException e) { throw new IOException(e); } } if (LOG.isDebugEnabled()) { LOG.debug( "Split storefiles for region " + this.parent + " Daughter A: " + created_a + " storefiles, Daughter B: " + created_b + " storefiles."); } return new Pair<Integer, Integer>(created_a, created_b); }
/**
 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} call
 * should get implemented for each snapshot flavor.
 *
 * <p>On any failure the status is aborted, the error is forwarded to the foreign-exception
 * monitor and the handler is cancelled; the working directory is always cleaned up in the
 * finally block.
 */
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "REC_CATCH_EXCEPTION",
    justification = "Intentional")
public void process() {
  String msg =
      "Running "
          + snapshot.getType()
          + " table snapshot "
          + snapshot.getName()
          + " "
          + eventType
          + " on table "
          + snapshotTable;
  LOG.info(msg);
  status.setStatus(msg);
  try {
    // If regions move after this meta scan, the region specific snapshot should fail, triggering
    // an external exception that gets captured here.

    // write down the snapshot info in the working directory
    SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
    snapshotManifest.addTableDescriptor(this.htd);
    monitor.rethrowException();
    // Resolve the regions and their locations; meta's regions come from ZK, everything else
    // from the meta table itself.
    List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
    if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
      regionsAndLocations =
          new MetaTableLocator().getMetaRegionsAndLocations(server.getZooKeeper());
    } else {
      regionsAndLocations =
          MetaTableAccessor.getTableRegionsAndLocations(
              server.getConnection(), snapshotTable, false);
    }
    // run the snapshot
    snapshotRegions(regionsAndLocations);
    monitor.rethrowException();
    // extract each pair to separate lists
    Set<String> serverNames = new HashSet<String>();
    for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
      if (p != null && p.getFirst() != null && p.getSecond() != null) {
        HRegionInfo hri = p.getFirst();
        // Skip offline split parents; their daughters carry the data.
        if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
        serverNames.add(p.getSecond().toString());
      }
    }
    // flush the in-memory state, and write the single manifest
    status.setStatus("Consolidate snapshot: " + snapshot.getName());
    snapshotManifest.consolidate();
    // verify the snapshot is valid
    status.setStatus("Verifying snapshot: " + snapshot.getName());
    verifier.verifySnapshot(this.workingDir, serverNames);
    // complete the snapshot, atomically moving from tmp to .snapshot dir.
    completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
    msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
    status.markComplete(msg);
    LOG.info(msg);
    metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
  } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION
    status.abort(
        "Failed to complete snapshot "
            + snapshot.getName()
            + " on table "
            + snapshotTable
            + " because "
            + e.getMessage());
    String reason =
        "Failed taking snapshot "
            + ClientSnapshotDescriptionUtils.toString(snapshot)
            + " due to exception:"
            + e.getMessage();
    LOG.error(reason, e);
    ForeignException ee = new ForeignException(reason, e);
    monitor.receive(ee);
    // need to mark this completed to close off and allow cleanup to happen.
    cancel(reason);
  } finally {
    LOG.debug("Launching cleanup of working dir:" + workingDir);
    try {
      // If the working dir is still present the snapshot did not complete (success moves it
      // to the .snapshot dir), so delete it.
      if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
        LOG.error("Couldn't delete snapshot working directory:" + workingDir);
      }
    } catch (IOException e) {
      LOG.error("Couldn't delete snapshot working directory:" + workingDir);
    }
    releaseTableLock();
  }
}