/**
 * Use the Catalog to set the workers for fragments that have scans, and verify that the workers
 * are consistent with existing constraints.
 *
 * @see #assignWorkersToFragments(List, ConstructArgs)
 * @param fragments the fragments of the plan
 * @param args other arguments necessary for query construction
 * @throws CatalogException if there is an error getting information from the Catalog
 */
private static void setAndVerifyScans(
    final List<PlanFragmentEncoding> fragments, final ConstructArgs args)
    throws CatalogException {
  Server server = args.getServer();

  for (PlanFragmentEncoding fragment : fragments) {
    for (OperatorEncoding<?> operator : fragment.operators) {
      Set<Integer> scanWorkers;
      String scanRelation;

      if (operator instanceof TableScanEncoding) {
        TableScanEncoding scan = ((TableScanEncoding) operator);
        scanRelation = scan.relationKey.toString();
        scanWorkers = server.getWorkersForRelation(scan.relationKey, scan.storedRelationId);
      } else if (operator instanceof TempTableScanEncoding) {
        TempTableScanEncoding scan = ((TempTableScanEncoding) operator);
        scanRelation = "temporary relation " + scan.table;
        scanWorkers =
            server
                .getQueryManager()
                .getWorkersForTempRelation(
                    args.getQueryId(), RelationKey.ofTemp(args.getQueryId(), scan.table));
      } else {
        continue;
      }
      Preconditions.checkArgument(
          scanWorkers != null, "Unable to find workers that store %s", scanRelation);
      /*
       * Note: the current assumption is that all the partitions need to be scanned. This will not
       * be true if we have data replication, or allow scanning only a subset of the partitions.
       * Revise if needed.
       */
      setOrVerifyFragmentWorkers(fragment, scanWorkers, "Setting workers for " + scanRelation);
    }
  }
}
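/*
 * For reference, a minimal sketch of the {@code setOrVerifyFragmentWorkers} helper invoked above.
 * The real implementation lives elsewhere in this class and is not shown in this section; the
 * sketch only illustrates the "set if unset, otherwise verify" contract implied by its call
 * sites, and the exact failure-message format is an assumption.
 *
 * <pre>{@code
 * private static void setOrVerifyFragmentWorkers(
 *     final PlanFragmentEncoding fragment,
 *     final Collection<Integer> workers,
 *     final String currentTask) {
 *   if (fragment.workers == null) {
 *     // No workers assigned yet: adopt the given set.
 *     fragment.workers = ImmutableList.copyOf(workers);
 *   } else {
 *     // Workers already assigned (e.g., a user override): they must agree with the new set.
 *     Preconditions.checkArgument(
 *         ImmutableSet.copyOf(fragment.workers).equals(ImmutableSet.copyOf(workers)),
 *         "%s: fragment workers %s conflict with required workers %s",
 *         currentTask,
 *         fragment.workers,
 *         workers);
 *   }
 * }
 * }</pre>
 */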
/**
 * Given an abstract execution plan, assign the workers to the fragments.
 *
 * <p>This assignment follows the following five rules, in precedence order:
 *
 * <ol>
 *   <li>Obey user overrides of fragment workers.
 *   <li>Fragments that scan tables must use the workers that contain the data.
 *   <li>Edge constraints between fragments. E.g., a {@link LocalMultiwayProducerEncoding} must
 *       use the same set of workers as its consumer.
 *   <li>Singleton constraints: fragments with a {@link CollectConsumerEncoding} or a {@link
 *       SingletonEncoding} must run on a single worker. If none is set yet, choose an arbitrary
 *       worker.
 *   <li>Unspecified: any fragments that still have unspecified worker sets will use all workers
 *       in the cluster.
 * </ol>
 *
 * @param fragments the fragments of the plan
 * @param args other arguments necessary for query construction
 * @throws CatalogException if there is an error getting information about existing relations from
 *     the catalog
 */
private static void assignWorkersToFragments(
    final List<PlanFragmentEncoding> fragments, final ConstructArgs args)
    throws CatalogException {
  /* 1. Honor user overrides. Note this is unchecked; we may find constraint violations later. */
  for (PlanFragmentEncoding fragment : fragments) {
    if (fragment.overrideWorkers != null && fragment.overrideWorkers.size() > 0) {
      /* The workers are set in the plan. */
      fragment.workers = fragment.overrideWorkers;
    }
  }

  /* 2. Use scans to set workers, and verify constraints. */
  setAndVerifyScans(fragments, args);

  /* 3. Verify and propagate worker assignments using LocalMultiwayProducer/Consumer constraints. */
  verifyAndPropagateLocalEdgeConstraints(fragments);

  /* 4. Use singletons to set workers, and verify constraints. */
  setAndVerifySingletonConstraints(fragments, args);

  /* 5. Again, verify and propagate worker assignments using LocalMultiwayProducer/Consumer
   * constraints. */
  verifyAndPropagateLocalEdgeConstraints(fragments);

  /* Second to last: fill in all remaining fragments with all alive workers. */
  Server server = args.getServer();
  ImmutableList<Integer> allWorkers = ImmutableList.copyOf(server.getAliveWorkers());
  for (PlanFragmentEncoding fragment : fragments) {
    if (fragment.workers == null) {
      fragment.workers = allWorkers;
    }
  }
  /*
   * We don't need to verify and propagate LocalMultiwayProducer/Consumer constraints again, since
   * all the newly assigned fragments use all workers.
   */

  /* Fill in the #realOperatorIDs and the #realWorkerIDs fields for the producers and consumers. */
  fillInRealOperatorAndWorkerIDs(fragments);
}
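/*
 * For reference, a sketch of what {@code verifyAndPropagateLocalEdgeConstraints} is assumed to do,
 * based on rule 3 above: two fragments joined by a LocalMultiwayProducer/Consumer edge must run on
 * the same workers, so a known worker set on either side is copied to the other, and two
 * conflicting known sets are a constraint violation. The pairing and fixpoint details below are
 * assumptions, not the actual implementation.
 *
 * <pre>{@code
 * boolean changed = true;
 * while (changed) { // propagate until a fixpoint, since one copy may enable further copies
 *   changed = false;
 *   for (each producer/consumer fragment pair linked by a local multiway channel) {
 *     if (exactly one side has workers set) {
 *       copy that worker set to the other side;
 *       changed = true;
 *     } else if (both sides have workers set) {
 *       verify the two sets are equal, else reject the plan as a constraint violation;
 *     }
 *   }
 * }
 * }</pre>
 */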
/**
 * Builds the query plan to update the {@link Server}'s master catalog with the number of tuples
 * in every relation written by a subquery. The query plan is basically "SELECT RelationKey,
 * COUNT(*)" -> Collect at master -> "SELECT RelationKey, SUM(counts)".
 *
 * @param relationsWritten the metadata about which relations were written during the execution of
 *     this subquery.
 * @param server the server on which the catalog will be updated
 * @return the query plan to update the master's catalog with the new number of tuples for all
 *     written relations.
 */
public static SubQuery getRelationTupleUpdateSubQuery(
    final Map<RelationKey, RelationWriteMetadata> relationsWritten, final Server server) {
  ExchangePairID collectId = ExchangePairID.newID();
  Schema schema =
      Schema.ofFields(
          "userName",
          Type.STRING_TYPE,
          "programName",
          Type.STRING_TYPE,
          "relationName",
          Type.STRING_TYPE,
          "tupleCount",
          Type.LONG_TYPE);
  String dbms = server.getDBMS();
  Preconditions.checkState(
      dbms != null, "Server must have a configured DBMS environment variable");

  /*
   * Worker plans: for each relation, create a {@link DbQueryScan} to get the count, an {@link
   * Apply} to add the {@link RelationKey}, then a {@link CollectProducer} to send the count to
   * the master.
   */
  Map<Integer, SubQueryPlan> workerPlans = Maps.newHashMap();
  for (RelationWriteMetadata meta : relationsWritten.values()) {
    Set<Integer> workers = meta.getWorkers();
    RelationKey relation = meta.getRelationKey();

    for (Integer worker : workers) {
      DbQueryScan localCount =
          new DbQueryScan(
              "SELECT COUNT(*) FROM " + relation.toString(dbms),
              Schema.ofFields("tupleCount", Type.LONG_TYPE));
      List<Expression> expressions =
          ImmutableList.of(
              new Expression(
                  schema.getColumnName(0), new ConstantExpression(relation.getUserName())),
              new Expression(
                  schema.getColumnName(1), new ConstantExpression(relation.getProgramName())),
              new Expression(
                  schema.getColumnName(2), new ConstantExpression(relation.getRelationName())),
              new Expression(schema.getColumnName(3), new VariableExpression(0)));
      Apply addRelationName = new Apply(localCount, expressions);
      CollectProducer producer =
          new CollectProducer(addRelationName, collectId, MyriaConstants.MASTER_ID);
      if (!workerPlans.containsKey(worker)) {
        workerPlans.put(worker, new SubQueryPlan(producer));
      } else {
        workerPlans.get(worker).addRootOp(producer);
      }
    }
  }

  /* Master plan: collect, sum, insert the updates. */
  CollectConsumer consumer = new CollectConsumer(schema, collectId, workerPlans.keySet());
  MultiGroupByAggregate aggCounts =
      new MultiGroupByAggregate(
          consumer, new int[] {0, 1, 2}, new SingleColumnAggregatorFactory(3, AggregationOp.SUM));
  UpdateCatalog catalog = new UpdateCatalog(aggCounts, server);
  SubQueryPlan masterPlan = new SubQueryPlan(catalog);
  return new SubQuery(masterPlan, workerPlans);
}
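/*
 * Worked example (hypothetical relation name and workers): if relation public:adhoc:TwitterK was
 * written by workers {1, 2}, each of those workers runs roughly
 *
 * <pre>{@code
 * DbQueryScan("SELECT COUNT(*) FROM <TwitterK, quoted for the backend DBMS>")
 *   -> Apply["public", "adhoc", "TwitterK", $0]
 *   -> CollectProducer(to MASTER_ID)
 * }</pre>
 *
 * and the master runs CollectConsumer -> MultiGroupByAggregate, grouping on (userName,
 * programName, relationName) and summing tupleCount -> UpdateCatalog. The SUM is required because
 * every worker storing a partition contributes its own partial count for the same relation.
 */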