Example #1
  /**
   * Use the Catalog to set the workers for fragments that have scans, and verify that the workers
   * are consistent with existing constraints.
   *
   * @see #assignWorkersToFragments(List, ConstructArgs)
   * @param fragments the fragments of the plan
   * @param args other arguments necessary for query construction
   * @throws CatalogException if there is an error getting information from the Catalog
   */
  private static void setAndVerifyScans(
      final List<PlanFragmentEncoding> fragments, final ConstructArgs args)
      throws CatalogException {
    Server server = args.getServer();

    for (PlanFragmentEncoding fragment : fragments) {
      for (OperatorEncoding<?> operator : fragment.operators) {
        Set<Integer> scanWorkers;
        String scanRelation;

        if (operator instanceof TableScanEncoding) {
          TableScanEncoding scan = ((TableScanEncoding) operator);
          scanRelation = scan.relationKey.toString();
          scanWorkers = server.getWorkersForRelation(scan.relationKey, scan.storedRelationId);
        } else if (operator instanceof TempTableScanEncoding) {
          TempTableScanEncoding scan = ((TempTableScanEncoding) operator);
          scanRelation = "temporary relation " + scan.table;
          scanWorkers =
              server
                  .getQueryManager()
                  .getWorkersForTempRelation(
                      args.getQueryId(), RelationKey.ofTemp(args.getQueryId(), scan.table));
        } else {
          continue;
        }
        Preconditions.checkArgument(
            scanWorkers != null, "Unable to find workers that store %s", scanRelation);
        /*
         * Note: the current assumption is that all the partitions need to be scanned. This will not be true if we have
         * data replication or allow scanning only a subset of the partitions. Revise if needed.
         */
        setOrVerifyFragmentWorkers(fragment, scanWorkers, "Setting workers for " + scanRelation);
      }
    }
  }
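
A note on the helper: setOrVerifyFragmentWorkers is called above but not shown. Below is a minimal sketch of what such a set-or-verify helper could look like, assuming PlanFragmentEncoding exposes its worker list as a mutable field and assuming the usual Guava (Preconditions, ImmutableList, ImmutableSet) and java.util imports; the actual Myria implementation may differ.

  /*
   * Sketch only (not the actual Myria code): if the fragment has no workers yet, adopt the given
   * set; otherwise verify that the existing assignment matches the new constraint.
   */
  private static void setOrVerifyFragmentWorkers(
      final PlanFragmentEncoding fragment,
      final Collection<Integer> workers,
      final String context) {
    if (fragment.workers == null) {
      /* No assignment yet: take the workers implied by the constraint. */
      fragment.workers = ImmutableList.copyOf(workers);
    } else {
      /* Already assigned (e.g., by a user override): the two worker sets must agree. */
      Preconditions.checkArgument(
          ImmutableSet.copyOf(fragment.workers).equals(ImmutableSet.copyOf(workers)),
          "%s: fragment workers %s conflict with required workers %s",
          context,
          fragment.workers,
          workers);
    }
  }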
Example #2
  /**
   * Given an abstract execution plan, assign the workers to the fragments.
   *
   * <p>This assignment follows five rules, in precedence order:
   *
   * <ol>
   *   <li>Obey user-overrides of fragment workers.
   *   <li>Fragments that scan tables must use the workers that contain the data.
   *   <li>Edge constraints between fragments. E.g., a {@link LocalMultiwayProducerEncoding} must
   *       use the same set of workers as its consumer.
   *   <li>Singleton constraints: Fragments with a {@link CollectConsumerEncoding} or a {@link
   *       SingletonEncoding} must run on a single worker. If no worker set has been assigned yet,
   *       choose an arbitrary worker.
   *   <li>Unspecified: Any fragments that still have unspecified worker sets will use all workers
   *       in the cluster.
   * </ol>
   *
   * @param fragments the fragments of the plan
   * @param args other arguments necessary for query construction
   * @throws CatalogException if there is an error getting information about existing relations from
   *     the catalog
   */
  private static void assignWorkersToFragments(
      final List<PlanFragmentEncoding> fragments, final ConstructArgs args)
      throws CatalogException {

    /* 1. Honor user overrides. Note this is unchecked, but we may find constraint violations later. */
    for (PlanFragmentEncoding fragment : fragments) {
      if (fragment.overrideWorkers != null && fragment.overrideWorkers.size() > 0) {
        /* The workers are set in the plan. */
        fragment.workers = fragment.overrideWorkers;
      }
    }

    /* 2. Use scans to set workers, and verify constraints. */
    setAndVerifyScans(fragments, args);

    /* 3. Verify and propagate worker assignments using LocalMultiwayProducer/Consumer constraints. */
    verifyAndPropagateLocalEdgeConstraints(fragments);

    /* 4. Use singletons to set workers, and verify constraints. */
    setAndVerifySingletonConstraints(fragments, args);

    /* 5. Again, verify and propagate worker assignments using LocalMultiwayProducer/Consumer constraints. */
    verifyAndPropagateLocalEdgeConstraints(fragments);

    /* Second-to-last step: for all remaining fragments, fill them in with all workers. */
    Server server = args.getServer();
    ImmutableList<Integer> allWorkers = ImmutableList.copyOf(server.getAliveWorkers());
    for (PlanFragmentEncoding fragment : fragments) {
      if (fragment.workers == null) {
        fragment.workers = allWorkers;
      }
    }
    /*
     * We don't need to verify and propagate LocalMultiwayProducer/Consumer constraints again,
     * since all the new ones have all workers.
     */

    /* Fill in the #realOperatorIDs and the #realWorkerIDs fields for the producers and consumers. */
    fillInRealOperatorAndWorkerIDs(fragments);
  }
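
Step 4 relies on setAndVerifySingletonConstraints, which is not shown here. A hedged sketch of that rule follows, assuming the operator encodings named in the Javadoc (CollectConsumerEncoding, SingletonEncoding) and an arbitrary choice among the alive workers when nothing is assigned yet; the details are assumptions, not the actual Myria implementation.

  /*
   * Sketch of the singleton rule: fragments containing a CollectConsumerEncoding or a
   * SingletonEncoding must run on exactly one worker. If workers are already assigned, the
   * assignment must be a single worker; otherwise pick an arbitrary alive worker.
   */
  private static void setAndVerifySingletonConstraintsSketch(
      final List<PlanFragmentEncoding> fragments, final ConstructArgs args) {
    for (PlanFragmentEncoding fragment : fragments) {
      boolean needsSingleton = false;
      for (OperatorEncoding<?> operator : fragment.operators) {
        if (operator instanceof CollectConsumerEncoding || operator instanceof SingletonEncoding) {
          needsSingleton = true;
          break;
        }
      }
      if (!needsSingleton) {
        continue;
      }
      if (fragment.workers == null) {
        /* No workers assigned yet: choose an arbitrary alive worker. */
        Integer chosen = args.getServer().getAliveWorkers().iterator().next();
        fragment.workers = ImmutableList.of(chosen);
      } else {
        Preconditions.checkArgument(
            fragment.workers.size() == 1,
            "Fragment requires a single worker but has %s workers assigned",
            fragment.workers.size());
      }
    }
  }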
Example #3
  /**
   * Builds the query plan to update the {@link Server}'s master catalog with the number of tuples
   * in every relation written by a subquery. The query plan is basically "SELECT RelationKey,
   * COUNT(*)" -> Collect at master -> "SELECT RelationKey, SUM(counts)".
   *
   * @param relationsWritten the metadata about which relations were written during the execution of
   *     this subquery.
   * @param server the server on which the catalog will be updated
   * @return the query plan to update the master's catalog with the new number of tuples for all
   *     written relations.
   */
  public static SubQuery getRelationTupleUpdateSubQuery(
      final Map<RelationKey, RelationWriteMetadata> relationsWritten, final Server server) {
    ExchangePairID collectId = ExchangePairID.newID();
    Schema schema =
        Schema.ofFields(
            "userName",
            Type.STRING_TYPE,
            "programName",
            Type.STRING_TYPE,
            "relationName",
            Type.STRING_TYPE,
            "tupleCount",
            Type.LONG_TYPE);

    String dbms = server.getDBMS();
    Preconditions.checkState(
        dbms != null, "Server must have a configured DBMS environment variable");

    /*
     * Worker plans: for each relation, create a {@link DbQueryScan} to get the count, an {@link Apply} to add the
     * {@link RelationKey}, then a {@link CollectProducer} to send the count to the master.
     */
    Map<Integer, SubQueryPlan> workerPlans = Maps.newHashMap();
    for (RelationWriteMetadata meta : relationsWritten.values()) {
      Set<Integer> workers = meta.getWorkers();
      RelationKey relation = meta.getRelationKey();
      for (Integer worker : workers) {
        DbQueryScan localCount =
            new DbQueryScan(
                "SELECT COUNT(*) FROM " + relation.toString(dbms),
                Schema.ofFields("tupleCount", Type.LONG_TYPE));
        List<Expression> expressions =
            ImmutableList.of(
                new Expression(
                    schema.getColumnName(0), new ConstantExpression(relation.getUserName())),
                new Expression(
                    schema.getColumnName(1), new ConstantExpression(relation.getProgramName())),
                new Expression(
                    schema.getColumnName(2), new ConstantExpression(relation.getRelationName())),
                new Expression(schema.getColumnName(3), new VariableExpression(0)));
        Apply addRelationName = new Apply(localCount, expressions);
        CollectProducer producer =
            new CollectProducer(addRelationName, collectId, MyriaConstants.MASTER_ID);
        if (!workerPlans.containsKey(worker)) {
          workerPlans.put(worker, new SubQueryPlan(producer));
        } else {
          workerPlans.get(worker).addRootOp(producer);
        }
      }
    }

    /* Master plan: collect, sum, insert the updates. */
    CollectConsumer consumer = new CollectConsumer(schema, collectId, workerPlans.keySet());
    MultiGroupByAggregate aggCounts =
        new MultiGroupByAggregate(
            consumer, new int[] {0, 1, 2}, new SingleColumnAggregatorFactory(3, AggregationOp.SUM));
    UpdateCatalog catalog = new UpdateCatalog(aggCounts, server);
    SubQueryPlan masterPlan = new SubQueryPlan(catalog);

    return new SubQuery(masterPlan, workerPlans);
  }
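
To make the master-side aggregation concrete: the MultiGroupByAggregate above sums the per-worker partial counts, grouped by (userName, programName, relationName), before UpdateCatalog writes the totals. The following plain-Java sketch illustrates that semantics only; the List<Object[]> tuple representation is a stand-in for the tuple stream the CollectConsumer actually produces and is not part of the Myria API.

  /*
   * Illustration only: sum per-worker partial counts into one total per relation. Columns 0-2 are
   * the grouping key (userName, programName, relationName); column 3 is a worker's COUNT(*).
   */
  private static Map<List<String>, Long> sumCountsByRelation(final List<Object[]> collectedTuples) {
    Map<List<String>, Long> totals = new HashMap<>();
    for (Object[] tuple : collectedTuples) {
      List<String> key =
          ImmutableList.of((String) tuple[0], (String) tuple[1], (String) tuple[2]);
      totals.merge(key, (Long) tuple[3], Long::sum);
    }
    return totals;
  }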