/**
 * Wires bucket-map-join bookkeeping into the plan for the given map-join operator.
 *
 * If the operator carries an alias-to-bucket-file mapping, this method locates (or, when
 * {@code createLocalPlan} is set, creates) the {@link MapredLocalWork} that should own the
 * bucket context, attaches a freshly built {@link BucketMapJoinContext} to it, and marks the
 * local work as input-file-change sensitive.
 *
 * @param plan            the map-reduce work being assembled
 * @param currMapJoinOp   the map-join operator whose bucket mapping is being propagated;
 *                        a no-op when {@code null}
 * @param createLocalPlan whether a new local work may be created when none exists yet
 */
private static void setupBucketMapJoinInfo(MapredWork plan,
    AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp, boolean createLocalPlan) {
  if (currMapJoinOp != null) {
    LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
        currMapJoinOp.getConf().getAliasBucketFileNameMapping();
    if (aliasBucketFileNameMapping != null) {
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        // No plan-level local work: for a sort-merge-bucket join the operator may carry
        // its own local work; otherwise optionally create an empty one.
        if (currMapJoinOp instanceof SMBMapJoinOperator) {
          localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork();
        }
        if (localPlan == null && createLocalPlan) {
          localPlan = new MapredLocalWork(
              new LinkedHashMap<String, Operator<? extends Serializable>>(),
              new LinkedHashMap<String, FetchWork>());
        }
      } else {
        // local plan is not null, we want to merge it into SMBMapJoinOperator's local work
        if (currMapJoinOp instanceof SMBMapJoinOperator) {
          MapredLocalWork smbLocalWork =
              ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork();
          if (smbLocalWork != null) {
            localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
            localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
          }
        }
      }

      if (localPlan == null) {
        // Nothing to attach the bucket context to (and creation was not requested).
        return;
      }

      if (currMapJoinOp instanceof SMBMapJoinOperator) {
        // The SMB operator owns the (possibly merged) local work; detach it from the plan
        // so it is not executed twice.
        plan.setMapLocalWork(null);
        ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan);
      } else {
        plan.setMapLocalWork(localPlan);
      }
      BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
      localPlan.setBucketMapjoinContext(bucketMJCxt);
      bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
      bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBucketFileNameMapping());
      // Bucket matching depends on which input file is being processed, so the local
      // work must be re-evaluated on every input-file change.
      localPlan.setInputFileChangeSensitive(true);
      bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
      bucketMJCxt.setBucketMatcherClass(
          org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
    }
  }
}
/** * set the current task in the mapredWork. * * @param alias current alias * @param topOp the top operator of the stack * @param plan current plan * @param local whether you need to add to map-reduce or local work * @param tt_desc table descriptor */ public static void setTaskPlan( String path, String alias, Operator<? extends Serializable> topOp, MapredWork plan, boolean local, TableDesc tt_desc) throws SemanticException { if (path == null || alias == null) { return; } if (!local) { if (plan.getPathToAliases().get(path) == null) { plan.getPathToAliases().put(path, new ArrayList<String>()); } plan.getPathToAliases().get(path).add(alias); plan.getPathToPartitionInfo().put(path, new PartitionDesc(tt_desc, null)); plan.getAliasToWork().put(alias, topOp); } else { // populate local work if needed MapredLocalWork localPlan = plan.getMapLocalWork(); if (localPlan == null) { localPlan = new MapredLocalWork( new LinkedHashMap<String, Operator<? extends Serializable>>(), new LinkedHashMap<String, FetchWork>()); } assert localPlan.getAliasToWork().get(alias) == null; assert localPlan.getAliasToFetchWork().get(alias) == null; localPlan.getAliasToWork().put(alias, topOp); localPlan.getAliasToFetchWork().put(alias, new FetchWork(alias, tt_desc)); plan.setMapLocalWork(localPlan); } }
/**
 * Registers the given top operator in the map-reduce plan under {@code alias_id},
 * resolving the pruned partition list (computing it on-the-fly when {@code pList} is
 * null), applying sample pruning to partition paths, recording read entities, and
 * populating either the map-reduce path/alias/partition maps or the map-local
 * fetch work.
 *
 * @param alias_id  current alias
 * @param topOp     the top operator of the stack (expected to be a TableScanOperator
 *                  when partition pruning is triggered)
 * @param plan      current plan
 * @param local     whether to add to map-reduce or local work
 * @param opProcCtx processing context
 * @param pList     pruned partition list; if null it will be computed on-the-fly
 */
public static void setTaskPlan(String alias_id,
    Operator<? extends Serializable> topOp, MapredWork plan, boolean local,
    GenMRProcContext opProcCtx, PrunedPartitionList pList) throws SemanticException {
  ParseContext parseCtx = opProcCtx.getParseCtx();
  Set<ReadEntity> inputs = opProcCtx.getInputs();

  // Parallel lists: partition directory paths and their partition descriptors.
  ArrayList<Path> partDir = new ArrayList<Path>();
  ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

  // Set only for unpartitioned tables (single directory + table descriptor).
  Path tblDir = null;
  TableDesc tblDesc = null;

  PrunedPartitionList partsList = pList;

  if (partsList == null) {
    // Resolve the pruned partition list: first from the context cache, otherwise by
    // running the partition pruner; cache the result for later passes.
    try {
      partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
      if (partsList == null) {
        partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
            parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
            alias_id, parseCtx.getPrunedPartitions());
        parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
      }
    } catch (SemanticException e) {
      throw e;
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }
  }

  // Generate the map work for this alias_id
  Set<Partition> parts = null;
  // pass both confirmed and unknown partitions through the map-reduce
  // framework
  parts = partsList.getConfirmedPartns();
  parts.addAll(partsList.getUnknownPartns());
  PartitionDesc aliasPartnDesc = null;
  try {
    if (!parts.isEmpty()) {
      // Use the first partition's descriptor as the representative for the alias.
      aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
    }
  } catch (HiveException e) {
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }

  // The table does not have any partitions
  if (aliasPartnDesc == null) {
    aliasPartnDesc = new PartitionDesc(
        Utilities.getTableDesc(parseCtx.getTopToTable().get(topOp)), null);
  }

  plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);

  for (Partition part : parts) {
    // Record the read entity at the right granularity: the partition itself for a
    // partitioned table, otherwise the whole table.
    if (part.getTable().isPartitioned()) {
      inputs.add(new ReadEntity(part));
    } else {
      inputs.add(new ReadEntity(part.getTable()));
    }

    // Later the properties have to come from the partition as opposed
    // to from the table in order to support versioning.
    Path[] paths;
    sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);

    // Apply sample pruning to the partition's paths when a sample descriptor exists.
    if (sampleDescr != null) {
      paths = SamplePruner.prune(part, sampleDescr);
    } else {
      paths = part.getPath();
    }

    // is it a partitioned table ?
    if (!part.getTable().isPartitioned()) {
      // An unpartitioned table yields exactly one "partition"; remember its single
      // directory and table descriptor for the local-work fetch below.
      assert ((tblDir == null) && (tblDesc == null));

      tblDir = paths[0];
      tblDesc = Utilities.getTableDesc(part.getTable());
    }

    for (Path p : paths) {
      if (p == null) {
        continue;
      }
      String path = p.toString();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding " + path + " of table" + alias_id);
      }

      partDir.add(p);
      try {
        // NOTE(review): one PartitionDesc is added per path, so a multi-path partition
        // contributes the same descriptor multiple times — keeps partDir/partDesc aligned.
        partDesc.add(Utilities.getPartitionDesc(part));
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }
  }

  // Walk the two parallel lists together.
  Iterator<Path> iterPath = partDir.iterator();
  Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();

  if (!local) {
    while (iterPath.hasNext()) {
      assert iterPartnDesc.hasNext();
      String path = iterPath.next().toString();

      PartitionDesc prtDesc = iterPartnDesc.next();

      // Add the path to alias mapping
      if (plan.getPathToAliases().get(path) == null) {
        plan.getPathToAliases().put(path, new ArrayList<String>());
      }
      plan.getPathToAliases().get(path).add(alias_id);
      plan.getPathToPartitionInfo().put(path, prtDesc);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Information added for path " + path);
      }
    }

    assert plan.getAliasToWork().get(alias_id) == null;
    plan.getAliasToWork().put(alias_id, topOp);
  } else {
    // populate local work if needed
    MapredLocalWork localPlan = plan.getMapLocalWork();
    if (localPlan == null) {
      localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new LinkedHashMap<String, FetchWork>());
    }

    assert localPlan.getAliasToWork().get(alias_id) == null;
    assert localPlan.getAliasToFetchWork().get(alias_id) == null;
    localPlan.getAliasToWork().put(alias_id, topOp);
    if (tblDir == null) {
      // Partitioned table: fetch from the collected partition directories/descriptors.
      localPlan
          .getAliasToFetchWork()
          .put(alias_id, new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
    } else {
      // Unpartitioned table: fetch directly from the single table directory.
      localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir.toString(), tblDesc));
    }
    plan.setMapLocalWork(localPlan);
  }
}