/**
 * Registers a table referenced by the query under the given alias.
 *
 * <p>Terminology: a <em>destination table</em> is a table whose columns are getting queried; an
 * <em>intermediate table</em> is a table which is only used as a link between the cube and a
 * destination table.
 *
 * @param alias alias of the table in the query; it is lower-cased before being used as a key
 * @param tblName name of the table to look up in the metastore
 * @param isOptional pass false when it's a destination table (this includes the case where a
 *     join chain destination is being added); pass true when it's an intermediate table
 * @param isChainedDimension pass true when you're adding the dimension as a join chain
 *     destination, pass false when this table is mentioned by name in the user query
 * @return true if the alias was already registered or the table was added as a cube or a
 *     dimension; false if the table is neither, or if the metastore lookup threw a
 *     HiveException
 * @throws LensException if a cube has already been picked and {@code tblName} names a
 *     different cube
 */
private boolean addQueriedTable(
    String alias, String tblName, boolean isOptional, boolean isChainedDimension)
    throws LensException {
  String aliasKey = alias.toLowerCase();
  if (cubeTbls.containsKey(aliasKey)) {
    return true;
  }
  try {
    if (metastoreClient.isCube(tblName)) {
      boolean clashesWithPickedCube = cube != null && !cube.getName().equalsIgnoreCase(tblName);
      if (clashesWithPickedCube) {
        throw new LensException(
            LensCubeErrorCode.MORE_THAN_ONE_CUBE.getLensErrorInfo(), cube.getName(), tblName);
      }
      cube = metastoreClient.getCube(tblName);
      cubeTbls.put(aliasKey, (AbstractCubeTable) cube);
      return true;
    }
    if (metastoreClient.isDimension(tblName)) {
      Dimension dimension = metastoreClient.getDimension(tblName);
      if (!isOptional) {
        dimensions.add(dimension);
      }
      if (!isChainedDimension) {
        nonChainedDimensions.add(dimension);
      }
      cubeTbls.put(aliasKey, dimension);
      return true;
    }
    // Neither a cube nor a dimension.
    return false;
  } catch (HiveException e) {
    // A failed metastore lookup is reported to the caller as "not added".
    return false;
  }
}
// When candidate table does not have the field, this method checks // if the field can be reached through reference, // if yes adds the ref usage and returns to true, if not returns false. boolean addRefUsage(CandidateTable table, String col, String srcTbl) throws LensException { // available as referenced col if (referencedCols.containsKey(col)) { for (ReferencedQueriedColumn refer : referencedCols.get(col)) { if (refer.srcTable.getName().equalsIgnoreCase(srcTbl)) { // check if reference source column is available in src table? // should not be required here. Join resolution will figure out if // there is no path // to the source table log.info("Adding denormalized column for column:{} for table:{}", col, table); Set<ReferencedQueriedColumn> refCols = tableToRefCols.get(table.getName()); if (refCols == null) { refCols = new HashSet<>(); tableToRefCols.put(table.getName(), refCols); } refCols.add(refer); // Add to optional tables for (ChainRefCol refCol : refer.col.getChainRefColumns()) { cubeql.addOptionalDimTable( refCol.getChainName(), table, false, refer.col.getName(), true, refCol.getRefColumn()); } return true; } } } return false; }
/**
 * Collects the partition column of every time range returned by {@code getTimeRanges()}.
 *
 * @return a new set holding the distinct partition columns; empty when there are no time ranges
 */
public Set<String> getPartitionColumnsQueried() {
  Set<String> partitionColumns = Sets.newHashSet();
  for (TimeRange timeRange : getTimeRanges()) {
    String partitionColumn = timeRange.getPartitionColumn();
    partitionColumns.add(partitionColumn);
  }
  return partitionColumns;
}
public Set<Dimension> rewriteDenormctx( CandidateFact cfact, Map<Dimension, CandidateDim> dimsToQuery, boolean replaceFact) throws LensException { Set<Dimension> refTbls = new HashSet<>(); if (!tableToRefCols.isEmpty()) { // pick referenced columns for fact if (cfact != null) { pickColumnsForTable(cfact.getName()); } // pick referenced columns for dimensions if (dimsToQuery != null && !dimsToQuery.isEmpty()) { for (CandidateDim cdim : dimsToQuery.values()) { pickColumnsForTable(cdim.getName()); } } // Replace picked reference in all the base trees replaceReferencedColumns(cfact, replaceFact); // Add the picked references to dimsToQuery for (PickedReference picked : pickedRefs) { if (isPickedFor(picked, cfact, dimsToQuery)) { refTbls.add( (Dimension) cubeql.getCubeTableForAlias(picked.getChainRef().getChainName())); cubeql.addColumnsQueried( picked.getChainRef().getChainName(), picked.getChainRef().getRefColumn()); } } } return refTbls; }
/**
 * Records {@code refer} as a referenced column for {@code col}, creating the per-column set on
 * first use.
 *
 * @param col column name used as the key in {@code referencedCols}
 * @param refer referenced column to record
 */
void addReferencedCol(String col, ReferencedQueriedColumn refer) {
  Set<ReferencedQueriedColumn> existing = referencedCols.get(col);
  if (existing != null) {
    existing.add(refer);
    return;
  }
  Set<ReferencedQueriedColumn> created = new HashSet<>();
  created.add(refer);
  referencedCols.put(col, created);
}
/**
 * Records {@code refer} as a picked reference for {@code col}, creating the per-column set on
 * first use.
 *
 * @param col column name used as the key in {@code pickedReferences}
 * @param refer picked reference to record
 */
private void addPickedReference(String col, PickedReference refer) {
  Set<PickedReference> existing = pickedReferences.get(col);
  if (existing != null) {
    existing.add(refer);
    return;
  }
  Set<PickedReference> created = new HashSet<>();
  created.add(refer);
  pickedReferences.put(col, created);
}
/**
 * Records that {@code column} is queried through the table alias {@code alias}.
 *
 * <p>The alias is lower-cased before it is used as the key; the column is stored as given.
 * Columns are kept in a {@link LinkedHashSet}, so insertion order is retained per alias.
 *
 * @param alias table alias
 * @param column column name to record
 */
public void addColumnsQueried(String alias, String column) {
  String aliasKey = alias.toLowerCase();
  Set<String> columnsForAlias = tblAliasToColumns.get(aliasKey);
  if (columnsForAlias == null) {
    columnsForAlias = new LinkedHashSet<String>();
    tblAliasToColumns.put(aliasKey, columnsForAlias);
  }
  columnsForAlias.add(column);
}
public void pruneCandidateFactWithCandidateSet(CandidateTablePruneCause pruneCause) { // remove candidate facts that are not part of any covering set Set<CandidateFact> allCoveringFacts = new HashSet<CandidateFact>(); for (Set<CandidateFact> set : candidateFactSets) { allCoveringFacts.addAll(set); } for (Iterator<CandidateFact> i = candidateFacts.iterator(); i.hasNext(); ) { CandidateFact cfact = i.next(); if (!allCoveringFacts.contains(cfact)) { log.info("Not considering fact table:{} as {}", cfact, pruneCause); addFactPruningMsgs(cfact.fact, pruneCause); i.remove(); } } }
private void pickColumnsForTable(String tbl) throws LensException { if (tableToRefCols.containsKey(tbl)) { for (ReferencedQueriedColumn refered : tableToRefCols.get(tbl)) { Iterator<ChainRefCol> iter = refered.chainRefCols.iterator(); while (iter.hasNext()) { // remove unreachable references ChainRefCol reference = iter.next(); if (!cubeql .getAutoJoinCtx() .isReachableDim( (Dimension) cubeql.getCubeTableForAlias(reference.getChainName()), reference.getChainName())) { iter.remove(); } } if (refered.chainRefCols.isEmpty()) { throw new LensException( LensCubeErrorCode.NO_REF_COL_AVAILABLE.getLensErrorInfo(), refered.col.getName()); } PickedReference picked = new PickedReference( refered.chainRefCols.iterator().next(), cubeql.getAliasForTableName(refered.srcTable.getName()), tbl); addPickedReference(refered.col.getName(), picked); pickedRefs.add(picked); } } }
/**
 * Builds the query context from a parsed query.
 *
 * <p>Stores the AST, query block and configuration, obtains the metastore client, captures the
 * where / having / order-by / group-by / select trees that exist for the clause, records the
 * string form of every aggregation expression, and finally resolves the queried tables via
 * {@code extractMetaTables()}.
 *
 * @param ast root AST of the query
 * @param qb query block for the query
 * @param queryConf query configuration
 * @param metastoreConf configuration used to obtain the {@code CubeMetastoreClient}
 * @throws LensException if the metastore client cannot be obtained (wrapping the
 *     HiveException) or if table extraction fails
 */
public CubeQueryContext(ASTNode ast, QB qb, Configuration queryConf, HiveConf metastoreConf)
    throws LensException {
  this.ast = ast;
  this.qb = qb;
  this.conf = queryConf;
  this.clauseName = getClause();
  this.timeRanges = new ArrayList<TimeRange>();
  try {
    metastoreClient = CubeMetastoreClient.getInstance(metastoreConf);
  } catch (HiveException e) {
    throw new LensException(e);
  }

  // Each tree is assigned only when the clause has one; otherwise the field is left untouched.
  ASTNode whereTree = qb.getParseInfo().getWhrForClause(clauseName);
  if (whereTree != null) {
    this.whereAST = whereTree;
  }
  ASTNode havingTree = qb.getParseInfo().getHavingForClause(clauseName);
  if (havingTree != null) {
    this.havingAST = havingTree;
  }
  ASTNode orderByTree = qb.getParseInfo().getOrderByForClause(clauseName);
  if (orderByTree != null) {
    this.orderByAST = orderByTree;
  }
  ASTNode groupByTree = qb.getParseInfo().getGroupByForClause(clauseName);
  if (groupByTree != null) {
    this.groupByAST = groupByTree;
  }
  ASTNode selectTree = qb.getParseInfo().getSelForClause(clauseName);
  if (selectTree != null) {
    this.selectAST = selectTree;
  }

  for (ASTNode aggregation :
      qb.getParseInfo().getAggregationExprsForClause(clauseName).values()) {
    aggregateExprs.add(HQLParser.getString(aggregation));
  }

  extractMetaTables();
}
private void extractMetaTables() throws LensException { List<String> tabAliases = new ArrayList<String>(qb.getTabAliases()); Set<String> missing = new HashSet<String>(); for (String alias : tabAliases) { boolean added = addQueriedTable(alias); if (!added) { missing.add(alias); } } for (String alias : missing) { // try adding them as joinchains boolean added = addJoinChain(alias, false); if (!added) { log.info("Queried tables do not exist. Missing table:{}", alias); throw new LensException(LensCubeErrorCode.NEITHER_CUBE_NOR_DIMENSION.getLensErrorInfo()); } } }
/**
 * Builds the FROM string of the rewritten query.
 *
 * <p>With a join AST the string is produced by {@code getQLString} over the join tree. Without
 * one, a cube query must involve no dimensions and uses the fact's storage string, while a
 * query without a cube must involve exactly one dimension and uses that dimension's storage
 * string.
 *
 * @param fact candidate fact picked for the query; dereferenced when a cube is present and
 *     there is no join AST
 * @param dimsToQuery candidate dimension tables picked for the query, keyed by dimension
 * @return the FROM string
 * @throws LensException with {@code NO_JOIN_CONDITION_AVAILABLE} when there is no join AST but
 *     the number of dimensions requires one
 */
String getQBFromString(CandidateFact fact, Map<Dimension, CandidateDim> dimsToQuery)
    throws LensException {
  if (getJoinAST() != null) {
    StringBuilder joined = new StringBuilder();
    getQLString(qb.getQbJoinTree(), joined, fact, dimsToQuery);
    return joined.toString();
  }

  if (cube != null) {
    if (!dimensions.isEmpty()) {
      throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo());
    }
    return fact.getStorageString(getAliasForTableName(cube.getName()));
  }

  // No cube and no join: only a single dimension can be queried.
  if (dimensions.size() != 1) {
    throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo());
  }
  Dimension onlyDim = dimensions.iterator().next();
  return dimsToQuery.get(onlyDim).getStorageString(getAliasForTableName(onlyDim.getName()));
}
public boolean isCubeMeasure(String col) { if (col == null) { return false; } col = col.trim(); // Take care of brackets added around col names in HQLParsrer.getString if (col.startsWith("(") && col.endsWith(")") && col.length() > 2) { col = col.substring(1, col.length() - 1); } String[] split = StringUtils.split(col, "."); if (split.length <= 1) { col = col.trim().toLowerCase(); if (queriedExprs.contains(col)) { return exprCtx .getExpressionContext(col, getAliasForTableName(cube.getName())) .hasMeasures(); } else { return cube.getMeasureNames().contains(col); } } else { String cubeName = split[0].trim().toLowerCase(); String colName = split[1].trim().toLowerCase(); if (cubeName.equalsIgnoreCase(cube.getName()) || cubeName.equals(getAliasForTableName(cube.getName()))) { if (queriedExprs.contains(colName)) { return exprCtx.getExpressionContext(colName, cubeName).hasMeasures(); } else { return cube.getMeasureNames().contains(colName.toLowerCase()); } } else { return false; } } }
/** * Prune candidate fact sets with respect to available candidate facts. * * <p>Prune a candidate set, if any of the fact is missing. * * @param pruneCause */ public void pruneCandidateFactSet(CandidateTablePruneCode pruneCause) { // remove candidate fact sets that have missing facts for (Iterator<Set<CandidateFact>> i = candidateFactSets.iterator(); i.hasNext(); ) { Set<CandidateFact> cfacts = i.next(); if (!candidateFacts.containsAll(cfacts)) { log.info( "Not considering fact table set:{} as they have non candidate tables and facts missing because of {}", cfacts, pruneCause); i.remove(); } } // prune candidate facts pruneCandidateFactWithCandidateSet(CandidateTablePruneCode.ELEMENT_IN_SET_PRUNED); }
private HQLContextInterface createHQLContext( Set<CandidateFact> facts, Map<Dimension, CandidateDim> dimsToQuery, Map<CandidateFact, Set<Dimension>> factDimMap) throws LensException { if (facts == null || facts.size() == 0) { return new DimOnlyHQLContext(dimsToQuery, this, this); } else if (facts.size() == 1 && facts.iterator().next().getStorageTables().size() > 1) { // create single fact with multiple storage context return new SingleFactMultiStorageHQLContext(facts.iterator().next(), dimsToQuery, this, this); } else if (facts.size() == 1 && facts.iterator().next().getStorageTables().size() == 1) { // create single fact context return new SingleFactSingleStorageHQLContext( facts.iterator().next(), dimsToQuery, this, this); } else { return new MultiFactHQLContext(facts, dimsToQuery, factDimMap, this); } }
/**
 * Picks one candidate dimension table for each of the given dimensions.
 *
 * <p>The first candidate returned by the iterator of the dimension's candidate collection is
 * picked. If a dimension has no candidates, the rewrite fails.
 *
 * @param dimensions dimensions that need a candidate table
 * @return a new map from each dimension to its picked candidate; empty for an empty input
 * @throws LensException a {@code NoCandidateDimAvailableException} when a dimension has no
 *     candidate, or a plain LensException if serializing the pruning messages fails
 */
private Map<Dimension, CandidateDim> pickCandidateDimsToQuery(Set<Dimension> dimensions)
    throws LensException {
  Map<Dimension, CandidateDim> picked = new HashMap<Dimension, CandidateDim>();
  for (Dimension dim : dimensions) {
    boolean hasCandidates = candidateDims.get(dim) != null && candidateDims.get(dim).size() > 0;
    if (hasCandidates) {
      CandidateDim first = candidateDims.get(dim).iterator().next();
      log.info(
          "Available candidate dims are:{}, picking up {} for querying.",
          candidateDims.get(dim),
          first.dimtable);
      picked.put(dim, first);
      continue;
    }

    // No candidate for this dimension: fail the rewrite.
    // NOTE(review): `reason` is assigned but never read in this method, and the log/throw
    // below dereference dimPruningMsgs.get(dim) without the null check used just above.
    String reason = "";
    if (dimPruningMsgs.get(dim) != null && !dimPruningMsgs.get(dim).isEmpty()) {
      ByteArrayOutputStream out = null;
      try {
        ObjectMapper mapper = new ObjectMapper();
        out = new ByteArrayOutputStream();
        mapper.writeValue(out, dimPruningMsgs.get(dim).getJsonObject());
        reason = out.toString("UTF-8");
      } catch (Exception e) {
        throw new LensException("Error writing dim pruning messages", e);
      } finally {
        if (out != null) {
          try {
            out.close();
          } catch (IOException e) {
            throw new LensException(e);
          }
        }
      }
    }
    log.error(
        "Query rewrite failed due to NO_CANDIDATE_DIM_AVAILABLE, Cause {}",
        dimPruningMsgs.get(dim).toJsonObject());
    throw new NoCandidateDimAvailableException(dimPruningMsgs.get(dim));
  }
  return picked;
}
/**
 * Tells whether the query involves at least one dimension.
 *
 * @return true if {@code dimensions} is non-null and non-empty
 */
public boolean hasDimensionInQuery() {
  if (dimensions == null) {
    return false;
  }
  return !dimensions.isEmpty();
}
/**
 * Tells whether {@code expr} is one of the recorded aggregate expressions.
 *
 * <p>The expression is lower-cased before the lookup; a null expression is looked up as null.
 *
 * @param expr expression text; may be null
 * @return true if {@code aggregateExprs} contains the lower-cased expression
 */
public boolean isAggregateExpr(String expr) {
  String lookupKey;
  if (expr == null) {
    lookupKey = null;
  } else {
    lookupKey = expr.toLowerCase();
  }
  return aggregateExprs.contains(lookupKey);
}
/**
 * Tells whether the query has aggregates.
 *
 * @return true if any aggregate expression has been recorded, or otherwise if the expression
 *     context reports aggregates
 */
public boolean hasAggregates() {
  if (!aggregateExprs.isEmpty()) {
    return true;
  }
  // The expression context is consulted only when no aggregate expression is recorded.
  return getExprCtx().hasAggregates();
}
/**
 * Adds {@code expr} to {@code aggregateExprs} exactly as given.
 *
 * <p>NOTE(review): no case normalization happens here, while {@code isAggregateExpr}
 * lower-cases its argument before the lookup; presumably callers pass lower-cased text —
 * confirm against callers.
 *
 * @param expr aggregate expression text to record
 */
public void addAggregateExpr(String expr) { aggregateExprs.add(expr); }
/**
 * Adds every element of {@code dimAttrs} to {@code queriedDimAttrs}.
 *
 * @param dimAttrs dimension attribute names to record
 */
public void addQueriedDimAttrs(Set<String> dimAttrs) { queriedDimAttrs.addAll(dimAttrs); }
public String toHQL() throws LensException { Set<CandidateFact> cfacts = pickCandidateFactToQuery(); Map<Dimension, CandidateDim> dimsToQuery = pickCandidateDimsToQuery(dimensions); log.info("facts:{}, dimsToQuery: {}", cfacts, dimsToQuery); if (autoJoinCtx != null) { // prune join paths for picked fact and dimensions autoJoinCtx.pruneAllPaths(cube, cfacts, dimsToQuery); } Map<CandidateFact, Set<Dimension>> factDimMap = new HashMap<>(); if (cfacts != null) { if (cfacts.size() > 1) { // copy ASTs for each fact for (CandidateFact cfact : cfacts) { cfact.copyASTs(this); factDimMap.put(cfact, new HashSet<>(dimsToQuery.keySet())); } } for (CandidateFact fact : cfacts) { addRangeClauses(fact); } } // pick dimension tables required during expression expansion for the picked fact and dimensions Set<Dimension> exprDimensions = new HashSet<Dimension>(); if (cfacts != null) { for (CandidateFact cfact : cfacts) { Set<Dimension> factExprDimTables = exprCtx.rewriteExprCtx(cfact, dimsToQuery, cfacts.size() > 1 ? 
cfact : this); exprDimensions.addAll(factExprDimTables); if (cfacts.size() > 1) { factDimMap.get(cfact).addAll(factExprDimTables); } } if (cfacts.size() > 1) { havingAST = MultiFactHQLContext.pushDownHaving(havingAST, this, cfacts); } } else { // dim only query exprDimensions.addAll(exprCtx.rewriteExprCtx(null, dimsToQuery, this)); } dimsToQuery.putAll(pickCandidateDimsToQuery(exprDimensions)); log.info("facts:{}, dimsToQuery: {}", cfacts, dimsToQuery); // pick denorm tables for the picked fact and dimensions Set<Dimension> denormTables = new HashSet<Dimension>(); if (cfacts != null) { for (CandidateFact cfact : cfacts) { Set<Dimension> factDenormTables = deNormCtx.rewriteDenormctx(cfact, dimsToQuery, cfacts.size() > 1); denormTables.addAll(factDenormTables); if (cfacts.size() > 1) { factDimMap.get(cfact).addAll(factDenormTables); } } } else { denormTables.addAll(deNormCtx.rewriteDenormctx(null, dimsToQuery, false)); } dimsToQuery.putAll(pickCandidateDimsToQuery(denormTables)); log.info("facts:{}, dimsToQuery: {}", cfacts, dimsToQuery); // Prune join paths once denorm tables are picked if (autoJoinCtx != null) { // prune join paths for picked fact and dimensions autoJoinCtx.pruneAllPaths(cube, cfacts, dimsToQuery); } if (autoJoinCtx != null) { // add optional dims from Join resolver Set<Dimension> joiningTables = new HashSet<Dimension>(); if (cfacts != null && cfacts.size() > 1) { for (CandidateFact cfact : cfacts) { Set<Dimension> factJoiningTables = autoJoinCtx.pickOptionalTables(cfact, factDimMap.get(cfact), this); factDimMap.get(cfact).addAll(factJoiningTables); joiningTables.addAll(factJoiningTables); } } else { joiningTables.addAll(autoJoinCtx.pickOptionalTables(null, dimsToQuery.keySet(), this)); } dimsToQuery.putAll(pickCandidateDimsToQuery(joiningTables)); } log.info("Picked Fact:{} dimsToQuery: {}", cfacts, dimsToQuery); pickedDimTables = dimsToQuery.values(); pickedFacts = cfacts; if (cfacts != null) { if (cfacts.size() > 1) { // Update ASTs for each 
fact for (CandidateFact cfact : cfacts) { cfact.updateASTs(this); } whereAST = MultiFactHQLContext.convertHavingToWhere( havingAST, this, cfacts, new DefaultAliasDecider()); } } hqlContext = createHQLContext(cfacts, dimsToQuery, factDimMap); return hqlContext.toHQL(); }
/**
 * Adds every element of {@code msrs} to {@code queriedMsrs}.
 *
 * @param msrs measure names to record
 */
public void addQueriedMsrs(Set<String> msrs) { queriedMsrs.addAll(msrs); }
/**
 * Adds every element of {@code exprs} to {@code queriedExprs}.
 *
 * @param exprs expression names to record
 */
public void addQueriedExprs(Set<String> exprs) { queriedExprs.addAll(exprs); }
/**
 * Adds every element of {@code exprs} to {@code queriedExprsWithMeasures}.
 *
 * @param exprs expression names to record
 */
public void addQueriedExprsWithMeasures(Set<String> exprs) { queriedExprsWithMeasures.addAll(exprs); }