/** * Check whether a join is valid. Invalid joins are CROSS JOIN, FULL OUTER JOIN, any join without * criteria, any join with no equality criteria, and any outer join that has the outer side not * the same as the dependent. * * @param joinNode The join node to check * @param sourceNode The access node being considered * @param analysisRecord TODO * @return True if valid for making dependent * @throws TeiidComponentException * @throws QueryMetadataException */ boolean isValidJoin(PlanNode joinNode, PlanNode sourceNode, AnalysisRecord analysisRecord) throws QueryMetadataException, TeiidComponentException { JoinType jtype = (JoinType) joinNode.getProperty(NodeConstants.Info.JOIN_TYPE); // Check that join is not a CROSS join or FULL OUTER join if (jtype.equals(JoinType.JOIN_CROSS) || jtype.equals(JoinType.JOIN_FULL_OUTER)) { sourceNode.recordDebugAnnotation( "parent join is CROSS or FULL OUTER", null, "Rejecting dependent join", analysisRecord, null); //$NON-NLS-1$ //$NON-NLS-2$ return false; } if (!joinNode.getExportedCorrelatedReferences().isEmpty()) { sourceNode.recordDebugAnnotation( "parent join has a correlated nested table", null, "Rejecting dependent join", analysisRecord, null); //$NON-NLS-1$ //$NON-NLS-2$ return false; } // Check that join criteria exist List jcrit = (List) joinNode.getProperty(NodeConstants.Info.JOIN_CRITERIA); if (jcrit == null || jcrit.size() == 0) { sourceNode.recordDebugAnnotation( "parent join has has no join criteria", null, "Rejecting dependent join", analysisRecord, null); //$NON-NLS-1$ //$NON-NLS-2$ return false; } if (joinNode.getProperty(NodeConstants.Info.LEFT_EXPRESSIONS) == null) { sourceNode.recordDebugAnnotation( "parent join has no equa-join predicates", null, "Rejecting dependent join", analysisRecord, null); //$NON-NLS-1$ //$NON-NLS-2$ return false; } // Check that for a left or right outer join the dependent side must be the inner if (jtype.isOuter() && JoinUtil.getInnerSideJoinNodes(joinNode)[0] != sourceNode) { sourceNode.recordDebugAnnotation( "node is on outer side of the join", null, "Rejecting dependent join", analysisRecord, null); //$NON-NLS-1$ //$NON-NLS-2$ return false; } return true; }
@Override public void processOp(Object row, int tag) throws HiveException { try { if (firstRow) { // generate the map metadata generateMapMetaData(); firstRow = false; } // get alias alias = order[tag]; // alias = (byte)tag; if ((lastAlias == null) || (!lastAlias.equals(alias))) { nextSz = joinEmitInterval; } // compute keys and values as StandardObjects AbstractMapJoinKey key = JoinUtil.computeMapJoinKeys( row, joinKeys.get(alias), joinKeysObjectInspectors.get(alias)); ArrayList<Object> value = JoinUtil.computeValues( row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors.get(alias), noOuterJoin); // Add the value to the ArrayList storage.get((byte) tag).add(value); for (Byte pos : order) { if (pos.intValue() != tag) { MapJoinObjectValue o = mapJoinTables.get(pos).get(key); MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos); // there is no join-value or join-key has all null elements if (o == null || key.hasAnyNulls()) { if (noOuterJoin) { storage.put(pos, emptyList); } else { storage.put(pos, dummyObjVectors[pos.intValue()]); } } else { rowContainer.reset(o.getObj()); storage.put(pos, rowContainer); } } } // generate the output records checkAndGenObject(); // done with the row storage.get((byte) tag).clear(); for (Byte pos : order) { if (pos.intValue() != tag) { storage.put(pos, null); } } } catch (SerDeException e) { e.printStackTrace(); throw new HiveException(e); } }
@Override public void processOp(Object row, int tag) throws HiveException { try { reportProgress(); // get alias alias = (byte) tag; if ((lastAlias == null) || (!lastAlias.equals(alias))) { nextSz = joinEmitInterval; } ArrayList<Object> nr = JoinUtil.computeValues( row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors.get(alias), noOuterJoin); if (handleSkewJoin) { skewJoinKeyContext.handleSkew(tag); } // number of rows for the key in the given table int sz = storage.get(alias).size(); StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString()); Object keyObject = soi.getStructFieldData(row, sf); // Are we consuming too much memory if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) { if (sz == joinEmitInterval) { // The input is sorted by alias, so if we are already in the last join // operand, // we can emit some results now. // Note this has to be done before adding the current row to the // storage, // to preserve the correctness for outer joins. checkAndGenObject(); storage.get(alias).clear(); } } else { if (sz == nextSz) { // Output a warning if we reached at least 1000 rows for a join // operand // We won't output a warning for the last join operand since the size // will never goes to joinEmitInterval. LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject); nextSz = getNextSize(nextSz); } } // Add the value to the vector storage.get(alias).add(nr); // if join-key is null, process each row in different group. if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) { endGroup(); startGroup(); } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); } }
public void testSimple() throws Exception { final String idField = "id"; final String toField = "productId"; Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMergePolicy(newLogMergePolicy())); // 0 Document doc = new Document(); doc.add(new TextField("description", "random text", Field.Store.NO)); doc.add(new TextField("name", "name1", Field.Store.NO)); doc.add(new TextField(idField, "1", Field.Store.NO)); w.addDocument(doc); // 1 doc = new Document(); doc.add(new TextField("price", "10.0", Field.Store.NO)); doc.add(new TextField(idField, "2", Field.Store.NO)); doc.add(new TextField(toField, "1", Field.Store.NO)); w.addDocument(doc); // 2 doc = new Document(); doc.add(new TextField("price", "20.0", Field.Store.NO)); doc.add(new TextField(idField, "3", Field.Store.NO)); doc.add(new TextField(toField, "1", Field.Store.NO)); w.addDocument(doc); // 3 doc = new Document(); doc.add(new TextField("description", "more random text", Field.Store.NO)); doc.add(new TextField("name", "name2", Field.Store.NO)); doc.add(new TextField(idField, "4", Field.Store.NO)); w.addDocument(doc); w.commit(); // 4 doc = new Document(); doc.add(new TextField("price", "10.0", Field.Store.NO)); doc.add(new TextField(idField, "5", Field.Store.NO)); doc.add(new TextField(toField, "4", Field.Store.NO)); w.addDocument(doc); // 5 doc = new Document(); doc.add(new TextField("price", "20.0", Field.Store.NO)); doc.add(new TextField(idField, "6", Field.Store.NO)); doc.add(new TextField(toField, "4", Field.Store.NO)); w.addDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); w.close(); // Search for product Query joinQuery = JoinUtil.createJoinQuery( idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None); TopDocs result = indexSearcher.search(joinQuery, 10); assertEquals(2, result.totalHits); assertEquals(4, result.scoreDocs[0].doc); assertEquals(5, result.scoreDocs[1].doc); joinQuery = JoinUtil.createJoinQuery( idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None); result = indexSearcher.search(joinQuery, 10); assertEquals(2, result.totalHits); assertEquals(1, result.scoreDocs[0].doc); assertEquals(2, result.scoreDocs[1].doc); // Search for offer joinQuery = JoinUtil.createJoinQuery( toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None); result = indexSearcher.search(joinQuery, 10); assertEquals(1, result.totalHits); assertEquals(3, result.scoreDocs[0].doc); indexSearcher.getIndexReader().close(); dir.close(); }
private void executeRandomJoin( boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) throws Exception { for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) { if (VERBOSE) { System.out.println("indexIter=" + indexIter); } Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( random(), dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)) .setMergePolicy(newLogMergePolicy())); final boolean scoreDocsInOrder = TestJoinUtil.random().nextBoolean(); IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder); IndexReader topLevelReader = w.getReader(); w.close(); for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) { if (VERBOSE) { System.out.println("searchIter=" + searchIter); } IndexSearcher indexSearcher = newSearcher(topLevelReader); int r = random().nextInt(context.randomUniqueValues.length); boolean from = context.randomFrom[r]; String randomValue = context.randomUniqueValues[r]; FixedBitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context); final Query actualQuery = new TermQuery(new Term("value", randomValue)); if (VERBOSE) { System.out.println("actualQuery=" + actualQuery); } final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)]; if (VERBOSE) { System.out.println("scoreMode=" + scoreMode); } final Query joinQuery; if (from) { joinQuery = JoinUtil.createJoinQuery( "from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode); } else { joinQuery = JoinUtil.createJoinQuery( "to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode); } if (VERBOSE) { System.out.println("joinQuery=" + joinQuery); } // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd // be also testing TopDocsCollector... final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc()); final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, false); indexSearcher.search( joinQuery, new Collector() { int docBase; @Override public void collect(int doc) throws IOException { actualResult.set(doc + docBase); topScoreDocCollector.collect(doc); } @Override public void setNextReader(AtomicReaderContext context) { docBase = context.docBase; topScoreDocCollector.setNextReader(context); } @Override public void setScorer(Scorer scorer) throws IOException { topScoreDocCollector.setScorer(scorer); } @Override public boolean acceptsDocsOutOfOrder() { return scoreDocsInOrder; } }); // Asserting bit set... if (VERBOSE) { System.out.println("expected cardinality:" + expectedResult.cardinality()); DocIdSetIterator iterator = expectedResult.iterator(); for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { System.out.println( String.format( Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id"))); } System.out.println("actual cardinality:" + actualResult.cardinality()); iterator = actualResult.iterator(); for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { System.out.println( String.format( Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id"))); } } assertEquals(expectedResult, actualResult); // Asserting TopDocs... TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs actualTopDocs = topScoreDocCollector.topDocs(); assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits); assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length); if (scoreMode == ScoreMode.None) { continue; } assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f); for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) { if (VERBOSE) { System.out.printf( Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); System.out.printf( Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score); } assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f); Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc); assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f); } } topLevelReader.close(); dir.close(); } }
public void testSimpleWithScoring() throws Exception { final String idField = "id"; final String toField = "movieId"; Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMergePolicy(newLogMergePolicy())); // 0 Document doc = new Document(); doc.add(new TextField("description", "A random movie", Field.Store.NO)); doc.add(new TextField("name", "Movie 1", Field.Store.NO)); doc.add(new TextField(idField, "1", Field.Store.NO)); w.addDocument(doc); // 1 doc = new Document(); doc.add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO)); doc.add(new TextField(idField, "2", Field.Store.NO)); doc.add(new TextField(toField, "1", Field.Store.NO)); w.addDocument(doc); // 2 doc = new Document(); doc.add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO)); doc.add(new TextField(idField, "3", Field.Store.NO)); doc.add(new TextField(toField, "1", Field.Store.NO)); w.addDocument(doc); // 3 doc = new Document(); doc.add(new TextField("description", "A second random movie", Field.Store.NO)); doc.add(new TextField("name", "Movie 2", Field.Store.NO)); doc.add(new TextField(idField, "4", Field.Store.NO)); w.addDocument(doc); w.commit(); // 4 doc = new Document(); doc.add( new TextField( "subtitle", "a very random event happened during christmas night", Field.Store.NO)); doc.add(new TextField(idField, "5", Field.Store.NO)); doc.add(new TextField(toField, "4", Field.Store.NO)); w.addDocument(doc); // 5 doc = new Document(); doc.add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO)); doc.add(new TextField(idField, "6", Field.Store.NO)); doc.add(new TextField(toField, "4", Field.Store.NO)); w.addDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); w.close(); // Search for movie via subtitle Query joinQuery = JoinUtil.createJoinQuery( toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max); TopDocs result = indexSearcher.search(joinQuery, 10); assertEquals(2, result.totalHits); assertEquals(0, result.scoreDocs[0].doc); assertEquals(3, result.scoreDocs[1].doc); // Score mode max. joinQuery = JoinUtil.createJoinQuery( toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max); result = indexSearcher.search(joinQuery, 10); assertEquals(2, result.totalHits); assertEquals(3, result.scoreDocs[0].doc); assertEquals(0, result.scoreDocs[1].doc); // Score mode total joinQuery = JoinUtil.createJoinQuery( toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total); result = indexSearcher.search(joinQuery, 10); assertEquals(2, result.totalHits); assertEquals(0, result.scoreDocs[0].doc); assertEquals(3, result.scoreDocs[1].doc); // Score mode avg joinQuery = JoinUtil.createJoinQuery( toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg); result = indexSearcher.search(joinQuery, 10); assertEquals(2, result.totalHits); assertEquals(3, result.scoreDocs[0].doc); assertEquals(0, result.scoreDocs[1].doc); indexSearcher.getIndexReader().close(); dir.close(); }
/** * Mark the specified access node to be made dependent * * @param sourceNode Node to make dependent * @param dca * @throws TeiidComponentException * @throws QueryMetadataException */ boolean markDependent( PlanNode sourceNode, PlanNode joinNode, QueryMetadataInterface metadata, DependentCostAnalysis dca, Boolean bound) throws QueryMetadataException, TeiidComponentException { boolean isLeft = joinNode.getFirstChild() == sourceNode; // Get new access join node properties based on join criteria List independentExpressions = (List) (isLeft ? joinNode.getProperty(NodeConstants.Info.RIGHT_EXPRESSIONS) : joinNode.getProperty(NodeConstants.Info.LEFT_EXPRESSIONS)); List dependentExpressions = (List) (isLeft ? joinNode.getProperty(NodeConstants.Info.LEFT_EXPRESSIONS) : joinNode.getProperty(NodeConstants.Info.RIGHT_EXPRESSIONS)); if (independentExpressions == null || independentExpressions.isEmpty()) { return false; } String id = "$dsc/id" + ID.getAndIncrement(); // $NON-NLS-1$ // Create DependentValueSource and set on the independent side as this will feed the values joinNode.setProperty(NodeConstants.Info.DEPENDENT_VALUE_SOURCE, id); PlanNode indNode = isLeft ? joinNode.getLastChild() : joinNode.getFirstChild(); if (bound == null) { List<PlanNode> sources = NodeEditor.findAllNodes(indNode, NodeConstants.Types.SOURCE); for (PlanNode planNode : sources) { for (GroupSymbol gs : planNode.getGroups()) { if (gs.isTempTable() && metadata.getCardinality(gs.getMetadataID()) == QueryMetadataInterface.UNKNOWN_CARDINALITY) { bound = true; break; } } } if (bound == null) { bound = false; } } PlanNode crit = getDependentCriteriaNode( id, independentExpressions, dependentExpressions, indNode, metadata, dca, bound, (MakeDep) sourceNode.getProperty(Info.MAKE_DEP)); sourceNode.addAsParent(crit); if (isLeft) { JoinUtil.swapJoinChildren(joinNode); } return true; }