Пример #1
0
  /**
   * Check whether a join is valid. Invalid joins are CROSS JOIN, FULL OUTER JOIN, any join without
   * criteria, any join with no equality criteria, and any outer join that has the outer side not
   * the same as the dependent.
   *
   * @param joinNode The join node to check
   * @param sourceNode The access node being considered
   * @param analysisRecord TODO
   * @return True if valid for making dependent
   * @throws TeiidComponentException
   * @throws QueryMetadataException
   */
  boolean isValidJoin(PlanNode joinNode, PlanNode sourceNode, AnalysisRecord analysisRecord)
      throws QueryMetadataException, TeiidComponentException {
    JoinType jtype = (JoinType) joinNode.getProperty(NodeConstants.Info.JOIN_TYPE);

    // Check that join is not a CROSS join or FULL OUTER join
    if (jtype.equals(JoinType.JOIN_CROSS) || jtype.equals(JoinType.JOIN_FULL_OUTER)) {
      sourceNode.recordDebugAnnotation(
          "parent join is CROSS or FULL OUTER",
          null,
          "Rejecting dependent join",
          analysisRecord,
          null); //$NON-NLS-1$ //$NON-NLS-2$
      return false;
    }

    if (!joinNode.getExportedCorrelatedReferences().isEmpty()) {
      sourceNode.recordDebugAnnotation(
          "parent join has a correlated nested table",
          null,
          "Rejecting dependent join",
          analysisRecord,
          null); //$NON-NLS-1$ //$NON-NLS-2$
      return false;
    }

    // Check that join criteria exist
    List jcrit = (List) joinNode.getProperty(NodeConstants.Info.JOIN_CRITERIA);
    if (jcrit == null || jcrit.size() == 0) {
      sourceNode.recordDebugAnnotation(
          "parent join has has no join criteria",
          null,
          "Rejecting dependent join",
          analysisRecord,
          null); //$NON-NLS-1$ //$NON-NLS-2$
      return false;
    }

    if (joinNode.getProperty(NodeConstants.Info.LEFT_EXPRESSIONS) == null) {
      sourceNode.recordDebugAnnotation(
          "parent join has no equa-join predicates",
          null,
          "Rejecting dependent join",
          analysisRecord,
          null); //$NON-NLS-1$ //$NON-NLS-2$
      return false;
    }

    // Check that for a left or right outer join the dependent side must be the inner
    if (jtype.isOuter() && JoinUtil.getInnerSideJoinNodes(joinNode)[0] != sourceNode) {
      sourceNode.recordDebugAnnotation(
          "node is on outer side of the join",
          null,
          "Rejecting dependent join",
          analysisRecord,
          null); //$NON-NLS-1$ //$NON-NLS-2$
      return false;
    }

    return true;
  }
  @Override
  public void processOp(Object row, int tag) throws HiveException {

    try {
      if (firstRow) {
        // generate the map metadata
        generateMapMetaData();
        firstRow = false;
      }

      // get alias
      alias = order[tag];
      // alias = (byte)tag;

      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      // compute keys and values as StandardObjects
      AbstractMapJoinKey key =
          JoinUtil.computeMapJoinKeys(
              row, joinKeys.get(alias), joinKeysObjectInspectors.get(alias));
      ArrayList<Object> value =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      // Add the value to the ArrayList
      storage.get((byte) tag).add(value);

      for (Byte pos : order) {
        if (pos.intValue() != tag) {

          MapJoinObjectValue o = mapJoinTables.get(pos).get(key);
          MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos);

          // there is no join-value or join-key has all null elements
          if (o == null || key.hasAnyNulls()) {
            if (noOuterJoin) {
              storage.put(pos, emptyList);
            } else {
              storage.put(pos, dummyObjVectors[pos.intValue()]);
            }
          } else {
            rowContainer.reset(o.getObj());
            storage.put(pos, rowContainer);
          }
        }
      }

      // generate the output records
      checkAndGenObject();

      // done with the row
      storage.get((byte) tag).clear();

      for (Byte pos : order) {
        if (pos.intValue() != tag) {
          storage.put(pos, null);
        }
      }

    } catch (SerDeException e) {
      e.printStackTrace();
      throw new HiveException(e);
    }
  }
Пример #3
0
  @Override
  public void processOp(Object row, int tag) throws HiveException {
    try {
      reportProgress();

      // get alias
      alias = (byte) tag;

      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      ArrayList<Object> nr =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      if (handleSkewJoin) {
        skewJoinKeyContext.handleSkew(tag);
      }

      // number of rows for the key in the given table
      int sz = storage.get(alias).size();
      StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
      Object keyObject = soi.getStructFieldData(row, sf);

      // Are we consuming too much memory
      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
        if (sz == joinEmitInterval) {
          // The input is sorted by alias, so if we are already in the last join
          // operand,
          // we can emit some results now.
          // Note this has to be done before adding the current row to the
          // storage,
          // to preserve the correctness for outer joins.
          checkAndGenObject();
          storage.get(alias).clear();
        }
      } else {
        if (sz == nextSz) {
          // Output a warning if we reached at least 1000 rows for a join
          // operand
          // We won't output a warning for the last join operand since the size
          // will never goes to joinEmitInterval.
          LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject);
          nextSz = getNextSize(nextSz);
        }
      }

      // Add the value to the vector
      storage.get(alias).add(nr);
      // if join-key is null, process each row in different group.
      if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) {
        endGroup();
        startGroup();
      }
    } catch (Exception e) {
      e.printStackTrace();
      throw new HiveException(e);
    }
  }
Пример #4
0
  public void testSimple() throws Exception {
    final String idField = "id";
    final String toField = "productId";

    Directory dir = newDirectory();
    RandomIndexWriter w =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(newLogMergePolicy()));

    // 0
    Document doc = new Document();
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name1", Field.Store.NO));
    doc.add(new TextField(idField, "1", Field.Store.NO));
    w.addDocument(doc);

    // 1
    doc = new Document();
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(toField, "1", Field.Store.NO));
    w.addDocument(doc);

    // 2
    doc = new Document();
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new TextField(idField, "3", Field.Store.NO));
    doc.add(new TextField(toField, "1", Field.Store.NO));
    w.addDocument(doc);

    // 3
    doc = new Document();
    doc.add(new TextField("description", "more random text", Field.Store.NO));
    doc.add(new TextField("name", "name2", Field.Store.NO));
    doc.add(new TextField(idField, "4", Field.Store.NO));
    w.addDocument(doc);
    w.commit();

    // 4
    doc = new Document();
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new TextField(idField, "5", Field.Store.NO));
    doc.add(new TextField(toField, "4", Field.Store.NO));
    w.addDocument(doc);

    // 5
    doc = new Document();
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new TextField(idField, "6", Field.Store.NO));
    doc.add(new TextField(toField, "4", Field.Store.NO));
    w.addDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
    w.close();

    // Search for product
    Query joinQuery =
        JoinUtil.createJoinQuery(
            idField,
            false,
            toField,
            new TermQuery(new Term("name", "name2")),
            indexSearcher,
            ScoreMode.None);

    TopDocs result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(4, result.scoreDocs[0].doc);
    assertEquals(5, result.scoreDocs[1].doc);

    joinQuery =
        JoinUtil.createJoinQuery(
            idField,
            false,
            toField,
            new TermQuery(new Term("name", "name1")),
            indexSearcher,
            ScoreMode.None);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(1, result.scoreDocs[0].doc);
    assertEquals(2, result.scoreDocs[1].doc);

    // Search for offer
    joinQuery =
        JoinUtil.createJoinQuery(
            toField,
            false,
            idField,
            new TermQuery(new Term("id", "5")),
            indexSearcher,
            ScoreMode.None);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(1, result.totalHits);
    assertEquals(3, result.scoreDocs[0].doc);

    indexSearcher.getIndexReader().close();
    dir.close();
  }
Пример #5
0
  private void executeRandomJoin(
      boolean multipleValuesPerDocument,
      int maxIndexIter,
      int maxSearchIter,
      int numberOfDocumentsToIndex)
      throws Exception {
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
      if (VERBOSE) {
        System.out.println("indexIter=" + indexIter);
      }
      Directory dir = newDirectory();
      RandomIndexWriter w =
          new RandomIndexWriter(
              random(),
              dir,
              newIndexWriterConfig(
                      TEST_VERSION_CURRENT,
                      new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
                  .setMergePolicy(newLogMergePolicy()));
      final boolean scoreDocsInOrder = TestJoinUtil.random().nextBoolean();
      IndexIterationContext context =
          createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

      IndexReader topLevelReader = w.getReader();
      w.close();
      for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
        if (VERBOSE) {
          System.out.println("searchIter=" + searchIter);
        }
        IndexSearcher indexSearcher = newSearcher(topLevelReader);

        int r = random().nextInt(context.randomUniqueValues.length);
        boolean from = context.randomFrom[r];
        String randomValue = context.randomUniqueValues[r];
        FixedBitSet expectedResult =
            createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);

        final Query actualQuery = new TermQuery(new Term("value", randomValue));
        if (VERBOSE) {
          System.out.println("actualQuery=" + actualQuery);
        }
        final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
        if (VERBOSE) {
          System.out.println("scoreMode=" + scoreMode);
        }

        final Query joinQuery;
        if (from) {
          joinQuery =
              JoinUtil.createJoinQuery(
                  "from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
        } else {
          joinQuery =
              JoinUtil.createJoinQuery(
                  "to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
        }
        if (VERBOSE) {
          System.out.println("joinQuery=" + joinQuery);
        }

        // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd
        // be also testing TopDocsCollector...
        final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
        final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, false);
        indexSearcher.search(
            joinQuery,
            new Collector() {

              int docBase;

              @Override
              public void collect(int doc) throws IOException {
                actualResult.set(doc + docBase);
                topScoreDocCollector.collect(doc);
              }

              @Override
              public void setNextReader(AtomicReaderContext context) {
                docBase = context.docBase;
                topScoreDocCollector.setNextReader(context);
              }

              @Override
              public void setScorer(Scorer scorer) throws IOException {
                topScoreDocCollector.setScorer(scorer);
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return scoreDocsInOrder;
              }
            });
        // Asserting bit set...
        if (VERBOSE) {
          System.out.println("expected cardinality:" + expectedResult.cardinality());
          DocIdSetIterator iterator = expectedResult.iterator();
          for (int doc = iterator.nextDoc();
              doc != DocIdSetIterator.NO_MORE_DOCS;
              doc = iterator.nextDoc()) {
            System.out.println(
                String.format(
                    Locale.ROOT,
                    "Expected doc[%d] with id value %s",
                    doc,
                    indexSearcher.doc(doc).get("id")));
          }
          System.out.println("actual cardinality:" + actualResult.cardinality());
          iterator = actualResult.iterator();
          for (int doc = iterator.nextDoc();
              doc != DocIdSetIterator.NO_MORE_DOCS;
              doc = iterator.nextDoc()) {
            System.out.println(
                String.format(
                    Locale.ROOT,
                    "Actual doc[%d] with id value %s",
                    doc,
                    indexSearcher.doc(doc).get("id")));
          }
        }
        assertEquals(expectedResult, actualResult);

        // Asserting TopDocs...
        TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
        TopDocs actualTopDocs = topScoreDocCollector.topDocs();
        assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
        assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
        if (scoreMode == ScoreMode.None) {
          continue;
        }

        assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
        for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
          if (VERBOSE) {
            System.out.printf(
                Locale.ENGLISH,
                "Expected doc: %d | Actual doc: %d\n",
                expectedTopDocs.scoreDocs[i].doc,
                actualTopDocs.scoreDocs[i].doc);
            System.out.printf(
                Locale.ENGLISH,
                "Expected score: %f | Actual score: %f\n",
                expectedTopDocs.scoreDocs[i].score,
                actualTopDocs.scoreDocs[i].score);
          }
          assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
          assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
          Explanation explanation =
              indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
          assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
        }
      }
      topLevelReader.close();
      dir.close();
    }
  }
Пример #6
0
  public void testSimpleWithScoring() throws Exception {
    final String idField = "id";
    final String toField = "movieId";

    Directory dir = newDirectory();
    RandomIndexWriter w =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMergePolicy(newLogMergePolicy()));

    // 0
    Document doc = new Document();
    doc.add(new TextField("description", "A random movie", Field.Store.NO));
    doc.add(new TextField("name", "Movie 1", Field.Store.NO));
    doc.add(new TextField(idField, "1", Field.Store.NO));
    w.addDocument(doc);

    // 1
    doc = new Document();
    doc.add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(toField, "1", Field.Store.NO));
    w.addDocument(doc);

    // 2
    doc = new Document();
    doc.add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
    doc.add(new TextField(idField, "3", Field.Store.NO));
    doc.add(new TextField(toField, "1", Field.Store.NO));
    w.addDocument(doc);

    // 3
    doc = new Document();
    doc.add(new TextField("description", "A second random movie", Field.Store.NO));
    doc.add(new TextField("name", "Movie 2", Field.Store.NO));
    doc.add(new TextField(idField, "4", Field.Store.NO));
    w.addDocument(doc);
    w.commit();

    // 4
    doc = new Document();
    doc.add(
        new TextField(
            "subtitle", "a very random event happened during christmas night", Field.Store.NO));
    doc.add(new TextField(idField, "5", Field.Store.NO));
    doc.add(new TextField(toField, "4", Field.Store.NO));
    w.addDocument(doc);

    // 5
    doc = new Document();
    doc.add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
    doc.add(new TextField(idField, "6", Field.Store.NO));
    doc.add(new TextField(toField, "4", Field.Store.NO));
    w.addDocument(doc);

    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
    w.close();

    // Search for movie via subtitle
    Query joinQuery =
        JoinUtil.createJoinQuery(
            toField,
            false,
            idField,
            new TermQuery(new Term("subtitle", "random")),
            indexSearcher,
            ScoreMode.Max);
    TopDocs result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(0, result.scoreDocs[0].doc);
    assertEquals(3, result.scoreDocs[1].doc);

    // Score mode max.
    joinQuery =
        JoinUtil.createJoinQuery(
            toField,
            false,
            idField,
            new TermQuery(new Term("subtitle", "movie")),
            indexSearcher,
            ScoreMode.Max);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(3, result.scoreDocs[0].doc);
    assertEquals(0, result.scoreDocs[1].doc);

    // Score mode total
    joinQuery =
        JoinUtil.createJoinQuery(
            toField,
            false,
            idField,
            new TermQuery(new Term("subtitle", "movie")),
            indexSearcher,
            ScoreMode.Total);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(0, result.scoreDocs[0].doc);
    assertEquals(3, result.scoreDocs[1].doc);

    // Score mode avg
    joinQuery =
        JoinUtil.createJoinQuery(
            toField,
            false,
            idField,
            new TermQuery(new Term("subtitle", "movie")),
            indexSearcher,
            ScoreMode.Avg);
    result = indexSearcher.search(joinQuery, 10);
    assertEquals(2, result.totalHits);
    assertEquals(3, result.scoreDocs[0].doc);
    assertEquals(0, result.scoreDocs[1].doc);

    indexSearcher.getIndexReader().close();
    dir.close();
  }
Пример #7
0
  /**
   * Mark the specified access node to be made dependent
   *
   * @param sourceNode Node to make dependent
   * @param dca
   * @throws TeiidComponentException
   * @throws QueryMetadataException
   */
  boolean markDependent(
      PlanNode sourceNode,
      PlanNode joinNode,
      QueryMetadataInterface metadata,
      DependentCostAnalysis dca,
      Boolean bound)
      throws QueryMetadataException, TeiidComponentException {

    boolean isLeft = joinNode.getFirstChild() == sourceNode;

    // Get new access join node properties based on join criteria
    List independentExpressions =
        (List)
            (isLeft
                ? joinNode.getProperty(NodeConstants.Info.RIGHT_EXPRESSIONS)
                : joinNode.getProperty(NodeConstants.Info.LEFT_EXPRESSIONS));
    List dependentExpressions =
        (List)
            (isLeft
                ? joinNode.getProperty(NodeConstants.Info.LEFT_EXPRESSIONS)
                : joinNode.getProperty(NodeConstants.Info.RIGHT_EXPRESSIONS));

    if (independentExpressions == null || independentExpressions.isEmpty()) {
      return false;
    }

    String id = "$dsc/id" + ID.getAndIncrement(); // $NON-NLS-1$
    // Create DependentValueSource and set on the independent side as this will feed the values
    joinNode.setProperty(NodeConstants.Info.DEPENDENT_VALUE_SOURCE, id);

    PlanNode indNode = isLeft ? joinNode.getLastChild() : joinNode.getFirstChild();

    if (bound == null) {
      List<PlanNode> sources = NodeEditor.findAllNodes(indNode, NodeConstants.Types.SOURCE);
      for (PlanNode planNode : sources) {
        for (GroupSymbol gs : planNode.getGroups()) {
          if (gs.isTempTable()
              && metadata.getCardinality(gs.getMetadataID())
                  == QueryMetadataInterface.UNKNOWN_CARDINALITY) {
            bound = true;
            break;
          }
        }
      }
      if (bound == null) {
        bound = false;
      }
    }

    PlanNode crit =
        getDependentCriteriaNode(
            id,
            independentExpressions,
            dependentExpressions,
            indNode,
            metadata,
            dca,
            bound,
            (MakeDep) sourceNode.getProperty(Info.MAKE_DEP));

    sourceNode.addAsParent(crit);

    if (isLeft) {
      JoinUtil.swapJoinChildren(joinNode);
    }
    return true;
  }