public ChunkTask(final BOpContext<IBindingSet> context,
        final HashJoinOp<E> op) {

    this.context = context;

    this.stats = (BaseJoinStats) context.getStats();

    this.pred = op.getPredicate();

    this.relation = context.getRelation(pred);

    this.sink = context.getSink();

    this.sink2 = context.getSink2();

    this.op = op;

    {

        /*
         * First, see if the map already exists.
         *
         * Note: Since the operator is not thread-safe, we do not need
         * to use a putIfAbsent pattern here.
         *
         * Note: Publishing the [state] as a query attribute provides
         * visibility into the hash join against the access path even
         * for implementations (such as the JVMHashJoinOp) where the
         * entire operation will occur within a single evaluation pass.
         */

        final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
                .getRequiredProperty(Annotations.NAMED_SET_REF);

        // Look up the attributes for the query on which we will hang
        // the solution set.
        final IQueryAttributes attrs = context
                .getQueryAttributes(namedSetRef.getQueryId());

        IHashJoinUtility state = (IHashJoinUtility) attrs.get(namedSetRef);

        if (state == null) {

            state = op.newState(context, namedSetRef,
                    op.isOptional() ? JoinTypeEnum.Optional
                            : JoinTypeEnum.Normal);

            attrs.put(namedSetRef, state);

        }

        this.state = state;

    }

}
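/*
 * Illustration (not part of the operator): the guarded lookup in the
 * constructor above, reduced to its essentials. Because the operator is
 * single-threaded per query, a plain get()/put() is safe. If the same
 * state could be initialized concurrently, the putIfAbsent pattern the
 * comment above alludes to would be needed instead; a minimal sketch,
 * assuming the attributes map exposes a putIfAbsent() method:
 *
 *   IHashJoinUtility tmp = (IHashJoinUtility) attrs.putIfAbsent(
 *           namedSetRef, state);
 *
 *   if (tmp != null)
 *       state = tmp; // Another thread won the race: use its instance.
 */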
@Override
public Void call() throws Exception {

    boolean didRun = false;

    try {

        acceptSolutions();

        if (op.runHashJoin(context, state)) {

            didRun = true;

            doHashJoin();

        }

        // Done.
        return null;

    } finally {

        if (didRun) {

            /*
             * The state needs to be released each time this operator
             * runs in order to discard the intermediate solutions
             * buffered on the hash index that were just joined against
             * the access path. If we do not discard the state after
             * processing the intermediate solutions, then they will
             * continue to accumulate and we will over-report joins
             * (duplicate solutions will be output for things already
             * in the hash index the next time we evaluate the hash
             * join against the access path).
             */
            state.release();

        }

        sink.close();

        if (sink2 != null)
            sink2.close();

    }

}
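/*
 * Lifecycle sketch (illustration only) of a single evaluation pass of
 * call() above:
 *
 *   acceptSolutions();                  // buffer the chunk on the hash index.
 *   if (op.runHashJoin(context, state)) {
 *       doHashJoin();                   // join buffered solutions with the access path.
 *       state.release();                // discard what was just joined (in finally).
 *   }
 *   // Both sinks are closed unconditionally on the way out.
 *
 * release() is guarded by [didRun] so that solutions which have been
 * buffered, but not yet joined, are retained for a later evaluation
 * pass rather than being discarded.
 */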
/**
 * Do a hash join of the buffered solutions with the access path.
 */
private void doHashJoin() {

    if (state.isEmpty())
        return;

    final IBindingSetAccessPath<?> accessPath = getAccessPath();

    if (log.isInfoEnabled())
        log.info("accessPath=" + accessPath);

    stats.accessPathCount.increment();

    stats.accessPathRangeCount.add(accessPath.rangeCount(false /* exact */));

    final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer =
            new UnsyncLocalOutputBuffer<IBindingSet>(
                    op.getChunkCapacity(), sink);

    final long cutoffLimit = pred.getProperty(
            IPredicate.Annotations.CUTOFF_LIMIT,
            IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);

    // Obtain the iterator for the current join dimension.
    final ICloseableIterator<IBindingSet[]> itr = accessPath.solutions(
            cutoffLimit, stats);

    /*
     * Note: The [stats] are NOT passed in here since the chunksIn and
     * unitsIn were updated when the pipeline solutions were accepted
     * into the hash index. If we passed in stats here, they would be
     * double counted when we executed the hash join against the access
     * path.
     */
    state.hashJoin(//
            itr,         // left
            null,        // stats
            unsyncBuffer // out
    );

    switch (state.getJoinType()) {
    case Normal:
        /*
         * Nothing to do.
         */
        break;
    case Optional:
    case NotExists: {
        /*
         * Output the optional solutions.
         */

        // Where to write the optional solutions.
        final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 =
                sink2 == null ? unsyncBuffer
                        : new UnsyncLocalOutputBuffer<IBindingSet>(
                                op.getChunkCapacity(), sink2);

        state.outputOptionals(unsyncBuffer2);

        unsyncBuffer2.flush();

        if (sink2 != null)
            sink2.flush();

        break;
    }
    case Exists: {
        /*
         * Output the join set.
         */
        state.outputJoinSet(unsyncBuffer);
        break;
    }
    default:
        throw new AssertionError();
    }

    unsyncBuffer.flush();

    sink.flush();

}
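/*
 * Summary (as read from the switch above) of which output path runs for
 * each join type. Note that the constructor only ever selects Normal or
 * Optional; the NotExists and Exists cases appear in the switch as well,
 * presumably because the switch is shared logic over JoinTypeEnum:
 *
 *   Normal    - hashJoin() output only.
 *   Optional  - hashJoin() output, then outputOptionals() for the
 *               solutions with no match (routed to sink2 when present,
 *               else the default sink).
 *   NotExists - outputOptionals() emits the solutions having no match.
 *   Exists    - outputJoinSet() emits the solutions that joined.
 */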