/**
 * Returns the ids of the documents of one index segment that carry one of the requested spatial
 * hash cell ids, at given grid level, in the filtered field.
 *
 * @param context context of the segment; its reader is used to look up the cell id terms
 * @param acceptDocs documents that are allowed to match, or {@code null} to accept every document
 * @return a bit set with the matching documents, or {@code null} when there are no cell ids to
 *     look for or no accepted document matches
 * @throws IOException if the postings of a term cannot be read
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  if (spatialHashCellsIds.size() == 0) {
    return null;
  }

  final AtomicReader atomicReader = context.reader();
  final OpenBitSet matchedDocumentsIds = new OpenBitSet(atomicReader.maxDoc());
  // Primitive flag: a boxed Boolean would be re-boxed on every matching document.
  boolean found = false;

  for (int i = 0; i < spatialHashCellsIds.size(); i++) {
    final Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
    final DocsEnum spatialHashCellsDocs = atomicReader.termDocsEnum(spatialHashCellTerm);
    if (spatialHashCellsDocs == null) {
      // no postings were returned for this cell id term
      continue;
    }
    int docId;
    while ((docId = spatialHashCellsDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (acceptDocs == null || acceptDocs.get(docId)) {
        matchedDocumentsIds.fastSet(docId);
        found = true;
      }
    }
  }

  return found ? matchedDocumentsIds : null;
}
@Override public void collect(int doc) throws IOException { BytesWrap parentId = typeCache.parentIdByDoc(doc); if (parentId == null) { return; } for (Tuple<IndexReader, IdReaderTypeCache> tuple : readers) { IndexReader indexReader = tuple.v1(); IdReaderTypeCache idReaderTypeCache = tuple.v2(); if (idReaderTypeCache == null) { // might be if we don't have that doc with that type in this reader continue; } int parentDocId = idReaderTypeCache.docById(parentId); if (parentDocId != -1 && !indexReader.isDeleted(parentDocId)) { OpenBitSet docIdSet = parentDocs().get(indexReader.getCoreCacheKey()); if (docIdSet == null) { docIdSet = new OpenBitSet(indexReader.maxDoc()); parentDocs.put(indexReader.getCoreCacheKey(), docIdSet); } docIdSet.fastSet(parentDocId); return; } } }
/**
 * Creates a bit set sized for {@code sz} bits and sets {@code bitsToSet} randomly drawn positions
 * below {@code sz}; draws may repeat, so fewer distinct bits can end up set.
 *
 * @param sz number of bits the set is created with; 0 yields an untouched set
 * @param bitsToSet number of random positions to draw
 * @return the populated bit set
 */
public OpenBitSet getRandomSet(int sz, int bitsToSet) {
  final OpenBitSet result = new OpenBitSet(sz);
  if (sz != 0) {
    for (int remaining = bitsToSet; remaining > 0; remaining--) {
      result.fastSet(rand.nextInt(sz));
    }
  }
  return result;
}
// convert an openBitSet to an array list private ArrayList<Integer> bitSetToArrayList(OpenBitSet bs) { ArrayList<Integer> listRes = new ArrayList<Integer>(); for (int i = 0; i < bs.capacity(); i++) { if (bs.get(i)) { listRes.add(i); } } return listRes; }
@Test public void _testAndIntersections() throws Exception { System.out.println("Running test case: intersections, PForDeltaAndDocIdSet.nextDoc() ..."); ArrayList<OpenBitSet> obs = new ArrayList<OpenBitSet>(); ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>(); ArrayList<Integer> expectedIntersectionResult = new ArrayList<Integer>(); int maxDoc = 5000; int numDoc1 = 1000; int numDoc2 = 2000; int numDoc3 = 4000; int[] originalInput = null; int[] input1 = null; int[] input2 = null; int[] input3 = null; originalInput = new int[maxDoc]; for (int i = 0; i < maxDoc; ++i) { originalInput[i] = i; } // generate random numbers and add them into PForDeltaDocIdSets input1 = generateRandomDataNew(originalInput, maxDoc, numDoc1); loadRandomDataSets(input1, obs, docs, numDoc1); input2 = generateRandomDataNew(originalInput, maxDoc, numDoc2); loadRandomDataSets(input2, obs, docs, numDoc2); input3 = generateRandomDataNew(originalInput, maxDoc, numDoc3); loadRandomDataSets(input3, obs, docs, numDoc3); // get the expected result OpenBitSet base = obs.get(0); for (int i = 1; i < obs.size(); ++i) { base.intersect(obs.get(i)); } for (int k = 0; k < base.size(); ++k) { if (base.get(k)) expectedIntersectionResult.add(k); } // get the results from PForDeltaAndDocIdSet ArrayList<Integer> intersectionResult = new ArrayList<Integer>(); AndDocIdSet ands = new AndDocIdSet(docs); DocIdSetIterator iter = ands.iterator(); int docId = iter.nextDoc(); while (docId != DocIdSetIterator.NO_MORE_DOCS) { intersectionResult.add(docId); docId = iter.nextDoc(); } if (!compareTwoLists(intersectionResult, expectedIntersectionResult)) { System.out.println("The result for the new version does not match the expectation"); } System.out.println("----------------completed---------------------------"); }
// print a openBitSet object private ArrayList<Integer> printBitSet(OpenBitSet bs) { ArrayList<Integer> listRes = new ArrayList<Integer>(); System.out.print("bitSet(" + bs.capacity() + ") ["); for (int i = 0; i < bs.capacity(); i++) { if (bs.get(i)) { listRes.add(i); System.out.print(i); System.out.print(" "); } } System.out.println("]"); return listRes; }
@Override public RandomAccessDocIdSet getRandomAccessDocIdSet(BoboIndexReader reader) throws IOException { final MultiValueFacetDataCache dataCache = (MultiValueFacetDataCache) _facetHandler.getFacetData(reader); final int[] index = _valueConverter.convert(dataCache, _vals); final BigNestedIntArray nestedArray = dataCache._nestedArray; final OpenBitSet bitset = new OpenBitSet(dataCache.valArray.size()); for (int i : index) { bitset.fastSet(i); } if (_takeCompliment) { // flip the bits int size = dataCache.valArray.size(); for (int i = 0; i < size; ++i) { bitset.fastFlip(i); } } long count = bitset.cardinality(); if (count == 0) { final DocIdSet empty = EmptyDocIdSet.getInstance(); return new RandomAccessDocIdSet() { @Override public boolean get(int docId) { return false; } @Override public DocIdSetIterator iterator() throws IOException { return empty.iterator(); } }; } else { return new RandomAccessDocIdSet() { @Override public DocIdSetIterator iterator() { return new MultiValueOrFacetDocIdSetIterator(dataCache, bitset); } @Override public final boolean get(int docId) { return nestedArray.contains(docId, bitset); } }; } }
/**
 * Tells whether every hash bucket computed for the key is set in the bit set.
 *
 * @param key key to look up
 * @return {@code false} as soon as one of the key's buckets is unset, {@code true} otherwise
 */
public boolean isPresent(ByteBuffer key) {
  for (long bucket : getHashBuckets(key)) {
    if (bitset.fastGet(bucket)) {
      continue;
    }
    return false;
  }
  return true;
}
/**
 * Copies the set bits of the given bit set, in iteration order, into a {@code SortedIntDocSet}.
 *
 * @param bs source bit set
 * @return a doc set holding one id per set bit
 */
public DocSet getIntDocSet(OpenBitSet bs) {
  final int count = (int) bs.cardinality();
  final int[] ids = new int[count];
  final OpenBitSetIterator bits = new OpenBitSetIterator(bs);
  int slot = 0;
  while (slot < ids.length) {
    ids[slot] = bits.nextDoc();
    slot++;
  }
  return new SortedIntDocSet(ids);
}
/**
 * Tells whether every hash bucket computed for the given key bytes is set in the bit set.
 *
 * @param key array holding the key bytes
 * @param offset offset passed on to the bucket computation
 * @param length length passed on to the bucket computation
 * @return {@code false} as soon as one of the key's buckets is unset, {@code true} otherwise
 */
@Override
public boolean isPresent(byte[] key, int offset, int length) {
  for (long bucket : getHashBuckets(key, offset, length)) {
    if (bitset.fastGet(bucket)) {
      continue;
    }
    return false;
  }
  return true;
}
/** Counts the bucket positions below {@code buckets()} whose bit is not set. */
long emptyBuckets() {
  long unset = 0;
  long bucket = 0;
  while (bucket < buckets()) {
    if (!bitset.get(bucket)) {
      unset++;
    }
    bucket++;
  }
  return unset;
}
/**
 * Estimates the fraction of the reader's documents selected by this filter, as the summed
 * frequencies of the selected facet values divided by {@code reader.maxDoc()}.
 *
 * @param reader segment reader whose facet data is evaluated
 * @return the selectivity; values above 0.999 are reported as 1.0, and a reader without
 *     documents yields 0.0 instead of the NaN a 0/0 division would produce
 */
@Override
public double getFacetSelectivity(BoboSegmentReader reader) {
  final FacetDataCache<?> dataCache = facetDataCacheBuilder.build(reader);
  final OpenBitSet openBitSet = getBitSet(dataCache);

  final int total = reader.maxDoc();
  if (total <= 0) {
    // Nothing can be selected from an empty reader; avoid dividing by zero.
    return 0.0;
  }

  final int[] frequencies = dataCache.freqs;
  // long accumulator: the summed frequencies may exceed Integer.MAX_VALUE
  long accumFreq = 0;
  for (int index = openBitSet.nextSetBit(0);
      index >= 0;
      index = openBitSet.nextSetBit(index + 1)) {
    accumFreq += frequencies[index];
  }

  final double selectivity = (double) accumFreq / (double) total;
  return selectivity > 0.999 ? 1.0 : selectivity;
}
public <V extends ConceptComponent<?, ?>.Version> List<V> locateLatest( List<V> parts, ViewCoordinate vc) throws IOException { V latest = null; OpenBitSet resultsPartSet = new OpenBitSet(parts.size()); for (PositionBI pos : vc.getPositionSet()) { RelativePositionComputerBI mapper = RelativePositionComputer.getComputer(pos); OpenBitSet iteratorPartSet = new OpenBitSet(parts.size()); for (int i = 0; i < parts.size(); i++) { V part = parts.get(i); if (mapper.onRoute(part)) { if (latest == null) { latest = part; iteratorPartSet.set(i); } else { switch (mapper.relativePosition(latest, part)) { case BEFORE: // nothing to do break; case CONTRADICTION: iteratorPartSet.set(i); break; case AFTER: latest = part; iteratorPartSet.clear(0, Integer.MAX_VALUE); iteratorPartSet.set(i); break; default: break; } } } } resultsPartSet.or(iteratorPartSet); } List<V> resultsList = new ArrayList<>((int) resultsPartSet.cardinality()); DocIdSetIterator resultsItr = resultsPartSet.iterator(); int id = resultsItr.nextDoc(); while (id != DocIdSetIterator.NO_MORE_DOCS) { resultsList.add(parts.get(id)); id = resultsItr.nextDoc(); } return resultsList; }
/**
 * Collects, into a bit set sized by {@code reader.maxDoc()}, every document listed for each term
 * of the query's term enumeration, and reports the number of visited terms to the query.
 *
 * @param reader index reader to enumerate terms and documents from
 * @return {@code DocIdSet.EMPTY_DOCIDSET} when the enumeration starts without a term, otherwise
 *     the bit set of collected documents
 * @throws IOException if reading terms or documents fails
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  final TermEnum terms = this.query.getEnum(reader);
  try {
    if (terms.term() == null) {
      return DocIdSet.EMPTY_DOCIDSET;
    }

    final OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    // documents and frequencies are read in batches of 32
    final int[] docBuffer = new int[32];
    final int[] freqBuffer = new int[32];
    final TermDocs postings = reader.termDocs();
    try {
      int termCount = 0;
      do {
        final Term current = terms.term();
        if (current == null) {
          break;
        }
        termCount++;
        postings.seek(current);
        for (int read = postings.read(docBuffer, freqBuffer);
            read != 0;
            read = postings.read(docBuffer, freqBuffer)) {
          for (int i = 0; i < read; i++) {
            matches.set(docBuffer[i]);
          }
        }
      } while (terms.next());

      this.query.incTotalNumberOfTerms(termCount);
    } finally {
      postings.close();
    }
    return matches;
  } finally {
    terms.close();
  }
}
/**
 * Returns the part of {@code docs} that falls into the given sub-reader, re-based so that ids
 * start at 0 for that reader.
 *
 * @param subReader reader to produce the filter for
 * @return {@code docs} itself when no doc base is known for the reader; otherwise a bit set of
 *     size {@code subReader.maxDoc()} with the reader-local ids set (empty when {@code docs}
 *     provides no iterator)
 * @throws IOException if iterating {@code docs} fails
 */
@Override
public DocIdSet getDocIdSet(IndexReader subReader) throws IOException {
  if (bases == null || !bases.containsKey(subReader)) {
    return docs;
  }

  final int docBase = bases.get(subReader);
  final int readerSize = subReader.maxDoc();
  final OpenBitSet filter = new OpenBitSet(readerSize);

  final DocIdSetIterator iterator = docs.iterator();
  if (iterator == null) {
    // DocIdSet.iterator() is allowed to return null when the set matches no documents.
    return filter;
  }

  final int upperBound = docBase + readerSize;
  int doc = iterator.advance(docBase);
  // Stop explicitly on exhaustion instead of relying on the magnitude of NO_MORE_DOCS.
  while (doc != DocIdSetIterator.NO_MORE_DOCS && doc < upperBound) {
    filter.set(doc - docBase);
    doc = iterator.nextDoc();
  }
  return filter;
}
/**
 * Evaluates the spatial filter for the given feature ids and sets, in {@code bits}, the documents
 * mapped to every feature that passes {@code evaluateFeature}.
 *
 * <p>Cached features are handled first through {@code processCachedFeatures}; the remaining ids
 * are fetched from the feature source in batches of at most {@code MAX_FIDS_PER_QUERY}, and each
 * fetched geometry is put into the cache.
 *
 * @param matches feature ids to evaluate; NOTE: ids are removed from this set while batching
 * @param docIndexLookup documents associated with each feature id
 * @param bits bit set receiving the matching documents
 * @return the same {@code bits} instance
 * @throws IOException declared for the feature source access
 */
private OpenBitSet applySpatialFilter(
    Set<FeatureId> matches, Multimap<FeatureId, Integer> docIndexLookup, OpenBitSet bits)
    throws IOException {
  JeevesJCS jcs = getJCSCache();
  // presumably removes from matches the ids it could resolve from the cache — confirm there
  processCachedFeatures(jcs, matches, docIndexLookup, bits);

  while (!matches.isEmpty()) {
    Id fidFilter;
    if (matches.size() > MAX_FIDS_PER_QUERY) {
      // Too many ids for one query: move up to MAX_FIDS_PER_QUERY ids out of matches.
      FeatureId[] subset = new FeatureId[MAX_FIDS_PER_QUERY];
      int i = 0;
      Iterator<FeatureId> iter = matches.iterator();
      while (iter.hasNext() && i < MAX_FIDS_PER_QUERY) {
        subset[i] = iter.next();
        iter.remove();
        i++;
      }
      fidFilter = _filterFactory.id(subset);
    } else {
      // Last batch: query all remaining ids, then end the loop with an empty set.
      fidFilter = _filterFactory.id(matches);
      matches = Collections.emptySet();
    }

    FeatureSource<SimpleFeatureType, SimpleFeature> _featureSource = sourceAccessor.one();
    String ftn = _featureSource.getSchema().getName().getLocalPart();
    // only the geometry attribute is requested from the source
    String[] geomAtt = {_featureSource.getSchema().getGeometryDescriptor().getLocalName()};
    FeatureCollection<SimpleFeatureType, SimpleFeature> features =
        _featureSource.getFeatures(new org.geotools.data.Query(ftn, fidFilter, geomAtt));
    FeatureIterator<SimpleFeature> iterator = features.features();

    try {
      while (iterator.hasNext()) {
        SimpleFeature feature = iterator.next();
        FeatureId featureId = feature.getIdentifier();
        // cache the geometry under the feature id before evaluating it
        jcs.put(featureId.getID(), feature.getDefaultGeometry());
        if (evaluateFeature(feature)) {
          for (int doc : docIndexLookup.get(featureId)) {
            bits.set(doc);
          }
        }
      }
    } catch (CacheException e) {
      // NOTE(review): a cache failure is escalated as java.lang.Error — confirm this is intended
      throw new Error(e);
    } finally {
      iterator.close();
    }
  }
  return bits;
}
/**
 * Applies {@code operation} to every bit set of {@code bs} in the range
 * {@code [fromIndex, toIndex)}, each time against a bit set built from {@code toCompare}.
 *
 * @return the computed values, in range order, as a typed array
 * @throws Exception if a computation fails
 */
@SuppressWarnings({"unchecked"})
@Override
public T[] call() throws Exception {
  // one accumulator instance is handed to every compute call
  final OpenBitSetDISI scratch = new OpenBitSetDISI(finalBitsetSize);
  final OpenBitSetDISI reference = new OpenBitSetDISI(finalBitsetSize);
  reference.inPlaceOr(toCompare.iterator());

  final int count = toIndex - fromIndex;
  final Object[] computed = new Object[count];
  for (int offset = 0; offset < count; offset++) {
    computed[offset] = operation.compute(scratch, bs[fromIndex + offset], reference);
  }
  return ArrayUtils.typedArray(computed);
}
public static SolrCachingAuxDocScorer createAuxDocScorer( SolrIndexSearcher searcher, Similarity similarity, Query query, SolrIndexReader reader) throws IOException { // Get hold of solr top level searcher // Execute query with caching // translate reults to leaf docs // build ordered doc list DocSet auxDocSet = searcher.getDocSet(query); CacheEntry[] indexedByDocId = (CacheEntry[]) searcher.cacheLookup( AlfrescoSolrEventListener.ALFRESCO_CACHE, AlfrescoSolrEventListener.KEY_DBID_LEAF_PATH_BY_DOC_ID); // List<ScoreDoc> auxDocs = pathCollector.getDocs(); OpenBitSet translated = new OpenBitSet(); if (auxDocSet instanceof BitDocSet) { BitDocSet source = (BitDocSet) auxDocSet; OpenBitSet openBitSet = source.getBits(); int current = -1; while ((current = openBitSet.nextSetBit(current + 1)) != -1) { CacheEntry entry = indexedByDocId[current]; translated.set(entry.getLeaf()); } } else { for (DocIterator it = auxDocSet.iterator(); it.hasNext(); /* */ ) { CacheEntry entry = indexedByDocId[it.nextDoc()]; translated.set(entry.getLeaf()); } } return new SolrCachingAuxDocScorer(similarity, new BitDocSet(translated), reader); }
/**
 * Builds a random-access doc id set over the facet values selected by {@code getBitSet}.
 *
 * <p>When nothing is selected the shared empty set is returned. Otherwise lookups go through the
 * nested array for multi-valued caches and through the order array for single-valued ones.
 *
 * @param reader segment reader whose facet data cache is built
 * @return the doc id set for this filter
 * @throws IOException declared by the overridden method
 */
@Override
public RandomAccessDocIdSet getRandomAccessDocIdSet(final BoboSegmentReader reader)
    throws IOException {
  final FacetDataCache<?> cache = facetDataCacheBuilder.build(reader);
  final OpenBitSet selected = getBitSet(cache);

  if (selected.cardinality() == 0) {
    return EmptyDocIdSet.getInstance();
  }

  final boolean multi = cache instanceof MultiValueFacetDataCache;
  final MultiValueFacetDataCache<?> multiCache =
      multi ? (MultiValueFacetDataCache<?>) cache : null;

  return new RandomAccessDocIdSet() {
    @Override
    public DocIdSetIterator iterator() {
      if (!multi) {
        return new FacetOrFilter.FacetOrDocIdSetIterator(cache, selected);
      }
      return new MultiValueORFacetFilter.MultiValueOrFacetDocIdSetIterator(multiCache, selected);
    }

    @Override
    public boolean get(int docId) {
      return multi
          ? multiCache._nestedArray.contains(docId, selected)
          : selected.fastGet(cache.orderArray.get(docId));
    }
  };
}
/**
 * Draws {@code n} distinct random doc ids below {@code maxDoc} and wraps them in a doc set.
 *
 * <p>When {@code n} does not exceed {@code smallSetCuttoff}, {@code smallSetType} 0 yields a
 * {@code SortedIntDocSet} and 1 yields a {@code HashDocSet}; every other case yields a
 * {@code BitDocSet}.
 *
 * @param n number of distinct ids to draw
 * @param maxDoc exclusive upper bound of the ids
 * @return the doc set holding the drawn ids
 */
public DocSet getRandomDocSet(int n, int maxDoc) {
  final OpenBitSet taken = new OpenBitSet(maxDoc);
  final int[] ids = new int[n];
  for (int filled = 0; filled < n; filled++) {
    int candidate;
    // redraw until an id is found that has not been taken yet
    do {
      candidate = rand.nextInt(maxDoc);
    } while (taken.getAndSet(candidate));
    ids[filled] = candidate;
  }

  if (n <= smallSetCuttoff) {
    if (smallSetType == 0) {
      Arrays.sort(ids);
      return new SortedIntDocSet(ids);
    }
    if (smallSetType == 1) {
      Arrays.sort(ids);
      return loadfactor != 0
          ? new HashDocSet(ids, 0, n, 1 / loadfactor)
          : new HashDocSet(ids, 0, n);
    }
  }

  return new BitDocSet(taken, n);
}
/**
 * Checks the reported field statistics against the values tracked by this consumer, marks the
 * consumer as finished and forwards the call to the wrapped consumer.
 *
 * @param sumTotalTermFreq total term frequency reported for the field; expected to be -1 when
 *     the field indexes documents only
 * @param sumDocFreq summed document frequency reported for the field
 * @param docCount number of documents reported for the field
 * @throws IOException if the wrapped consumer fails
 */
@Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
  assert state == TermsConsumerState.INITIAL
      || (state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0);
  state = TermsConsumerState.FINISHED;

  assert docCount >= 0;
  assert docCount == visitedDocs.cardinality();
  assert sumDocFreq >= docCount;
  assert sumDocFreq == this.sumDocFreq;

  final boolean docsOnly = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
  if (!docsOnly) {
    assert sumTotalTermFreq >= sumDocFreq;
    assert sumTotalTermFreq == this.sumTotalTermFreq;
  } else {
    assert sumTotalTermFreq == -1;
  }

  in.finish(sumTotalTermFreq, sumDocFreq, docCount);
}
public DocSet getDocSlice(OpenBitSet bs) { int len = (int) bs.cardinality(); int[] arr = new int[len + 5]; arr[0] = 10; arr[1] = 20; arr[2] = 30; arr[arr.length - 1] = 1; arr[arr.length - 2] = 2; int offset = 3; int end = offset + len; OpenBitSetIterator iter = new OpenBitSetIterator(bs); // put in opposite order... DocLists are not ordered. for (int i = end - 1; i >= offset; i--) { arr[i] = iter.nextDoc(); } return new DocSlice(offset, len, arr, null, len * 2, 100.0f); }
@Override public void startDoc(int docID, int freq) throws IOException { assert state == PostingsConsumerState.INITIAL; state = PostingsConsumerState.START; assert docID >= 0; if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) { assert freq == -1; this.freq = 0; // we don't expect any positions here } else { assert freq > 0; this.freq = freq; totalTermFreq += freq; } this.positionCount = 0; this.lastPosition = 0; this.lastStartOffset = 0; docFreq++; visitedDocs.set(docID); in.startDoc(docID, freq); }
/**
 * Handles the feature ids whose geometry is found in the cache: each such id is removed from
 * {@code matches}, a feature is rebuilt around the cached geometry, and when it passes
 * {@code evaluateFeature} its documents are set in {@code bits}.
 *
 * @param jcs cache queried by feature id
 * @param matches feature ids to check; cached ones are removed from this set
 * @param docIndexLookup documents associated with each feature id
 * @param bits bit set receiving the matching documents
 */
private void processCachedFeatures(
    GroupCacheAccess jcs,
    Set<FeatureId> matches,
    Multimap<FeatureId, Integer> docIndexLookup,
    OpenBitSet bits) {
  final java.util.Iterator<FeatureId> pending = matches.iterator();
  while (pending.hasNext()) {
    final FeatureId id = pending.next();
    final Geometry cached = (Geometry) jcs.get(id.getID());
    if (cached == null) {
      // not cached: leave the id in matches
      continue;
    }
    pending.remove();

    final SimpleFeatureBuilder builder =
        new SimpleFeatureBuilder(this.sourceAccessor.one().getSchema());
    builder.set(
        this.sourceAccessor.one().getSchema().getGeometryDescriptor().getName(), cached);
    final SimpleFeature rebuilt = builder.buildFeature(id.getID());

    if (!evaluateFeature(rebuilt)) {
      continue;
    }
    for (int doc : docIndexLookup.get(id)) {
      bits.set(doc);
    }
  }
}
/**
 * Scans the field's section of the input line by line, starting at {@code termsStart}, and
 * builds the FST that maps each term to (start of its docs, (docFreq, totalTermFreq)).
 *
 * <p>While scanning it also accumulates {@code sumDocFreq}, {@code sumTotalTermFreq} and
 * {@code termCount}, and derives {@code docCount} from the distinct doc ids seen.
 *
 * @throws IOException if reading from the input fails
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
  final Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>> b;
  // inner pair: (docFreq, totalTermFreq); outer pair: (docs start pointer, inner pair)
  final PairOutputs<Long, Long> outputsInner =
      new PairOutputs<Long, Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long, PairOutputs.Pair<Long, Long>> outputs =
      new PairOutputs<Long, PairOutputs.Pair<Long, Long>>(posIntOutputs, outputsInner);
  b =
      new Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>>(
          FST.INPUT_TYPE.BYTE1, outputs);
  // work on a clone so the position of the shared input is left alone
  IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  // -1 until the first TERM line has been seen
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  OpenBitSet visitedDocs = new OpenBitSet();
  final IntsRef scratchIntsRef = new IntsRef();
  while (true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      // End of this field's section: flush the pending term, if any, and stop.
      if (lastDocsStart != -1) {
        b.add(
            Util.toIntsRef(lastTerm, scratchIntsRef),
            outputs.newPair(
                lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      // DOC line: count the posting and remember the doc id for docCount.
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(
          scratch.bytes, scratch.offset + DOC.length, scratch.length - DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      // FREQ line: add the parsed frequency to the current term's total.
      UnicodeUtil.UTF8toUTF16(
          scratch.bytes,
          scratch.offset + FREQ.length,
          scratch.length - FREQ.length,
          scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      // TERM line: flush the previous term (if any), then start tracking the new one.
      if (lastDocsStart != -1) {
        b.add(
            Util.toIntsRef(lastTerm, scratchIntsRef),
            outputs.newPair(
                lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      // NOTE(review): copies from TERM.length without adding scratch.offset, unlike the DOC and
      // FREQ branches — presumably scratch.offset is always 0 here; confirm.
      System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      // totalTermFreq still belongs to the previous term at this point (0 before the first term)
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
  /* PrintStream ps = new PrintStream("out.dot"); fst.toDot(ps); ps.close(); System.out.println("SAVED out.dot"); */
  // System.out.println("FST " + fst.sizeInBytes());
}
/**
 * Adds a key by setting every hash bucket computed for the given key bytes.
 *
 * @param key array holding the key bytes
 * @param offset offset passed on to the bucket computation
 * @param length length passed on to the bucket computation
 */
@Override
public void add(byte[] key, int offset, int length) {
  final long[] buckets = getHashBuckets(key, offset, length);
  for (int b = 0; b < buckets.length; b++) {
    bitset.fastSet(buckets[b]);
  }
}
protected void doSingle(int maxSize) { int sz = rand.nextInt(maxSize + 1); int sz2 = rand.nextInt(maxSize); OpenBitSet bs1 = getRandomSet(sz, rand.nextInt(sz + 1)); OpenBitSet bs2 = getRandomSet(sz, rand.nextInt(sz2 + 1)); DocSet a1 = new BitDocSet(bs1); DocSet a2 = new BitDocSet(bs2); DocSet b1 = getDocSet(bs1); DocSet b2 = getDocSet(bs2); checkEqual(bs1, b1); checkEqual(bs2, b2); iter(a1, b1); iter(a2, b2); OpenBitSet a_and = (OpenBitSet) bs1.clone(); a_and.and(bs2); OpenBitSet a_or = (OpenBitSet) bs1.clone(); a_or.or(bs2); // OpenBitSet a_xor = (OpenBitSet)bs1.clone(); a_xor.xor(bs2); OpenBitSet a_andn = (OpenBitSet) bs1.clone(); a_andn.andNot(bs2); checkEqual(a_and, b1.intersection(b2)); checkEqual(a_or, b1.union(b2)); checkEqual(a_andn, b1.andNot(b2)); assertEquals(a_and.cardinality(), b1.intersectionSize(b2)); assertEquals(a_or.cardinality(), b1.unionSize(b2)); assertEquals(a_andn.cardinality(), b1.andNotSize(b2)); }
/**
 * Asserts that the doc set reports exactly the bits of the bit set: membership is compared for
 * every position below the bit set's capacity, then the sizes are compared.
 *
 * @param bs expected bits
 * @param set doc set under test
 */
public void checkEqual(OpenBitSet bs, DocSet set) {
  int position = 0;
  while (position < bs.capacity()) {
    assertEquals(bs.get(position), set.exists(position));
    position++;
  }
  assertEquals(bs.cardinality(), set.size());
}
/**
 * Adds a key by setting every hash bucket computed for it.
 *
 * @param key key to add
 */
public void add(ByteBuffer key) {
  final long[] buckets = getHashBuckets(key);
  for (int b = 0; b < buckets.length; b++) {
    bitset.fastSet(buckets[b]);
  }
}
private List<ConceptAnnotation> filterSubsumedConcepts( String q, List<ConceptAnnotation> annotations) { OpenBitSet qset = new OpenBitSet(q.length()); qset.set(0, qset.length()); // sort the annotations, longest first Collections.sort( annotations, new Comparator<ConceptAnnotation>() { @Override public int compare(ConceptAnnotation ca1, ConceptAnnotation ca2) { Integer len1 = ca1.getEnd() - ca1.getBegin(); Integer len2 = ca2.getEnd() - ca2.getBegin(); return len2.compareTo(len1); } }); List<ConceptAnnotation> filtered = new ArrayList<ConceptAnnotation>(); long prevCardinality = qset.cardinality(); for (ConceptAnnotation annotation : annotations) { OpenBitSet cset = new OpenBitSet(qset.length()); cset.set(0, qset.length()); cset.flip(annotation.getBegin(), annotation.getEnd()); cset.intersect(qset); long cardinality = cset.cardinality(); if (cardinality == prevCardinality) { // concept is subsumed, skip it continue; } filtered.add(annotation); prevCardinality = cardinality; } return filtered; }