private List<ConceptAnnotation> filterSubsumedConcepts( String q, List<ConceptAnnotation> annotations) { OpenBitSet qset = new OpenBitSet(q.length()); qset.set(0, qset.length()); // sort the annotations, longest first Collections.sort( annotations, new Comparator<ConceptAnnotation>() { @Override public int compare(ConceptAnnotation ca1, ConceptAnnotation ca2) { Integer len1 = ca1.getEnd() - ca1.getBegin(); Integer len2 = ca2.getEnd() - ca2.getBegin(); return len2.compareTo(len1); } }); List<ConceptAnnotation> filtered = new ArrayList<ConceptAnnotation>(); long prevCardinality = qset.cardinality(); for (ConceptAnnotation annotation : annotations) { OpenBitSet cset = new OpenBitSet(qset.length()); cset.set(0, qset.length()); cset.flip(annotation.getBegin(), annotation.getEnd()); cset.intersect(qset); long cardinality = cset.cardinality(); if (cardinality == prevCardinality) { // concept is subsumed, skip it continue; } filtered.add(annotation); prevCardinality = cardinality; } return filtered; }
public <V extends ConceptComponent<?, ?>.Version> List<V> locateLatest( List<V> parts, ViewCoordinate vc) throws IOException { V latest = null; OpenBitSet resultsPartSet = new OpenBitSet(parts.size()); for (PositionBI pos : vc.getPositionSet()) { RelativePositionComputerBI mapper = RelativePositionComputer.getComputer(pos); OpenBitSet iteratorPartSet = new OpenBitSet(parts.size()); for (int i = 0; i < parts.size(); i++) { V part = parts.get(i); if (mapper.onRoute(part)) { if (latest == null) { latest = part; iteratorPartSet.set(i); } else { switch (mapper.relativePosition(latest, part)) { case BEFORE: // nothing to do break; case CONTRADICTION: iteratorPartSet.set(i); break; case AFTER: latest = part; iteratorPartSet.clear(0, Integer.MAX_VALUE); iteratorPartSet.set(i); break; default: break; } } } } resultsPartSet.or(iteratorPartSet); } List<V> resultsList = new ArrayList<>((int) resultsPartSet.cardinality()); DocIdSetIterator resultsItr = resultsPartSet.iterator(); int id = resultsItr.nextDoc(); while (id != DocIdSetIterator.NO_MORE_DOCS) { resultsList.add(parts.get(id)); id = resultsItr.nextDoc(); } return resultsList; }
/**
 * Evaluates the features identified by {@code matches} and sets, in {@code bits},
 * the document indexes of every feature accepted by {@code evaluateFeature}.
 *
 * <p>Features whose geometry is already cached are handled first by
 * {@code processCachedFeatures}. The remaining ids are queried from the feature
 * source in batches of at most {@code MAX_FIDS_PER_QUERY}; ids are removed from
 * {@code matches} as they are batched. Each fetched feature's default geometry is
 * put into the cache under the feature id.
 *
 * @param matches ids of the candidate features; modified by this method
 * @param docIndexLookup maps a feature id to the document indexes to mark
 * @param bits the bit set to mark; also the return value
 * @return {@code bits}
 * @throws IOException if the feature source fails
 * @throws Error wrapping a {@code CacheException} raised while caching a geometry
 */
private OpenBitSet applySpatialFilter(
    Set<FeatureId> matches, Multimap<FeatureId, Integer> docIndexLookup, OpenBitSet bits)
    throws IOException {
  JeevesJCS cache = getJCSCache();
  processCachedFeatures(cache, matches, docIndexLookup, bits);

  while (!matches.isEmpty()) {
    final Id batchFilter;
    if (matches.size() <= MAX_FIDS_PER_QUERY) {
      // Everything left fits in one query.
      batchFilter = _filterFactory.id(matches);
      matches = Collections.emptySet();
    } else {
      // Move the next MAX_FIDS_PER_QUERY ids out of the set into a batch.
      FeatureId[] batch = new FeatureId[MAX_FIDS_PER_QUERY];
      Iterator<FeatureId> pending = matches.iterator();
      for (int slot = 0; pending.hasNext() && slot < MAX_FIDS_PER_QUERY; slot++) {
        batch[slot] = pending.next();
        pending.remove();
      }
      batchFilter = _filterFactory.id(batch);
    }

    FeatureSource<SimpleFeatureType, SimpleFeature> source = sourceAccessor.one();
    String typeName = source.getSchema().getName().getLocalPart();
    // Only the geometry attribute is requested.
    String[] geometryAttribute = {source.getSchema().getGeometryDescriptor().getLocalName()};
    FeatureCollection<SimpleFeatureType, SimpleFeature> fetched =
        source.getFeatures(
            new org.geotools.data.Query(typeName, batchFilter, geometryAttribute));

    FeatureIterator<SimpleFeature> cursor = fetched.features();
    try {
      while (cursor.hasNext()) {
        SimpleFeature current = cursor.next();
        FeatureId currentId = current.getIdentifier();
        cache.put(currentId.getID(), current.getDefaultGeometry());
        if (!evaluateFeature(current)) {
          continue;
        }
        for (int doc : docIndexLookup.get(currentId)) {
          bits.set(doc);
        }
      }
    } catch (CacheException e) {
      throw new Error(e);
    } finally {
      cursor.close();
    }
  }
  return bits;
}
public static SolrCachingAuxDocScorer createAuxDocScorer( SolrIndexSearcher searcher, Similarity similarity, Query query, SolrIndexReader reader) throws IOException { // Get hold of solr top level searcher // Execute query with caching // translate reults to leaf docs // build ordered doc list DocSet auxDocSet = searcher.getDocSet(query); CacheEntry[] indexedByDocId = (CacheEntry[]) searcher.cacheLookup( AlfrescoSolrEventListener.ALFRESCO_CACHE, AlfrescoSolrEventListener.KEY_DBID_LEAF_PATH_BY_DOC_ID); // List<ScoreDoc> auxDocs = pathCollector.getDocs(); OpenBitSet translated = new OpenBitSet(); if (auxDocSet instanceof BitDocSet) { BitDocSet source = (BitDocSet) auxDocSet; OpenBitSet openBitSet = source.getBits(); int current = -1; while ((current = openBitSet.nextSetBit(current + 1)) != -1) { CacheEntry entry = indexedByDocId[current]; translated.set(entry.getLeaf()); } } else { for (DocIterator it = auxDocSet.iterator(); it.hasNext(); /* */ ) { CacheEntry entry = indexedByDocId[it.nextDoc()]; translated.set(entry.getLeaf()); } } return new SolrCachingAuxDocScorer(similarity, new BitDocSet(translated), reader); }
@Override public void startDoc(int docID, int freq) throws IOException { assert state == PostingsConsumerState.INITIAL; state = PostingsConsumerState.START; assert docID >= 0; if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) { assert freq == -1; this.freq = 0; // we don't expect any positions here } else { assert freq > 0; this.freq = freq; totalTermFreq += freq; } this.positionCount = 0; this.lastPosition = 0; this.lastStartOffset = 0; docFreq++; visitedDocs.set(docID); in.startDoc(docID, freq); }
/**
 * Returns the set of documents that contain at least one of the terms enumerated by
 * the wrapped query.
 *
 * <p>Returns {@code DocIdSet.EMPTY_DOCIDSET} when the enumeration has no current
 * term. Otherwise every enumerated term's documents are set in a bit set sized to
 * {@code reader.maxDoc()}, and the number of visited terms is reported to the query
 * through {@code incTotalNumberOfTerms}.
 *
 * @param reader the index reader to enumerate terms and documents from
 * @return the matching documents
 * @throws IOException if reading the index fails
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  TermEnum enumerator = this.query.getEnum(reader);
  try {
    if (enumerator.term() == null) {
      return DocIdSet.EMPTY_DOCIDSET;
    }

    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    // Bulk-read buffers; the frequencies are read but not used.
    int[] docs = new int[32];
    int[] freqs = new int[32];
    TermDocs termDocs = reader.termDocs();
    try {
      int visitedTerms = 0;
      Term term = enumerator.term();
      while (term != null) {
        visitedTerms++;
        termDocs.seek(term);
        int count;
        while ((count = termDocs.read(docs, freqs)) != 0) {
          for (int i = 0; i < count; i++) {
            bitSet.set(docs[i]);
          }
        }
        if (!enumerator.next()) {
          break;
        }
        term = enumerator.term();
      }
      this.query.incTotalNumberOfTerms(visitedTerms);
    } finally {
      termDocs.close();
    }
    return bitSet;
  } finally {
    enumerator.close();
  }
}
/**
 * Returns the documents of {@code docs} that fall inside the given sub-reader,
 * re-based to the sub-reader's own numbering.
 *
 * <p>If no base is known for {@code subReader} (or no bases exist at all), the full
 * {@code docs} set is returned unchanged. Otherwise every document id in
 * {@code [docBase, docBase + maxDoc)} is copied into a new bit set at index
 * {@code doc - docBase}.
 *
 * @param subReader the reader to produce the document set for
 * @return the document set for {@code subReader}
 * @throws IOException if iterating the documents fails
 */
@Override
public DocIdSet getDocIdSet(IndexReader subReader) throws IOException {
  final boolean baseKnown = bases != null && bases.containsKey(subReader);
  if (!baseKnown) {
    return docs;
  }

  int offset = bases.get(subReader);
  int size = subReader.maxDoc();
  OpenBitSet local = new OpenBitSet(size);
  DocIdSetIterator cursor = docs.iterator();
  final int upperBound = offset + size;
  for (int doc = cursor.advance(offset); doc < upperBound; doc = cursor.nextDoc()) {
    local.set(doc - offset);
  }
  return local;
}
/**
 * Handles the candidate features whose geometry is already in the cache.
 *
 * <p>For each id in {@code matches} with a cached geometry, the id is removed from
 * {@code matches}, a feature carrying only that geometry is rebuilt from the source
 * schema, and, if {@code evaluateFeature} accepts it, the feature's document indexes
 * are set in {@code bits}. Ids without a cached geometry are left in {@code matches}.
 *
 * @param jcs the cache holding geometries keyed by feature id string
 * @param matches the candidate feature ids; cached ones are removed
 * @param docIndexLookup maps a feature id to the document indexes to mark
 * @param bits the bit set to mark
 */
private void processCachedFeatures(
    GroupCacheAccess jcs,
    Set<FeatureId> matches,
    Multimap<FeatureId, Integer> docIndexLookup,
    OpenBitSet bits) {
  java.util.Iterator<FeatureId> pending = matches.iterator();
  while (pending.hasNext()) {
    FeatureId featureId = pending.next();
    Geometry cachedGeometry = (Geometry) jcs.get(featureId.getID());
    if (cachedGeometry == null) {
      // Not cached: leave it in the set.
      continue;
    }
    pending.remove();

    final SimpleFeatureBuilder builder =
        new SimpleFeatureBuilder(this.sourceAccessor.one().getSchema());
    builder.set(
        this.sourceAccessor.one().getSchema().getGeometryDescriptor().getName(),
        cachedGeometry);
    final SimpleFeature rebuilt = builder.buildFeature(featureId.getID());

    if (!evaluateFeature(rebuilt)) {
      continue;
    }
    for (int doc : docIndexLookup.get(featureId)) {
      bits.set(doc);
    }
  }
}
private void loadTerms() throws IOException { PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false); final Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>> b; final PairOutputs<Long, Long> outputsInner = new PairOutputs<Long, Long>(posIntOutputs, posIntOutputs); final PairOutputs<Long, PairOutputs.Pair<Long, Long>> outputs = new PairOutputs<Long, PairOutputs.Pair<Long, Long>>(posIntOutputs, outputsInner); b = new Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>>( FST.INPUT_TYPE.BYTE1, outputs); IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); in.seek(termsStart); final BytesRef lastTerm = new BytesRef(10); long lastDocsStart = -1; int docFreq = 0; long totalTermFreq = 0; OpenBitSet visitedDocs = new OpenBitSet(); final IntsRef scratchIntsRef = new IntsRef(); while (true) { SimpleTextUtil.readLine(in, scratch); if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) { if (lastDocsStart != -1) { b.add( Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair( lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq))); sumTotalTermFreq += totalTermFreq; } break; } else if (StringHelper.startsWith(scratch, DOC)) { docFreq++; sumDocFreq++; UnicodeUtil.UTF8toUTF16( scratch.bytes, scratch.offset + DOC.length, scratch.length - DOC.length, scratchUTF16); int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); visitedDocs.set(docID); } else if (StringHelper.startsWith(scratch, FREQ)) { UnicodeUtil.UTF8toUTF16( scratch.bytes, scratch.offset + FREQ.length, scratch.length - FREQ.length, scratchUTF16); totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); } else if (StringHelper.startsWith(scratch, TERM)) { if (lastDocsStart != -1) { b.add( Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair( lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq))); } lastDocsStart = in.getFilePointer(); final int len = scratch.length - 
TERM.length; if (len > lastTerm.length) { lastTerm.grow(len); } System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len); lastTerm.length = len; docFreq = 0; sumTotalTermFreq += totalTermFreq; totalTermFreq = 0; termCount++; } } docCount = (int) visitedDocs.cardinality(); fst = b.finish(); /* PrintStream ps = new PrintStream("out.dot"); fst.toDot(ps); ps.close(); System.out.println("SAVED out.dot"); */ // System.out.println("FST " + fst.sizeInBytes()); }
protected void fillFloatValues(FloatValues vals, IndexReader reader, String field) throws IOException { if (parser == null) { try { parser = FieldCache.DEFAULT_FLOAT_PARSER; fillFloatValues(vals, reader, field); return; } catch (NumberFormatException ne) { vals.parserHashCode = null; // wipe the previous one parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER; fillFloatValues(vals, reader, field); return; } } setParserAndResetCounts(vals, parser); Terms terms = MultiFields.getTerms(reader, field); int maxDoc = reader.maxDoc(); vals.values = null; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet(maxDoc) : null; DocsEnum docs = null; try { while (true) { final BytesRef term = termsEnum.next(); if (term == null) { break; } final float termval = parser.parseFloat(term); docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (vals.values == null) { vals.values = new float[maxDoc]; } vals.values[docID] = termval; vals.numDocs++; if (validBits != null) { validBits.set(docID); } } vals.numTerms++; } } catch (FieldCache.StopFillCacheException stop) { } if (vals.valid == null) { vals.valid = checkMatchAllBits(validBits, vals.numDocs, maxDoc); } } if (vals.values == null) { vals.values = new float[maxDoc]; } if (vals.valid == null && vals.numDocs < 1) { vals.valid = new Bits.MatchNoBits(maxDoc); } }
/**
 * Creates a bit set with the given positions set.
 *
 * @param nums the bit positions to set
 * @param maxDoc the initial number of bits to size the set for
 * @return the populated bit set
 */
private OpenBitSet createObs(int[] nums, int maxDoc) {
  OpenBitSet result = new OpenBitSet(maxDoc);
  for (int i = 0; i < nums.length; i++) {
    result.set(nums[i]);
  }
  return result;
}
/**
 * Creates a bit set with the given positions set.
 *
 * @param nums the bit positions to set; must not contain {@code null}
 * @param maxDoc the initial number of bits to size the set for
 * @return the populated bit set
 */
private OpenBitSet createObs(ArrayList<Integer> nums, int maxDoc) {
  OpenBitSet result = new OpenBitSet(maxDoc);
  for (int i = 0, count = nums.size(); i < count; i++) {
    int position = nums.get(i);
    result.set(position);
  }
  return result;
}