/**
   * Returns the ids of the documents in the given segment that are indexed, in field
   * {@code fieldName}, under any of the spatial hash cell ids held by this filter.
   *
   * @param context segment context whose reader is searched
   * @param acceptDocs documents allowed to match, or {@code null} to allow every document
   * @return a bit set of the matching doc ids, or {@code null} when there are no cell ids to look
   *     up or no accepted document matched
   * @throws IOException if reading the postings fails
   */
  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    if (spatialHashCellsIds.size() == 0) {
      return null;
    }

    final AtomicReader atomicReader = context.reader();
    final OpenBitSet matchedDocumentsIds = new OpenBitSet(atomicReader.maxDoc());

    // Primitive flag: avoids boxing a Boolean on every matching document.
    boolean found = false;
    for (int i = 0; i < spatialHashCellsIds.size(); i++) {
      final Term spatialHashCellTerm = new Term(fieldName, spatialHashCellsIds.get(i));
      final DocsEnum spatialHashCellsDocs = atomicReader.termDocsEnum(spatialHashCellTerm);
      if (spatialHashCellsDocs == null) {
        // no postings for this cell id in this segment
        continue;
      }
      int docId;
      while ((docId = spatialHashCellsDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (acceptDocs == null || acceptDocs.get(docId)) {
          matchedDocumentsIds.fastSet(docId);
          found = true;
        }
      }
    }

    return found ? matchedDocumentsIds : null;
  }
Ejemplo n.º 2
0
 /**
  * Looks up the parent id of {@code doc} and, in the first reader that holds a live document for
  * that parent, marks the parent's doc id in that reader's bit set (created on demand).
  *
  * @param doc doc id whose parent should be recorded
  * @throws IOException declared by the overridden method
  */
 @Override
 public void collect(int doc) throws IOException {
   final BytesWrap parentId = typeCache.parentIdByDoc(doc);
   if (parentId == null) {
     return;
   }
   for (Tuple<IndexReader, IdReaderTypeCache> readerAndCache : readers) {
     final IndexReader indexReader = readerAndCache.v1();
     final IdReaderTypeCache idReaderTypeCache = readerAndCache.v2();
     if (idReaderTypeCache == null) {
       // might be if we don't have that doc with that type in this reader
       continue;
     }
     final int parentDocId = idReaderTypeCache.docById(parentId);
     if (parentDocId == -1 || indexReader.isDeleted(parentDocId)) {
       // parent unknown to, or deleted in, this reader: try the next one
       continue;
     }
     OpenBitSet parentBits = parentDocs().get(indexReader.getCoreCacheKey());
     if (parentBits == null) {
       parentBits = new OpenBitSet(indexReader.maxDoc());
       parentDocs.put(indexReader.getCoreCacheKey(), parentBits);
     }
     parentBits.fastSet(parentDocId);
     // only the first reader containing the parent is recorded
     return;
   }
 }
Ejemplo n.º 3
0
 /**
  * Creates a bit set of {@code sz} bits and sets {@code bitsToSet} randomly chosen positions
  * (positions may repeat, so fewer distinct bits can end up set).
  *
  * @param sz number of bits in the set; when 0 an empty set is returned without drawing
  * @param bitsToSet number of random positions to set
  * @return the populated bit set
  */
 public OpenBitSet getRandomSet(int sz, int bitsToSet) {
   final OpenBitSet randomBits = new OpenBitSet(sz);
   if (sz != 0) {
     for (int remaining = bitsToSet; remaining > 0; remaining--) {
       randomBits.fastSet(rand.nextInt(sz));
     }
   }
   return randomBits;
 }
Ejemplo n.º 4
0
 /**
  * Converts an OpenBitSet into the ascending list of its set bit positions.
  *
  * @param bs bit set to read
  * @return indexes in {@code [0, bs.capacity())} whose bit is set
  */
 private ArrayList<Integer> bitSetToArrayList(OpenBitSet bs) {
   final ArrayList<Integer> setPositions = new ArrayList<Integer>();
   int position = 0;
   while (position < bs.capacity()) {
     if (bs.get(position)) {
       setPositions.add(position);
     }
     position++;
   }
   return setPositions;
 }
Ejemplo n.º 5
0
  /**
   * Loads three random data sets (1000, 2000 and 4000 docs out of 5000), intersects them once via
   * {@code OpenBitSet.intersect} and once via {@code AndDocIdSet}, and compares the two results.
   *
   * <p>NOTE(review): a mismatch is only printed, never asserted, so this test cannot fail on a
   * wrong intersection -- confirm whether that is intentional.
   */
  @Test
  public void _testAndIntersections() throws Exception {
    System.out.println("Running test case: intersections, PForDeltaAndDocIdSet.nextDoc() ...");

    final int maxDoc = 5000;
    final int[] setSizes = {1000, 2000, 4000};

    final ArrayList<OpenBitSet> obs = new ArrayList<OpenBitSet>();
    final ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();

    // identity array 0..maxDoc-1 handed to generateRandomDataNew for every data set
    final int[] originalInput = new int[maxDoc];
    for (int value = 0; value < maxDoc; ++value) {
      originalInput[value] = value;
    }

    // generate each random input and load it into both the bit sets and the doc id sets
    for (int numDocs : setSizes) {
      final int[] input = generateRandomDataNew(originalInput, maxDoc, numDocs);
      loadRandomDataSets(input, obs, docs, numDocs);
    }

    // expected result: in-place intersection of all bit sets into the first one
    final OpenBitSet base = obs.get(0);
    for (int i = 1; i < obs.size(); ++i) {
      base.intersect(obs.get(i));
    }
    final ArrayList<Integer> expectedIntersectionResult = new ArrayList<Integer>();
    for (int bit = 0; bit < base.size(); ++bit) {
      if (base.get(bit)) {
        expectedIntersectionResult.add(bit);
      }
    }

    // actual result: iterate the AndDocIdSet built over the same inputs
    final ArrayList<Integer> intersectionResult = new ArrayList<Integer>();
    final AndDocIdSet ands = new AndDocIdSet(docs);
    final DocIdSetIterator iter = ands.iterator();
    for (int docId = iter.nextDoc();
        docId != DocIdSetIterator.NO_MORE_DOCS;
        docId = iter.nextDoc()) {
      intersectionResult.add(docId);
    }

    final boolean matches = compareTwoLists(intersectionResult, expectedIntersectionResult);
    if (!matches) {
      System.out.println("The result for the new version does not match the expectation");
    }
    System.out.println("----------------completed---------------------------");
  }
Ejemplo n.º 6
0
 /**
  * Prints the capacity and the set bit positions of an OpenBitSet to standard output.
  *
  * @param bs bit set to print
  * @return the set bit positions that were printed, in ascending order
  */
 private ArrayList<Integer> printBitSet(OpenBitSet bs) {
   final ArrayList<Integer> setPositions = new ArrayList<Integer>();
   System.out.print("bitSet(" + bs.capacity() + ") [");
   int position = 0;
   while (position < bs.capacity()) {
     if (bs.get(position)) {
       setPositions.add(position);
       System.out.print(position);
       System.out.print(" ");
     }
     position++;
   }
   System.out.println("]");
   return setPositions;
 }
Ejemplo n.º 7
0
  /**
   * Builds a doc id set for the configured values: a bit is set for every index the value
   * converter returns (optionally complemented), and membership is answered through the facet
   * data's nested array.
   *
   * @param reader reader whose facet data is consulted
   * @return a set that matches nothing when no bit ends up set, otherwise a set backed by the bits
   * @throws IOException declared by the overridden method
   */
  @Override
  public RandomAccessDocIdSet getRandomAccessDocIdSet(BoboIndexReader reader) throws IOException {
    final MultiValueFacetDataCache dataCache =
        (MultiValueFacetDataCache) _facetHandler.getFacetData(reader);
    final int[] index = _valueConverter.convert(dataCache, _vals);
    final BigNestedIntArray nestedArray = dataCache._nestedArray;
    final OpenBitSet bitset = new OpenBitSet(dataCache.valArray.size());

    for (int valueIndex : index) {
      bitset.fastSet(valueIndex);
    }

    if (_takeCompliment) {
      // flip the bits
      final int valueCount = dataCache.valArray.size();
      int position = 0;
      while (position < valueCount) {
        bitset.fastFlip(position);
        ++position;
      }
    }

    if (bitset.cardinality() != 0) {
      return new RandomAccessDocIdSet() {
        @Override
        public final boolean get(int docId) {
          return nestedArray.contains(docId, bitset);
        }

        @Override
        public DocIdSetIterator iterator() {
          return new MultiValueOrFacetDocIdSetIterator(dataCache, bitset);
        }
      };
    }

    // nothing selected: answer every lookup with "no match"
    final DocIdSet empty = EmptyDocIdSet.getInstance();
    return new RandomAccessDocIdSet() {
      @Override
      public boolean get(int docId) {
        return false;
      }

      @Override
      public DocIdSetIterator iterator() throws IOException {
        return empty.iterator();
      }
    };
  }
Ejemplo n.º 8
0
 /**
  * Tests whether every hash bucket of {@code key} is set in the bit set.
  *
  * @param key key to look up
  * @return {@code false} as soon as one bucket bit is clear, {@code true} otherwise
  */
 public boolean isPresent(ByteBuffer key) {
   for (long bucket : getHashBuckets(key)) {
     if (bitset.fastGet(bucket)) {
       continue;
     }
     return false;
   }
   return true;
 }
Ejemplo n.º 9
0
 /**
  * Copies the set bits of {@code bs} into an int array, in iteration order, and wraps it in a
  * {@code SortedIntDocSet}.
  *
  * @param bs source bit set
  * @return a doc set holding one entry per set bit
  */
 public DocSet getIntDocSet(OpenBitSet bs) {
   final int size = (int) bs.cardinality();
   final int[] ids = new int[size];
   final OpenBitSetIterator bits = new OpenBitSetIterator(bs);
   int slot = 0;
   while (slot < size) {
     ids[slot] = bits.nextDoc();
     slot++;
   }
   return new SortedIntDocSet(ids);
 }
Ejemplo n.º 10
0
 /**
  * Tests whether every hash bucket of the given key slice is set in the bit set.
  *
  * @param key array containing the key bytes
  * @param offset offset passed to {@code getHashBuckets}
  * @param length length passed to {@code getHashBuckets}
  * @return {@code false} as soon as one bucket bit is clear, {@code true} otherwise
  */
 @Override
 public boolean isPresent(byte[] key, int offset, int length) {
   for (long bucket : getHashBuckets(key, offset, length)) {
     if (bitset.fastGet(bucket)) {
       continue;
     }
     return false;
   }
   return true;
 }
Ejemplo n.º 11
0
 /**
  * Counts the buckets in {@code [0, buckets())} whose bit is not set.
  *
  * @return number of clear buckets
  */
 long emptyBuckets() {
   long clear = 0;
   long bucket = 0;
   while (bucket < buckets()) {
     if (!bitset.get(bucket)) {
       clear++;
     }
     bucket++;
   }
   return clear;
 }
Ejemplo n.º 12
0
 /**
  * Estimates selectivity as the summed frequencies of the selected values divided by the
  * reader's {@code maxDoc()}; results above 0.999 are reported as 1.0.
  *
  * @param reader segment reader the facet data is built for
  * @return the selectivity ratio
  */
 @Override
 public double getFacetSelectivity(BoboSegmentReader reader) {
   final FacetDataCache<?> dataCache = facetDataCacheBuilder.build(reader);
   final OpenBitSet selected = getBitSet(dataCache);
   final int[] frequencies = dataCache.freqs;

   // sum the frequency of every value whose bit is set
   int accumFreq = 0;
   for (int bit = selected.nextSetBit(0); bit >= 0; bit = selected.nextSetBit(bit + 1)) {
     accumFreq += frequencies[bit];
   }

   final int total = reader.maxDoc();
   final double selectivity = (double) accumFreq / (double) total;
   return selectivity > 0.999 ? 1.0 : selectivity;
 }
Ejemplo n.º 13
0
  /**
   * Selects, for each position of the view coordinate, the parts that are on route and not
   * superseded by a later part (contradicting parts are kept alongside each other), and returns
   * the union over all positions in index order.
   *
   * <p>The running {@code latest} part is shared across positions rather than reset per position.
   *
   * @param parts candidate versions
   * @param vc view coordinate supplying the positions to evaluate
   * @return the selected parts, ordered by their index in {@code parts}
   * @throws IOException declared for callers; propagated from the calls made here
   */
  public <V extends ConceptComponent<?, ?>.Version> List<V> locateLatest(
      List<V> parts, ViewCoordinate vc) throws IOException {
    V latest = null;
    final OpenBitSet selected = new OpenBitSet(parts.size());
    for (PositionBI position : vc.getPositionSet()) {
      final RelativePositionComputerBI mapper = RelativePositionComputer.getComputer(position);
      final OpenBitSet selectedForPosition = new OpenBitSet(parts.size());
      for (int i = 0; i < parts.size(); i++) {
        final V part = parts.get(i);
        if (!mapper.onRoute(part)) {
          continue;
        }
        if (latest == null) {
          latest = part;
          selectedForPosition.set(i);
          continue;
        }
        switch (mapper.relativePosition(latest, part)) {
          case AFTER:
            // a newer part replaces everything selected so far for this position
            latest = part;
            selectedForPosition.clear(0, Integer.MAX_VALUE);
            selectedForPosition.set(i);
            break;

          case CONTRADICTION:
            selectedForPosition.set(i);
            break;

          case BEFORE:
          default:
            // nothing to do
            break;
        }
      }
      selected.or(selectedForPosition);
    }

    final List<V> resultsList = new ArrayList<>((int) selected.cardinality());
    final DocIdSetIterator selectedItr = selected.iterator();
    for (int id = selectedItr.nextDoc();
        id != DocIdSetIterator.NO_MORE_DOCS;
        id = selectedItr.nextDoc()) {
      resultsList.add(parts.get(id));
    }
    return resultsList;
  }
 /**
  * Collects into a bit set every document returned for the terms enumerated by the wrapped
  * query, and reports the number of visited terms back to the query.
  *
  * @param reader index reader to enumerate terms and documents from
  * @return {@code DocIdSet.EMPTY_DOCIDSET} when the enumeration starts with no term, otherwise
  *     the bit set of collected documents
  * @throws IOException if reading terms or documents fails
  */
 public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
   final TermEnum enumerator = this.query.getEnum(reader);
   try {
     if (enumerator.term() == null) {
       return DocIdSet.EMPTY_DOCIDSET;
     }
     final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
     // documents are read in batches of up to 32
     final int[] docs = new int[32];
     final int[] freqs = new int[32];
     final TermDocs termDocs = reader.termDocs();
     try {
       int termCount = 0;
       do {
         final Term term = enumerator.term();
         if (term == null) {
           break;
         }
         termCount++;
         termDocs.seek(term);
         for (int count = termDocs.read(docs, freqs);
             count != 0;
             count = termDocs.read(docs, freqs)) {
           for (int i = 0; i < count; i++) {
             bitSet.set(docs[i]);
           }
         }
       } while (enumerator.next());

       this.query.incTotalNumberOfTerms(termCount);
     } finally {
       termDocs.close();
     }
     return bitSet;
   } finally {
     enumerator.close();
   }
 }
Ejemplo n.º 15
0
  /**
   * Returns the part of {@code docs} that falls inside the given sub-reader, re-based so that the
   * sub-reader's first document is 0. When no doc base is known for the reader, {@code docs} is
   * returned unchanged.
   *
   * @param subReader reader to produce the doc id set for
   * @return the re-based bit set, or {@code docs} itself when there is no base for the reader
   * @throws IOException if iterating {@code docs} fails
   */
  @Override
  public DocIdSet getDocIdSet(IndexReader subReader) throws IOException {
    final boolean hasBase = bases != null && bases.containsKey(subReader);
    if (!hasBase) {
      return docs;
    }

    final int docBase = bases.get(subReader);
    final int readerSize = subReader.maxDoc();
    final OpenBitSet filter = new OpenBitSet(readerSize);

    final DocIdSetIterator iterator = docs.iterator();
    final int upperBound = docBase + readerSize;
    for (int doc = iterator.advance(docBase); doc < upperBound; doc = iterator.nextDoc()) {
      filter.set(doc - docBase);
    }
    return filter;
  }
  /**
   * Sets, in {@code bits}, the documents of every matching feature that passes
   * {@code evaluateFeature}. Features with a cached geometry are handled first; the remaining ids
   * are fetched from the feature source in batches of at most {@code MAX_FIDS_PER_QUERY}, and each
   * fetched geometry is put into the cache.
   *
   * <p>{@code matches} is consumed: ids are removed from it as they are processed.
   *
   * @param matches feature ids still to evaluate (mutated)
   * @param docIndexLookup documents associated with each feature id
   * @param bits bit set receiving the accepted documents
   * @return {@code bits}
   * @throws IOException if the feature source cannot be read
   */
  private OpenBitSet applySpatialFilter(
      Set<FeatureId> matches, Multimap<FeatureId, Integer> docIndexLookup, OpenBitSet bits)
      throws IOException {

    final JeevesJCS jcs = getJCSCache();
    processCachedFeatures(jcs, matches, docIndexLookup, bits);

    while (!matches.isEmpty()) {
      final Id fidFilter;
      if (matches.size() <= MAX_FIDS_PER_QUERY) {
        // everything left fits into one query
        fidFilter = _filterFactory.id(matches);
        matches = Collections.emptySet();
      } else {
        // move the next batch of ids out of the pending set
        final FeatureId[] batch = new FeatureId[MAX_FIDS_PER_QUERY];
        final Iterator<FeatureId> pending = matches.iterator();
        for (int filled = 0; pending.hasNext() && filled < MAX_FIDS_PER_QUERY; filled++) {
          batch[filled] = pending.next();
          pending.remove();
        }
        fidFilter = _filterFactory.id(batch);
      }

      final FeatureSource<SimpleFeatureType, SimpleFeature> source = sourceAccessor.one();
      final String typeName = source.getSchema().getName().getLocalPart();
      final String[] geometryAttribute = {
        source.getSchema().getGeometryDescriptor().getLocalName()
      };
      final FeatureCollection<SimpleFeatureType, SimpleFeature> features =
          source.getFeatures(new org.geotools.data.Query(typeName, fidFilter, geometryAttribute));
      final FeatureIterator<SimpleFeature> iterator = features.features();

      try {
        while (iterator.hasNext()) {
          final SimpleFeature feature = iterator.next();
          final FeatureId featureId = feature.getIdentifier();
          jcs.put(featureId.getID(), feature.getDefaultGeometry());
          if (!evaluateFeature(feature)) {
            continue;
          }
          for (int doc : docIndexLookup.get(featureId)) {
            bits.set(doc);
          }
        }
      } catch (CacheException e) {
        throw new Error(e);
      } finally {
        iterator.close();
      }
    }
    return bits;
  }
  /**
   * Applies {@code operation} to every bit set in {@code bs[fromIndex, toIndex)} against the
   * materialised {@code toCompare} set, reusing one accumulator for all computations.
   *
   * @return the per-element results, in index order, as a typed array
   * @throws Exception if the operation fails
   */
  @SuppressWarnings({"unchecked"})
  @Override
  public T[] call() throws Exception {
    final OpenBitSetDISI accumulator = new OpenBitSetDISI(finalBitsetSize);

    final OpenBitSetDISI toCompareDisi = new OpenBitSetDISI(finalBitsetSize);
    toCompareDisi.inPlaceOr(toCompare.iterator());

    final Object[] result = new Object[toIndex - fromIndex];
    int slot = 0;
    for (int i = fromIndex; i < toIndex; i++, slot++) {
      result[slot] = operation.compute(accumulator, bs[i], toCompareDisi);
    }

    return ArrayUtils.typedArray(result);
  }
Ejemplo n.º 18
0
  /**
   * Runs {@code query} through the searcher, translates every matching doc id to the leaf id of
   * its cache entry, and wraps the translated ids in a scorer.
   *
   * @param searcher searcher used to execute the query and to look up the doc-id cache
   * @param similarity similarity handed to the scorer
   * @param query query whose matches are translated
   * @param reader reader handed to the scorer
   * @return a scorer over the translated documents
   * @throws IOException if executing the query fails
   */
  public static SolrCachingAuxDocScorer createAuxDocScorer(
      SolrIndexSearcher searcher, Similarity similarity, Query query, SolrIndexReader reader)
      throws IOException {
    // Get hold of solr top level searcher
    // Execute query with caching
    // translate results to leaf docs
    // build ordered doc list

    final DocSet auxDocSet = searcher.getDocSet(query);

    final CacheEntry[] indexedByDocId =
        (CacheEntry[])
            searcher.cacheLookup(
                AlfrescoSolrEventListener.ALFRESCO_CACHE,
                AlfrescoSolrEventListener.KEY_DBID_LEAF_PATH_BY_DOC_ID);

    final OpenBitSet translated = new OpenBitSet();

    if (!(auxDocSet instanceof BitDocSet)) {
      final DocIterator it = auxDocSet.iterator();
      while (it.hasNext()) {
        translated.set(indexedByDocId[it.nextDoc()].getLeaf());
      }
    } else {
      // bit-backed set: walk the set bits directly
      final OpenBitSet sourceBits = ((BitDocSet) auxDocSet).getBits();
      for (int doc = sourceBits.nextSetBit(0); doc != -1; doc = sourceBits.nextSetBit(doc + 1)) {
        translated.set(indexedByDocId[doc].getLeaf());
      }
    }

    return new SolrCachingAuxDocScorer(similarity, new BitDocSet(translated), reader);
  }
Ejemplo n.º 19
0
  /**
   * Builds a doc id set from the bit set of selected values. Multi-value facet data is answered
   * through its nested array; single-value data through the order array.
   *
   * @param reader segment reader the facet data is built for
   * @return the empty doc id set when no value is selected, otherwise a set backed by the bits
   * @throws IOException declared by the overridden method
   */
  @Override
  public RandomAccessDocIdSet getRandomAccessDocIdSet(final BoboSegmentReader reader)
      throws IOException {
    final FacetDataCache<?> dataCache = facetDataCacheBuilder.build(reader);
    final OpenBitSet openBitSet = getBitSet(dataCache);
    if (openBitSet.cardinality() == 0) {
      return EmptyDocIdSet.getInstance();
    }

    final boolean multi = dataCache instanceof MultiValueFacetDataCache;
    final MultiValueFacetDataCache<?> multiCache =
        multi ? (MultiValueFacetDataCache<?>) dataCache : null;

    return new RandomAccessDocIdSet() {
      @Override
      public boolean get(int docId) {
        return multi
            ? multiCache._nestedArray.contains(docId, openBitSet)
            : openBitSet.fastGet(dataCache.orderArray.get(docId));
      }

      @Override
      public DocIdSetIterator iterator() {
        if (!multi) {
          return new FacetOrFilter.FacetOrDocIdSetIterator(dataCache, openBitSet);
        }
        return new MultiValueORFacetFilter.MultiValueOrFacetDocIdSetIterator(
            multiCache, openBitSet);
      }
    };
  }
Ejemplo n.º 20
0
  /**
   * Builds a doc set of exactly {@code n} distinct random doc ids below {@code maxDoc}.
   *
   * <p>For {@code n <= smallSetCuttoff} the representation follows {@code smallSetType}: 0 yields
   * a {@code SortedIntDocSet}, 1 a {@code HashDocSet}; any other case yields a {@code BitDocSet}.
   *
   * @param n number of distinct doc ids to pick
   * @param maxDoc exclusive upper bound for the doc ids
   * @return the random doc set
   * @throws IllegalArgumentException if {@code n > maxDoc}; that many distinct ids do not exist
   *     and the rejection-sampling loop below would otherwise never terminate
   */
  public DocSet getRandomDocSet(int n, int maxDoc) {
    if (n > maxDoc) {
      throw new IllegalArgumentException(
          "cannot pick " + n + " distinct doc ids below maxDoc=" + maxDoc);
    }

    OpenBitSet obs = new OpenBitSet(maxDoc);
    int[] a = new int[n];
    for (int i = 0; i < n; i++) {
      // redraw until an id that has not been picked yet comes up
      for (; ; ) {
        int idx = rand.nextInt(maxDoc);
        if (obs.getAndSet(idx)) continue;
        a[i] = idx;
        break;
      }
    }

    if (n <= smallSetCuttoff) {
      if (smallSetType == 0) {
        Arrays.sort(a);
        return new SortedIntDocSet(a);
      } else if (smallSetType == 1) {
        Arrays.sort(a);
        return loadfactor != 0 ? new HashDocSet(a, 0, n, 1 / loadfactor) : new HashDocSet(a, 0, n);
      }
    }

    return new BitDocSet(obs, n);
  }
 /**
  * Validates the field-level statistics against the values tracked by this consumer, marks the
  * consumer as finished and forwards the call to the wrapped consumer.
  *
  * @param sumTotalTermFreq total term frequency, expected to be -1 for DOCS_ONLY fields
  * @param sumDocFreq sum of the document frequencies of all terms
  * @param docCount number of documents visited for this field
  * @throws IOException if the wrapped consumer fails
  */
 @Override
 public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
   assert state == TermsConsumerState.INITIAL
       || (state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0);
   state = TermsConsumerState.FINISHED;

   assert docCount >= 0;
   assert docCount == visitedDocs.cardinality();
   assert sumDocFreq >= docCount;
   assert sumDocFreq == this.sumDocFreq;

   if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
     assert sumTotalTermFreq >= sumDocFreq;
     assert sumTotalTermFreq == this.sumTotalTermFreq;
   } else {
     assert sumTotalTermFreq == -1;
   }

   in.finish(sumTotalTermFreq, sumDocFreq, docCount);
 }
Ejemplo n.º 22
0
  /**
   * Builds a {@code DocSlice} from the set bits of {@code bs}. The ids are stored in reverse
   * iteration order inside a larger array, with filler values before and after the slice.
   *
   * @param bs source bit set
   * @return a slice of {@code bs.cardinality()} documents starting at offset 3 of the array
   */
  public DocSet getDocSlice(OpenBitSet bs) {
    final int len = (int) bs.cardinality();
    final int offset = 3;
    final int end = offset + len;

    // three filler slots in front of the slice and two behind it
    final int[] arr = new int[len + 5];
    arr[0] = 10;
    arr[1] = 20;
    arr[2] = 30;
    arr[arr.length - 1] = 1;
    arr[arr.length - 2] = 2;

    final OpenBitSetIterator iter = new OpenBitSetIterator(bs);
    // put in opposite order... DocLists are not ordered.
    int slot = end;
    while (slot > offset) {
      slot--;
      arr[slot] = iter.nextDoc();
    }

    return new DocSlice(offset, len, arr, null, len * 2, 100.0f);
  }
 /**
  * Validates and records the start of a document's postings, resets the per-document position
  * tracking, and forwards the call to the wrapped consumer.
  *
  * @param docID document id, asserted to be non-negative
  * @param freq term frequency; asserted to be -1 for DOCS_ONLY fields and positive otherwise
  * @throws IOException if the wrapped consumer fails
  */
 @Override
 public void startDoc(int docID, int freq) throws IOException {
   assert state == PostingsConsumerState.INITIAL;
   state = PostingsConsumerState.START;
   assert docID >= 0;

   if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
     assert freq > 0;
     this.freq = freq;
     totalTermFreq += freq;
   } else {
     assert freq == -1;
     this.freq = 0; // we don't expect any positions here
   }

   // per-document position state starts from scratch
   this.positionCount = 0;
   this.lastPosition = 0;
   this.lastStartOffset = 0;

   docFreq++;
   visitedDocs.set(docID);
   in.startDoc(docID, freq);
 }
 /**
  * Handles every match whose geometry is already cached: the id is removed from
  * {@code matches}, a feature is rebuilt around the cached geometry, and when it passes
  * {@code evaluateFeature} its documents are set in {@code bits}.
  *
  * @param jcs cache queried by feature id
  * @param matches pending feature ids; ids found in the cache are removed
  * @param docIndexLookup documents associated with each feature id
  * @param bits bit set receiving the accepted documents
  */
 private void processCachedFeatures(
     GroupCacheAccess jcs,
     Set<FeatureId> matches,
     Multimap<FeatureId, Integer> docIndexLookup,
     OpenBitSet bits) {
   final java.util.Iterator<FeatureId> pending = matches.iterator();
   while (pending.hasNext()) {
     final FeatureId id = pending.next();
     final Geometry geom = (Geometry) jcs.get(id.getID());
     if (geom == null) {
       // not cached: leave the id in the set
       continue;
     }
     pending.remove();

     final SimpleFeatureBuilder simpleFeatureBuilder =
         new SimpleFeatureBuilder(this.sourceAccessor.one().getSchema());
     simpleFeatureBuilder.set(
         this.sourceAccessor.one().getSchema().getGeometryDescriptor().getName(), geom);
     final SimpleFeature simpleFeature = simpleFeatureBuilder.buildFeature(id.getID());
     if (!evaluateFeature(simpleFeature)) {
       continue;
     }
     for (int doc : docIndexLookup.get(id)) {
       bits.set(doc);
     }
   }
 }
Ejemplo n.º 25
0
 /**
  * Scans this field's section of the input, starting at {@code termsStart}, and builds the terms
  * FST together with the field statistics.
  *
  * <p>Each term is mapped to the output pair {@code (docsStart, (docFreq, totalTermFreq))}. While
  * scanning, {@code sumDocFreq}, {@code sumTotalTermFreq} and {@code termCount} are accumulated,
  * and {@code docCount} is set to the number of distinct doc ids seen.
  *
  * @throws IOException if reading from the input fails
  */
 private void loadTerms() throws IOException {
   PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
   final Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>> b;
   // inner pair: (docFreq, totalTermFreq); outer pair: (docsStart, inner pair)
   final PairOutputs<Long, Long> outputsInner =
       new PairOutputs<Long, Long>(posIntOutputs, posIntOutputs);
   final PairOutputs<Long, PairOutputs.Pair<Long, Long>> outputs =
       new PairOutputs<Long, PairOutputs.Pair<Long, Long>>(posIntOutputs, outputsInner);
   b =
       new Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>>(
           FST.INPUT_TYPE.BYTE1, outputs);
   // read from a clone of the enclosing reader's input, positioned at this field's terms
   IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
   in.seek(termsStart);
   final BytesRef lastTerm = new BytesRef(10);
   // -1 until the first TERM line has been seen
   long lastDocsStart = -1;
   int docFreq = 0;
   long totalTermFreq = 0;
   OpenBitSet visitedDocs = new OpenBitSet();
   final IntsRef scratchIntsRef = new IntsRef();
   while (true) {
     SimpleTextUtil.readLine(in, scratch);
     if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
       // end of this field's section: flush the pending term, if any, and stop
       if (lastDocsStart != -1) {
         b.add(
             Util.toIntsRef(lastTerm, scratchIntsRef),
             outputs.newPair(
                 lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
         sumTotalTermFreq += totalTermFreq;
       }
       break;
     } else if (StringHelper.startsWith(scratch, DOC)) {
       // DOC line: count it for the current term and remember the doc id for docCount
       docFreq++;
       sumDocFreq++;
       UnicodeUtil.UTF8toUTF16(
           scratch.bytes,
           scratch.offset + DOC.length,
           scratch.length - DOC.length,
           scratchUTF16);
       int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
       visitedDocs.set(docID);
     } else if (StringHelper.startsWith(scratch, FREQ)) {
       // FREQ line: add the parsed frequency to the current term's total
       UnicodeUtil.UTF8toUTF16(
           scratch.bytes,
           scratch.offset + FREQ.length,
           scratch.length - FREQ.length,
           scratchUTF16);
       totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
     } else if (StringHelper.startsWith(scratch, TERM)) {
       // TERM line: flush the previous term, if any, then start tracking the new one
       if (lastDocsStart != -1) {
         b.add(
             Util.toIntsRef(lastTerm, scratchIntsRef),
             outputs.newPair(
                 lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
       }
       // the file pointer after the TERM line is recorded as the term's docs start
       lastDocsStart = in.getFilePointer();
       final int len = scratch.length - TERM.length;
       if (len > lastTerm.length) {
         lastTerm.grow(len);
       }
       // NOTE(review): the copy starts at TERM.length without adding scratch.offset, unlike the
       // DOC/FREQ branches -- presumably scratch.offset is always 0 here; confirm.
       System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
       lastTerm.length = len;
       docFreq = 0;
       // fold the previous term's total (0 before the first term) into the field-level sum
       sumTotalTermFreq += totalTermFreq;
       totalTermFreq = 0;
       termCount++;
     }
   }
   // number of distinct doc ids seen across all terms of this field
   docCount = (int) visitedDocs.cardinality();
   fst = b.finish();
   /*
   PrintStream ps = new PrintStream("out.dot");
   fst.toDot(ps);
   ps.close();
   System.out.println("SAVED out.dot");
   */
   // System.out.println("FST " + fst.sizeInBytes());
 }
Ejemplo n.º 26
0
 /**
  * Sets the bit of every hash bucket computed for the given key slice.
  *
  * @param key array containing the key bytes
  * @param offset offset passed to {@code getHashBuckets}
  * @param length length passed to {@code getHashBuckets}
  */
 @Override
 public void add(byte[] key, int offset, int length) {
   for (long bucket : getHashBuckets(key, offset, length)) {
     bitset.fastSet(bucket);
   }
 }
Ejemplo n.º 27
0
  /**
   * Runs one randomized round: builds two random bit sets, wraps each both as a
   * {@code BitDocSet} and via {@code getDocSet}, and checks that iteration, intersection, union
   * and and-not (results and sizes) agree with the plain {@code OpenBitSet} operations.
   *
   * @param maxSize upper bound used for the random set sizes
   */
  protected void doSingle(int maxSize) {
    // the order of the random draws below is kept as-is so seeded runs stay reproducible
    final int sz = rand.nextInt(maxSize + 1);
    final int sz2 = rand.nextInt(maxSize);
    final int bitsInFirst = rand.nextInt(sz + 1);
    final OpenBitSet bs1 = getRandomSet(sz, bitsInFirst);
    final int bitsInSecond = rand.nextInt(sz2 + 1);
    final OpenBitSet bs2 = getRandomSet(sz, bitsInSecond);

    final DocSet a1 = new BitDocSet(bs1);
    final DocSet a2 = new BitDocSet(bs2);
    final DocSet b1 = getDocSet(bs1);
    final DocSet b2 = getDocSet(bs2);

    checkEqual(bs1, b1);
    checkEqual(bs2, b2);

    iter(a1, b1);
    iter(a2, b2);

    // reference results computed directly on copies of the first bit set
    final OpenBitSet expectedAnd = (OpenBitSet) bs1.clone();
    expectedAnd.and(bs2);
    final OpenBitSet expectedOr = (OpenBitSet) bs1.clone();
    expectedOr.or(bs2);
    final OpenBitSet expectedAndNot = (OpenBitSet) bs1.clone();
    expectedAndNot.andNot(bs2);

    checkEqual(expectedAnd, b1.intersection(b2));
    checkEqual(expectedOr, b1.union(b2));
    checkEqual(expectedAndNot, b1.andNot(b2));

    assertEquals(expectedAnd.cardinality(), b1.intersectionSize(b2));
    assertEquals(expectedOr.cardinality(), b1.unionSize(b2));
    assertEquals(expectedAndNot.cardinality(), b1.andNotSize(b2));
  }
Ejemplo n.º 28
0
 /**
  * Asserts that {@code set} contains exactly the documents whose bit is set in {@code bs}:
  * membership is compared for every index below the bit set's capacity, then the sizes.
  *
  * @param bs reference bit set
  * @param set doc set under test
  */
 public void checkEqual(OpenBitSet bs, DocSet set) {
   int doc = 0;
   while (doc < bs.capacity()) {
     assertEquals(bs.get(doc), set.exists(doc));
     doc++;
   }
   assertEquals(bs.cardinality(), set.size());
 }
Ejemplo n.º 29
0
 /**
  * Sets the bit of every hash bucket computed for {@code key}.
  *
  * @param key key to add
  */
 public void add(ByteBuffer key) {
   for (long bucket : getHashBuckets(key)) {
     bitset.fastSet(bucket);
   }
 }
Ejemplo n.º 30
0
 /**
  * Sorts {@code annotations} in place, longest span first, and keeps each annotation whose
  * masked-out cardinality differs from the previously kept value.
  *
  * <p>NOTE(review): {@code qset} is never modified after it is fully set, so each annotation is
  * effectively compared by span length against the previously kept one -- confirm that this is
  * the intended "subsumed" test.
  *
  * @param q query text; its length sizes the bit sets
  * @param annotations annotations to filter (reordered by this call)
  * @return the annotations that were kept, in sorted order
  */
 private List<ConceptAnnotation> filterSubsumedConcepts(
     String q, List<ConceptAnnotation> annotations) {
   final OpenBitSet qset = new OpenBitSet(q.length());
   qset.set(0, qset.length());

   // sort the annotations, longest first
   Collections.sort(
       annotations,
       new Comparator<ConceptAnnotation>() {
         @Override
         public int compare(ConceptAnnotation ca1, ConceptAnnotation ca2) {
           final int len1 = ca1.getEnd() - ca1.getBegin();
           final int len2 = ca2.getEnd() - ca2.getBegin();
           return (len2 < len1) ? -1 : ((len2 == len1) ? 0 : 1);
         }
       });

   final List<ConceptAnnotation> filtered = new ArrayList<ConceptAnnotation>();
   long prevCardinality = qset.cardinality();
   for (ConceptAnnotation annotation : annotations) {
     // all bits set except the annotation's span, then restricted to qset
     final OpenBitSet cset = new OpenBitSet(qset.length());
     cset.set(0, qset.length());
     cset.flip(annotation.getBegin(), annotation.getEnd());
     cset.intersect(qset);

     final long cardinality = cset.cardinality();
     if (cardinality != prevCardinality) {
       filtered.add(annotation);
       prevCardinality = cardinality;
     }
     // otherwise: concept is subsumed, skip it
   }
   return filtered;
 }