private List<ConceptAnnotation> filterSubsumedConcepts(
     String q, List<ConceptAnnotation> annotations) {
   OpenBitSet qset = new OpenBitSet(q.length());
   qset.set(0, qset.length());
   // sort the annotations, longest first
   Collections.sort(
       annotations,
       new Comparator<ConceptAnnotation>() {
         @Override
         public int compare(ConceptAnnotation ca1, ConceptAnnotation ca2) {
           Integer len1 = ca1.getEnd() - ca1.getBegin();
           Integer len2 = ca2.getEnd() - ca2.getBegin();
           return len2.compareTo(len1);
         }
       });
   List<ConceptAnnotation> filtered = new ArrayList<ConceptAnnotation>();
   long prevCardinality = qset.cardinality();
   for (ConceptAnnotation annotation : annotations) {
     OpenBitSet cset = new OpenBitSet(qset.length());
     cset.set(0, qset.length());
     cset.flip(annotation.getBegin(), annotation.getEnd());
     cset.intersect(qset);
     long cardinality = cset.cardinality();
     if (cardinality == prevCardinality) {
       // concept is subsumed, skip it
       continue;
     }
     filtered.add(annotation);
     prevCardinality = cardinality;
   }
   return filtered;
 }
Пример #2
0
  public <V extends ConceptComponent<?, ?>.Version> List<V> locateLatest(
      List<V> parts, ViewCoordinate vc) throws IOException {
    V latest = null;
    OpenBitSet resultsPartSet = new OpenBitSet(parts.size());
    for (PositionBI pos : vc.getPositionSet()) {
      RelativePositionComputerBI mapper = RelativePositionComputer.getComputer(pos);
      OpenBitSet iteratorPartSet = new OpenBitSet(parts.size());
      for (int i = 0; i < parts.size(); i++) {
        V part = parts.get(i);
        if (mapper.onRoute(part)) {
          if (latest == null) {
            latest = part;
            iteratorPartSet.set(i);
          } else {
            switch (mapper.relativePosition(latest, part)) {
              case BEFORE:
                // nothing to do
                break;

              case CONTRADICTION:
                iteratorPartSet.set(i);
                break;

              case AFTER:
                latest = part;
                iteratorPartSet.clear(0, Integer.MAX_VALUE);
                iteratorPartSet.set(i);
                break;

              default:
                break;
            }
          }
        }
      }
      resultsPartSet.or(iteratorPartSet);
    }
    List<V> resultsList = new ArrayList<>((int) resultsPartSet.cardinality());
    DocIdSetIterator resultsItr = resultsPartSet.iterator();
    int id = resultsItr.nextDoc();
    while (id != DocIdSetIterator.NO_MORE_DOCS) {
      resultsList.add(parts.get(id));
      id = resultsItr.nextDoc();
    }
    return resultsList;
  }
  private OpenBitSet applySpatialFilter(
      Set<FeatureId> matches, Multimap<FeatureId, Integer> docIndexLookup, OpenBitSet bits)
      throws IOException {

    JeevesJCS jcs = getJCSCache();
    processCachedFeatures(jcs, matches, docIndexLookup, bits);

    while (!matches.isEmpty()) {
      Id fidFilter;
      if (matches.size() > MAX_FIDS_PER_QUERY) {
        FeatureId[] subset = new FeatureId[MAX_FIDS_PER_QUERY];
        int i = 0;
        Iterator<FeatureId> iter = matches.iterator();
        while (iter.hasNext() && i < MAX_FIDS_PER_QUERY) {
          subset[i] = iter.next();
          iter.remove();
          i++;
        }
        fidFilter = _filterFactory.id(subset);
      } else {
        fidFilter = _filterFactory.id(matches);
        matches = Collections.emptySet();
      }

      FeatureSource<SimpleFeatureType, SimpleFeature> _featureSource = sourceAccessor.one();
      String ftn = _featureSource.getSchema().getName().getLocalPart();
      String[] geomAtt = {_featureSource.getSchema().getGeometryDescriptor().getLocalName()};
      FeatureCollection<SimpleFeatureType, SimpleFeature> features =
          _featureSource.getFeatures(new org.geotools.data.Query(ftn, fidFilter, geomAtt));
      FeatureIterator<SimpleFeature> iterator = features.features();

      try {
        while (iterator.hasNext()) {
          SimpleFeature feature = iterator.next();
          FeatureId featureId = feature.getIdentifier();
          jcs.put(featureId.getID(), feature.getDefaultGeometry());
          if (evaluateFeature(feature)) {
            for (int doc : docIndexLookup.get(featureId)) {
              bits.set(doc);
            }
          }
        }
      } catch (CacheException e) {
        throw new Error(e);
      } finally {
        iterator.close();
      }
    }
    return bits;
  }
Пример #4
0
  public static SolrCachingAuxDocScorer createAuxDocScorer(
      SolrIndexSearcher searcher, Similarity similarity, Query query, SolrIndexReader reader)
      throws IOException {
    // Get hold of solr top level searcher
    // Execute query with caching
    // translate reults to leaf docs
    // build ordered doc list

    DocSet auxDocSet = searcher.getDocSet(query);

    CacheEntry[] indexedByDocId =
        (CacheEntry[])
            searcher.cacheLookup(
                AlfrescoSolrEventListener.ALFRESCO_CACHE,
                AlfrescoSolrEventListener.KEY_DBID_LEAF_PATH_BY_DOC_ID);

    // List<ScoreDoc> auxDocs = pathCollector.getDocs();
    OpenBitSet translated = new OpenBitSet();

    if (auxDocSet instanceof BitDocSet) {
      BitDocSet source = (BitDocSet) auxDocSet;
      OpenBitSet openBitSet = source.getBits();
      int current = -1;
      while ((current = openBitSet.nextSetBit(current + 1)) != -1) {
        CacheEntry entry = indexedByDocId[current];
        translated.set(entry.getLeaf());
      }
    } else {
      for (DocIterator it = auxDocSet.iterator(); it.hasNext(); /* */ ) {
        CacheEntry entry = indexedByDocId[it.nextDoc()];
        translated.set(entry.getLeaf());
      }
    }

    return new SolrCachingAuxDocScorer(similarity, new BitDocSet(translated), reader);
  }
 @Override
 public void startDoc(int docID, int freq) throws IOException {
   assert state == PostingsConsumerState.INITIAL;
   state = PostingsConsumerState.START;
   assert docID >= 0;
   if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
     assert freq == -1;
     this.freq = 0; // we don't expect any positions here
   } else {
     assert freq > 0;
     this.freq = freq;
     totalTermFreq += freq;
   }
   this.positionCount = 0;
   this.lastPosition = 0;
   this.lastStartOffset = 0;
   docFreq++;
   visitedDocs.set(docID);
   in.startDoc(docID, freq);
 }
 /*     */ public DocIdSet getDocIdSet(IndexReader reader) /*     */ throws IOException /*     */ {
   /* 103 */ TermEnum enumerator = this.query.getEnum(reader);
   /*     */ try
   /*     */ {
     /* 106 */ if (enumerator.term() == null) {
       /* 107 */ return DocIdSet.EMPTY_DOCIDSET;
     }
     OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
     /* 110 */ int[] docs = new int[32];
     /* 111 */ int[] freqs = new int[32];
     /* 112 */ TermDocs termDocs = reader.termDocs();
     /*     */ int termCount;
     /*     */ try {
       termCount = 0;
       /*     */ do {
         /* 116 */ Term term = enumerator.term();
         /* 117 */ if (term == null) /*     */ break;
         /* 119 */ termCount++;
         /* 120 */ termDocs.seek(term);
         /*     */ while (true) {
           /* 122 */ int count = termDocs.read(docs, freqs);
           /* 123 */ if (count == 0) break;
           /* 124 */ for (int i = 0; i < count; i++) {
             /* 125 */ bitSet.set(docs[i]);
             /*     */ }
           /*     */ }
         /*     */
         /*     */ }
       /*     */
       /* 131 */ while (enumerator.next());
       /*     */
       /* 133 */ this.query.incTotalNumberOfTerms(termCount);
       /*     */ } finally
     /*     */ {
       /* 136 */ termDocs.close();
       /*     */ }
     /* 138 */ return bitSet;
     /*     */ } finally {
     /* 140 */ enumerator.close();
     /*     */ }
   /*     */ }
Пример #7
0
  @Override
  public DocIdSet getDocIdSet(IndexReader subReader) throws IOException {

    if (bases == null || !bases.containsKey(subReader)) {
      return docs;
    }

    int docBase = bases.get(subReader);

    int readerSize = subReader.maxDoc();
    OpenBitSet filter = new OpenBitSet(readerSize);

    DocIdSetIterator iterator = docs.iterator();
    int doc = iterator.advance(docBase);

    while (doc < docBase + readerSize) {
      filter.set(doc - docBase);
      doc = iterator.nextDoc();
    }
    return filter;
  }
 private void processCachedFeatures(
     GroupCacheAccess jcs,
     Set<FeatureId> matches,
     Multimap<FeatureId, Integer> docIndexLookup,
     OpenBitSet bits) {
   for (java.util.Iterator<FeatureId> iter = matches.iterator(); iter.hasNext(); ) {
     FeatureId id = iter.next();
     Geometry geom = (Geometry) jcs.get(id.getID());
     if (geom != null) {
       iter.remove();
       final SimpleFeatureBuilder simpleFeatureBuilder =
           new SimpleFeatureBuilder(this.sourceAccessor.one().getSchema());
       simpleFeatureBuilder.set(
           this.sourceAccessor.one().getSchema().getGeometryDescriptor().getName(), geom);
       final SimpleFeature simpleFeature = simpleFeatureBuilder.buildFeature(id.getID());
       if (evaluateFeature(simpleFeature)) {
         for (int doc : docIndexLookup.get(id)) {
           bits.set(doc);
         }
       }
     }
   }
 }
Пример #9
0
 private void loadTerms() throws IOException {
   PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
   final Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>> b;
   final PairOutputs<Long, Long> outputsInner =
       new PairOutputs<Long, Long>(posIntOutputs, posIntOutputs);
   final PairOutputs<Long, PairOutputs.Pair<Long, Long>> outputs =
       new PairOutputs<Long, PairOutputs.Pair<Long, Long>>(posIntOutputs, outputsInner);
   b =
       new Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>>(
           FST.INPUT_TYPE.BYTE1, outputs);
   IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
   in.seek(termsStart);
   final BytesRef lastTerm = new BytesRef(10);
   long lastDocsStart = -1;
   int docFreq = 0;
   long totalTermFreq = 0;
   OpenBitSet visitedDocs = new OpenBitSet();
   final IntsRef scratchIntsRef = new IntsRef();
   while (true) {
     SimpleTextUtil.readLine(in, scratch);
     if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
       if (lastDocsStart != -1) {
         b.add(
             Util.toIntsRef(lastTerm, scratchIntsRef),
             outputs.newPair(
                 lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
         sumTotalTermFreq += totalTermFreq;
       }
       break;
     } else if (StringHelper.startsWith(scratch, DOC)) {
       docFreq++;
       sumDocFreq++;
       UnicodeUtil.UTF8toUTF16(
           scratch.bytes,
           scratch.offset + DOC.length,
           scratch.length - DOC.length,
           scratchUTF16);
       int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
       visitedDocs.set(docID);
     } else if (StringHelper.startsWith(scratch, FREQ)) {
       UnicodeUtil.UTF8toUTF16(
           scratch.bytes,
           scratch.offset + FREQ.length,
           scratch.length - FREQ.length,
           scratchUTF16);
       totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
     } else if (StringHelper.startsWith(scratch, TERM)) {
       if (lastDocsStart != -1) {
         b.add(
             Util.toIntsRef(lastTerm, scratchIntsRef),
             outputs.newPair(
                 lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
       }
       lastDocsStart = in.getFilePointer();
       final int len = scratch.length - TERM.length;
       if (len > lastTerm.length) {
         lastTerm.grow(len);
       }
       System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
       lastTerm.length = len;
       docFreq = 0;
       sumTotalTermFreq += totalTermFreq;
       totalTermFreq = 0;
       termCount++;
     }
   }
   docCount = (int) visitedDocs.cardinality();
   fst = b.finish();
   /*
   PrintStream ps = new PrintStream("out.dot");
   fst.toDot(ps);
   ps.close();
   System.out.println("SAVED out.dot");
   */
   // System.out.println("FST " + fst.sizeInBytes());
 }
  protected void fillFloatValues(FloatValues vals, IndexReader reader, String field)
      throws IOException {
    if (parser == null) {
      try {
        parser = FieldCache.DEFAULT_FLOAT_PARSER;
        fillFloatValues(vals, reader, field);
        return;
      } catch (NumberFormatException ne) {
        vals.parserHashCode = null; // wipe the previous one
        parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER;
        fillFloatValues(vals, reader, field);
        return;
      }
    }
    setParserAndResetCounts(vals, parser);

    Terms terms = MultiFields.getTerms(reader, field);
    int maxDoc = reader.maxDoc();
    vals.values = null;
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet(maxDoc) : null;
      DocsEnum docs = null;
      try {
        while (true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final float termval = parser.parseFloat(term);
          docs = termsEnum.docs(null, docs);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            if (vals.values == null) {
              vals.values = new float[maxDoc];
            }
            vals.values[docID] = termval;
            vals.numDocs++;
            if (validBits != null) {
              validBits.set(docID);
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
      }

      if (vals.valid == null) {
        vals.valid = checkMatchAllBits(validBits, vals.numDocs, maxDoc);
      }
    }

    if (vals.values == null) {
      vals.values = new float[maxDoc];
    }

    if (vals.valid == null && vals.numDocs < 1) {
      vals.valid = new Bits.MatchNoBits(maxDoc);
    }
  }
Пример #11
0
 private OpenBitSet createObs(int nums[], int maxDoc) {
   OpenBitSet bitSet = new OpenBitSet(maxDoc);
   for (int num : nums) bitSet.set(num);
   return bitSet;
 }
Пример #12
0
 private OpenBitSet createObs(ArrayList<Integer> nums, int maxDoc) {
   OpenBitSet bitSet = new OpenBitSet(maxDoc);
   for (int num : nums) bitSet.set(num);
   return bitSet;
 }