private List<ConceptAnnotation> filterSubsumedConcepts(
     String q, List<ConceptAnnotation> annotations) {
   OpenBitSet qset = new OpenBitSet(q.length());
   qset.set(0, qset.length());
   // sort the annotations, longest first
   Collections.sort(
       annotations,
       new Comparator<ConceptAnnotation>() {
         @Override
         public int compare(ConceptAnnotation ca1, ConceptAnnotation ca2) {
           Integer len1 = ca1.getEnd() - ca1.getBegin();
           Integer len2 = ca2.getEnd() - ca2.getBegin();
           return len2.compareTo(len1);
         }
       });
   List<ConceptAnnotation> filtered = new ArrayList<ConceptAnnotation>();
   long prevCardinality = qset.cardinality();
   for (ConceptAnnotation annotation : annotations) {
     OpenBitSet cset = new OpenBitSet(qset.length());
     cset.set(0, qset.length());
     cset.flip(annotation.getBegin(), annotation.getEnd());
     cset.intersect(qset);
     long cardinality = cset.cardinality();
     if (cardinality == prevCardinality) {
       // concept is subsumed, skip it
       continue;
     }
     filtered.add(annotation);
     prevCardinality = cardinality;
   }
   return filtered;
 }
Пример #2
0
  @Test
  public void _testAndIntersections() throws Exception {
    System.out.println("Running test case: intersections, PForDeltaAndDocIdSet.nextDoc() ...");

    ArrayList<OpenBitSet> obs = new ArrayList<OpenBitSet>();
    ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>();
    ArrayList<Integer> expectedIntersectionResult = new ArrayList<Integer>();

    int maxDoc = 5000;
    int numDoc1 = 1000;
    int numDoc2 = 2000;
    int numDoc3 = 4000;
    int[] originalInput = null;
    int[] input1 = null;
    int[] input2 = null;
    int[] input3 = null;
    originalInput = new int[maxDoc];
    for (int i = 0; i < maxDoc; ++i) {
      originalInput[i] = i;
    }

    // generate random numbers and add them into PForDeltaDocIdSets
    input1 = generateRandomDataNew(originalInput, maxDoc, numDoc1);
    loadRandomDataSets(input1, obs, docs, numDoc1);
    input2 = generateRandomDataNew(originalInput, maxDoc, numDoc2);
    loadRandomDataSets(input2, obs, docs, numDoc2);
    input3 = generateRandomDataNew(originalInput, maxDoc, numDoc3);
    loadRandomDataSets(input3, obs, docs, numDoc3);

    // get the expected result
    OpenBitSet base = obs.get(0);
    for (int i = 1; i < obs.size(); ++i) {
      base.intersect(obs.get(i));
    }
    for (int k = 0; k < base.size(); ++k) {
      if (base.get(k)) expectedIntersectionResult.add(k);
    }

    // get the results from PForDeltaAndDocIdSet
    ArrayList<Integer> intersectionResult = new ArrayList<Integer>();
    AndDocIdSet ands = new AndDocIdSet(docs);
    DocIdSetIterator iter = ands.iterator();
    int docId = iter.nextDoc();
    while (docId != DocIdSetIterator.NO_MORE_DOCS) {
      intersectionResult.add(docId);
      docId = iter.nextDoc();
    }

    if (!compareTwoLists(intersectionResult, expectedIntersectionResult)) {
      System.out.println("The result for the new version does not match the expectation");
    }
    System.out.println("----------------completed---------------------------");
  }