private List<ConceptAnnotation> filterSubsumedConcepts( String q, List<ConceptAnnotation> annotations) { OpenBitSet qset = new OpenBitSet(q.length()); qset.set(0, qset.length()); // sort the annotations, longest first Collections.sort( annotations, new Comparator<ConceptAnnotation>() { @Override public int compare(ConceptAnnotation ca1, ConceptAnnotation ca2) { Integer len1 = ca1.getEnd() - ca1.getBegin(); Integer len2 = ca2.getEnd() - ca2.getBegin(); return len2.compareTo(len1); } }); List<ConceptAnnotation> filtered = new ArrayList<ConceptAnnotation>(); long prevCardinality = qset.cardinality(); for (ConceptAnnotation annotation : annotations) { OpenBitSet cset = new OpenBitSet(qset.length()); cset.set(0, qset.length()); cset.flip(annotation.getBegin(), annotation.getEnd()); cset.intersect(qset); long cardinality = cset.cardinality(); if (cardinality == prevCardinality) { // concept is subsumed, skip it continue; } filtered.add(annotation); prevCardinality = cardinality; } return filtered; }
@Test public void _testAndIntersections() throws Exception { System.out.println("Running test case: intersections, PForDeltaAndDocIdSet.nextDoc() ..."); ArrayList<OpenBitSet> obs = new ArrayList<OpenBitSet>(); ArrayList<DocIdSet> docs = new ArrayList<DocIdSet>(); ArrayList<Integer> expectedIntersectionResult = new ArrayList<Integer>(); int maxDoc = 5000; int numDoc1 = 1000; int numDoc2 = 2000; int numDoc3 = 4000; int[] originalInput = null; int[] input1 = null; int[] input2 = null; int[] input3 = null; originalInput = new int[maxDoc]; for (int i = 0; i < maxDoc; ++i) { originalInput[i] = i; } // generate random numbers and add them into PForDeltaDocIdSets input1 = generateRandomDataNew(originalInput, maxDoc, numDoc1); loadRandomDataSets(input1, obs, docs, numDoc1); input2 = generateRandomDataNew(originalInput, maxDoc, numDoc2); loadRandomDataSets(input2, obs, docs, numDoc2); input3 = generateRandomDataNew(originalInput, maxDoc, numDoc3); loadRandomDataSets(input3, obs, docs, numDoc3); // get the expected result OpenBitSet base = obs.get(0); for (int i = 1; i < obs.size(); ++i) { base.intersect(obs.get(i)); } for (int k = 0; k < base.size(); ++k) { if (base.get(k)) expectedIntersectionResult.add(k); } // get the results from PForDeltaAndDocIdSet ArrayList<Integer> intersectionResult = new ArrayList<Integer>(); AndDocIdSet ands = new AndDocIdSet(docs); DocIdSetIterator iter = ands.iterator(); int docId = iter.nextDoc(); while (docId != DocIdSetIterator.NO_MORE_DOCS) { intersectionResult.add(docId); docId = iter.nextDoc(); } if (!compareTwoLists(intersectionResult, expectedIntersectionResult)) { System.out.println("The result for the new version does not match the expectation"); } System.out.println("----------------completed---------------------------"); }