// returns true if multigroup private boolean openDocumentIndex(int docID) throws Exception { // The data is only the data for this document id. Thus the base is set // to zero. _data = _env.getPositions(docID); _base = 0; _startingIndex = 0; int kk = _data[_base] & 0xFF, k2; switch (kk >> 6) // get type { case 0: // single group, no extents k2 = _data[_base + 1]; _firstGenerator.init(_data, _base += 2, k2); // decode concept table _nConcepts = _firstGenerator.decodeConcepts(kk & 0x3F, 0, _concepts); return false; case 2: // multi group, no extents _kTable.clear(); _offsets.clear(); _maxConcepts.clear(); ByteArrayDecompressor compr = new ByteArrayDecompressor(_data, _base + 1); compr.decode(kk & 0x3F, _kTable); compr.ascDecode(_kTable.popLast(), _offsets); compr.ascDecode(_kTable.popLast(), _maxConcepts); _base += 1 + compr.bytesRead(); _limit = _maxConcepts.cardinality(); return true; case 1: // single group, extents case 3: // multi group, extents throw new Exception("extents not yet implemented\n"); } return false; }
private int nextDocument(RoleFiller[] start) throws Exception { while (_nextDocGenHeap.isNonEmpty()) // still something to do { for (int i = 0; i < _nQueries; i++) if (_query[i] != null) _query[i].resetForNextDocument(); // gather all concepts this document has and store associated conceptData int index = 0; _document = _nextDocGenHeap.getDocument(); _docConcepts.clear(); _queryMasks.clear(); do { _docConcepts.add(_nextDocGenHeap.getConcept()); _queryMasks.add(_nextDocGenHeap.getQueryMask()); (_conceptData[index++] = _nextDocGenHeap.getTerms()).runBy(_query); _nextDocGenHeap.step(); } while (_nextDocGenHeap.atDocument(_document)); // if there is no saturation model, some query will always vote YES // and so every document will be opened // even if this case, however, savings can be achieved by not generating fillers // for some queries (not scoring, etc) // and, with more care, creation of some GroupGenerators can be avoided // saturating queries with lots of good hits will lead to best results int voteMask = 0; for (int i = 0; i < _nQueries; i++) if (_query[i] != null) if (_query[i].vote()) { start[i] = null; // normal reset voteMask |= 1 << i; } else start[i] = RoleFiller.STOP; // prohibit setting // we may eliminate some ConceptGroupGenerators // those which would be used only by Queries which voted NO if (voteMask != 0) // need to open up document { ConceptGroupGenerator gen; // !!! don't gather Fillers for disinterested Queries if (openDocumentIndex(_document)) // multi group { // set up all needed generators int i = 0; while ((_queryMasks.at(i) & voteMask) == 0) ++i; // assert(i < index); int c = _docConcepts.at(i); int group = 0; // find first group while (c > _maxConcepts.at(group) && ++group < _limit) ; gen = makeGenerator(group); gen.addTerms(indexOf(c), _conceptData[i]); for (++i; i < index; i++) if ((_queryMasks.at(i) & voteMask) > 0) { c = _docConcepts.at(i); if (c > _max) // need to find another group { // assert(group < _limit); while (c > _maxConcepts.at(group) && ++group < _limit) ; gen = makeGenerator(group); } gen.addTerms(indexOf(c), _conceptData[i]); } return 0; } else // single group { for (int i = 0; i < index; i++) if ((_queryMasks.at(i) & voteMask) != 0) _firstGenerator.addTerms(indexOf(_docConcepts.at(i)), _conceptData[i]); return 1; } } } return 2; }