Exemplo n.º 1
0
  /**
   * Fetches the position data for one document and decodes the header of its index.
   *
   * <p>The two high bits of the first byte select the encoding type; the six low bits are handed
   * to the decoder for that type.
   *
   * @param docID document identifier passed to {@code _env.getPositions}
   * @return {@code true} for the multi-group encoding (type 2), {@code false} for the
   *     single-group encoding (type 0)
   * @throws Exception if the header selects one of the extents encodings (types 1 and 3), which
   *     are not implemented
   */
  private boolean openDocumentIndex(int docID) throws Exception {
    // The fetched array holds only this document's data, so decoding starts at offset zero.
    _data = _env.getPositions(docID);
    _base = 0;
    _startingIndex = 0;

    final int header = _data[_base] & 0xFF;
    final int type = header >> 6; // always 0..3 because header is 0..255
    final int lowBits = header & 0x3F;

    if (type == 0) {
      // single group, no extents
      final int k2 = _data[_base + 1];
      _base += 2;
      _firstGenerator.init(_data, _base, k2);
      // decode concept table
      _nConcepts = _firstGenerator.decodeConcepts(lowBits, 0, _concepts);
      return false;
    }

    if (type == 2) {
      // multi group, no extents
      _kTable.clear();
      _offsets.clear();
      _maxConcepts.clear();
      final ByteArrayDecompressor decoder = new ByteArrayDecompressor(_data, _base + 1);
      decoder.decode(lowBits, _kTable);
      decoder.ascDecode(_kTable.popLast(), _offsets);
      decoder.ascDecode(_kTable.popLast(), _maxConcepts);
      // skip the header byte plus everything the decoder consumed
      _base += 1 + decoder.bytesRead();
      _limit = _maxConcepts.cardinality();
      return true;
    }

    // types 1 (single group, extents) and 3 (multi group, extents)
    throw new Exception("extents not yet implemented\n");
  }
Exemplo n.º 2
0
  /**
   * Advances to the next document that at least one query votes to open, and feeds the
   * relevant concept terms into the group generator(s) for that document.
   *
   * <p>For every candidate document taken from {@code _nextDocGenHeap}, all concepts belonging
   * to it are gathered, each non-null query is asked to vote, and the document index is opened
   * only if some query voted yes. Documents nobody votes for are skipped.
   *
   * @param start per-query slots, indexed like {@code _query}; slot {@code i} is set to
   *     {@code null} when query {@code i} votes yes and to {@code RoleFiller.STOP} when it votes
   *     no. Slots of {@code null} queries are left untouched.
   * @return {@code 0} if a multi-group document was opened, {@code 1} if a single-group document
   *     was opened, {@code 2} if the heap ran out of documents
   * @throws Exception if opening the document index fails (see {@code openDocumentIndex})
   */
  private int nextDocument(RoleFiller[] start) throws Exception {
    while (_nextDocGenHeap.isNonEmpty()) // still something to do
    {
      for (int i = 0; i < _nQueries; i++) {
        if (_query[i] != null) {
          _query[i].resetForNextDocument();
        }
      }

      // gather all concepts this document has and store associated conceptData
      int index = 0;
      _document = _nextDocGenHeap.getDocument();
      _docConcepts.clear();
      _queryMasks.clear();
      do {
        _docConcepts.add(_nextDocGenHeap.getConcept());
        _queryMasks.add(_nextDocGenHeap.getQueryMask());
        (_conceptData[index++] = _nextDocGenHeap.getTerms()).runBy(_query);
        _nextDocGenHeap.step();
      } while (_nextDocGenHeap.atDocument(_document));

      // if there is no saturation model, some query will always vote YES
      // and so every document will be opened
      // even in this case, however, savings can be achieved by not generating fillers
      // for some queries (not scoring, etc)
      // and, with more care, creation of some GroupGenerators can be avoided
      // saturating queries with lots of good hits will lead to best results
      int voteMask = 0;
      for (int i = 0; i < _nQueries; i++) {
        if (_query[i] != null) {
          if (_query[i].vote()) {
            start[i] = null; // normal reset
            voteMask |= 1 << i;
          } else {
            start[i] = RoleFiller.STOP; // prohibit setting
          }
        }
      }

      // we may eliminate some ConceptGroupGenerators
      // those which would be used only by Queries which voted NO
      if (voteMask != 0) // need to open up document
      {
        ConceptGroupGenerator gen;
        // !!! don't gather Fillers for disinterested Queries
        if (openDocumentIndex(_document)) // multi group
        {
          // set up all needed generators
          // NOTE(review): this scan is unbounded; it relies on some gathered concept matching
          // a voting query (the original carried a disabled "assert(i < index)") - confirm.
          int i = 0;
          while ((_queryMasks.at(i) & voteMask) == 0) {
            ++i;
          }
          int c = _docConcepts.at(i);
          int group = 0;
          // find first group
          while (c > _maxConcepts.at(group) && ++group < _limit) {
            // advance until the group's max concept covers c or groups run out
          }
          gen = makeGenerator(group);
          gen.addTerms(indexOf(c), _conceptData[i]);

          for (++i; i < index; i++) {
            // Test with != 0 rather than > 0: voteMask may have bit 31 set (query index 31),
            // which makes the masked value negative; this also matches the single-group branch.
            if ((_queryMasks.at(i) & voteMask) != 0) {
              c = _docConcepts.at(i);
              // NOTE(review): _max is presumably the upper bound of the current generator's
              // group, updated by makeGenerator - confirm.
              if (c > _max) // need to find another group
              {
                while (c > _maxConcepts.at(group) && ++group < _limit) {
                  // advance until the group's max concept covers c or groups run out
                }
                gen = makeGenerator(group);
              }
              gen.addTerms(indexOf(c), _conceptData[i]);
            }
          }
          return 0;
        } else // single group
        {
          for (int i = 0; i < index; i++) {
            if ((_queryMasks.at(i) & voteMask) != 0) {
              _firstGenerator.addTerms(indexOf(_docConcepts.at(i)), _conceptData[i]);
            }
          }
          return 1;
        }
      }
    }
    return 2;
  }