示例#1
0
文件: Search.java 项目: srnsw/xena
  /**
   * Fetches the positions data for one document and sets up decoding according
   * to the header byte found at the start of that data.
   *
   * <p>The two high bits of the header byte select the layout (0: single group,
   * 2: multi group; 1 and 3 denote layouts with extents). The six low bits are
   * handed to the decoder for the selected layout.
   *
   * @param docID id of the document whose positions are requested from {@code _env}
   * @return {@code true} for a multi-group document, {@code false} for a single-group one
   * @throws Exception if the header announces a layout with extents
   */
  private boolean openDocumentIndex(int docID) throws Exception {
    // The fetched data covers this document only, so decoding starts at offset zero.
    _data = _env.getPositions(docID);
    _base = 0;
    _startingIndex = 0;

    final int header = _data[_base] & 0xFF;
    final int layout = header >> 6; // always in 0..3 because header is masked to 8 bits
    final int lowBits = header & 0x3F;

    // layouts 1 (single group) and 3 (multi group) carry extents
    if (layout == 1 || layout == 3) {
      throw new Exception("extents not yet implemented\n");
    }

    if (layout == 2) {
      // multi group, no extents
      _kTable.clear();
      _offsets.clear();
      _maxConcepts.clear();
      ByteArrayDecompressor decompressor = new ByteArrayDecompressor(_data, _base + 1);
      decompressor.decode(lowBits, _kTable);
      decompressor.ascDecode(_kTable.popLast(), _offsets);
      decompressor.ascDecode(_kTable.popLast(), _maxConcepts);
      // skip the header byte plus everything the decompressor consumed
      _base += 1 + decompressor.bytesRead();
      _limit = _maxConcepts.cardinality();
      return true;
    }

    // layout 0: single group, no extents
    final int k2 = _data[_base + 1];
    _base += 2;
    _firstGenerator.init(_data, _base, k2);
    // decode concept table
    _nConcepts = _firstGenerator.decodeConcepts(lowBits, 0, _concepts);
    return false;
  }
示例#2
0
文件: Search.java 项目: srnsw/xena
  /**
   * Processes documents one after another for as long as {@code _nextDocGenHeap}
   * is non-empty: sets up the next document, runs the generators so they fill
   * {@code fillers}, then scores each query's filler list against the document.
   *
   * <p>An exception raised while handling a document is printed to
   * {@code System.err} and the loop moves on to the next iteration.
   */
  private void searchDocument() {
    final RoleFiller[] fillers = new RoleFiller[_nQueries];
    do {
      try {
        final int outcome = nextDocument(fillers);
        if (outcome == 2) {
          // reached the end
          return;
        }
        if (outcome == 0) {
          // multi group: drain the generator heap
          _genHeap.start(fillers);
          while (_genHeap.next(fillers)) {
            // all work happens inside next()
          }
        } else if (outcome == 1) {
          // single group: one generator produces all fillers
          while (_firstGenerator.next()) {
            _firstGenerator.generateFillers(fillers);
          }
        }
      } catch (Exception e) {
        e.printStackTrace(System.err);
        // NOTE(review): this jumps straight to the loop condition, so the scoring
        // pass and _genHeap.reset() below are skipped for the failed document;
        // confirm that leaving the heap un-reset here is intended.
        continue;
      }

      for (int q = 0; q < _nQueries; q++) {
        final RoleFiller head = fillers[q];
        if (head == null || head == RoleFiller.STOP) {
          continue; // nothing collected for this query
        }
        head.scoreList(_query[q], _document);
      }
      _genHeap.reset();
    } while (_nextDocGenHeap.isNonEmpty());
  }
示例#3
0
文件: Search.java 项目: srnsw/xena
  /**
   * Builds a {@link ConceptGroupGenerator} for one concept group of the currently
   * opened document, decodes that group's concept table into {@code _concepts},
   * updates {@code _max}, and adds the generator to {@code _genHeap}.
   *
   * @param group zero-based group number
   * @return the newly created generator
   * @throws Exception propagated from the generator or the decoding calls
   */
  private ConceptGroupGenerator makeGenerator(int group) throws Exception {
    // Group 0 starts at _base with no shift; any later group takes its offset
    // and shift from the entries recorded for the preceding group.
    int start = _base;
    int conceptShift = 0;
    if (group > 0) {
      start += _offsets.at(group - 1);
      conceptShift = _maxConcepts.at(group - 1);
    }

    // _kTable holds two entries per group: slot 2*group is passed to
    // decodeConcepts, slot 2*group+1 to the generator constructor.
    final int tableSlot = 2 * group;
    ConceptGroupGenerator generator =
        new ConceptGroupGenerator(_data, start, _kTable.at(tableSlot + 1));
    _nConcepts = generator.decodeConcepts(_kTable.at(tableSlot), conceptShift, _concepts);

    if (group >= _limit) {
      // no recorded maximum for this group: use the last decoded concept
      _max = _concepts[_nConcepts - 1];
    } else {
      // store the recorded group maximum just past the decoded concepts
      _concepts[_nConcepts] = _maxConcepts.at(group);
      _max = _concepts[_nConcepts];
    }

    _genHeap.addGenerator(generator);
    _startingIndex = 0; // in _concepts; lower search index
    return generator;
  }
示例#4
0
文件: Search.java 项目: srnsw/xena
  /**
   * Advances through {@code _nextDocGenHeap} until it finds a document that at
   * least one query votes for, opens that document's index, and hands the
   * relevant concept data to the generator(s).
   *
   * <p>For each query {@code i} that is non-null, {@code start[i]} is set to
   * {@code null} when the query votes yes and to {@link RoleFiller#STOP} when it
   * votes no. Entries for null queries are left untouched.
   *
   * @param start per-query slots, reset as described above
   * @return 0 if a multi-group document was opened, 1 if a single-group document
   *     was opened, 2 if {@code _nextDocGenHeap} ran out of documents
   * @throws Exception propagated from opening the document index or creating generators
   */
  private int nextDocument(RoleFiller[] start) throws Exception {
    while (_nextDocGenHeap.isNonEmpty()) // still something to do
    {
      for (int i = 0; i < _nQueries; i++) {
        if (_query[i] != null) {
          _query[i].resetForNextDocument();
        }
      }

      // gather all concepts this document has and store associated conceptData
      int index = 0;
      _document = _nextDocGenHeap.getDocument();
      _docConcepts.clear();
      _queryMasks.clear();
      do {
        _docConcepts.add(_nextDocGenHeap.getConcept());
        _queryMasks.add(_nextDocGenHeap.getQueryMask());
        (_conceptData[index++] = _nextDocGenHeap.getTerms()).runBy(_query);
        _nextDocGenHeap.step();
      } while (_nextDocGenHeap.atDocument(_document));

      // if there is no saturation model, some query will always vote YES
      // and so every document will be opened
      // even in this case, however, savings can be achieved by not generating fillers
      // for some queries (not scoring, etc)
      // and, with more care, creation of some GroupGenerators can be avoided
      // saturating queries with lots of good hits will lead to best results
      int voteMask = 0;
      for (int i = 0; i < _nQueries; i++) {
        if (_query[i] != null) {
          if (_query[i].vote()) {
            start[i] = null; // normal reset
            voteMask |= 1 << i;
          } else {
            start[i] = RoleFiller.STOP; // prohibit setting
          }
        }
      }

      // we may eliminate some ConceptGroupGenerators
      // those which would be used only by Queries which voted NO
      if (voteMask != 0) // need to open up document
      {
        // !!! don't gather Fillers for disinterested Queries
        if (openDocumentIndex(_document)) // multi group
        {
          ConceptGroupGenerator gen;
          // set up all needed generators
          // skip leading concepts that no voting query is interested in
          int i = 0;
          while ((_queryMasks.at(i) & voteMask) == 0) {
            ++i;
          }
          //		assert(i < index);
          int c = _docConcepts.at(i);
          int group = 0;
          // find first group
          while (c > _maxConcepts.at(group) && ++group < _limit) {
            // advancing group is the whole loop body
          }
          gen = makeGenerator(group);
          gen.addTerms(indexOf(c), _conceptData[i]);

          for (++i; i < index; i++) {
            // Test with != 0 rather than > 0: voteMask bit 31 is the sign bit, so a
            // match on that bit yields a negative value that "> 0" would miss.
            if ((_queryMasks.at(i) & voteMask) != 0) {
              c = _docConcepts.at(i);
              if (c > _max) // need to find another group
              {
                //			  assert(group < _limit);
                while (c > _maxConcepts.at(group) && ++group < _limit) {
                  // advancing group is the whole loop body
                }
                gen = makeGenerator(group);
              }
              gen.addTerms(indexOf(c), _conceptData[i]);
            }
          }
          return 0;
        } else // single group
        {
          for (int i = 0; i < index; i++) {
            if ((_queryMasks.at(i) & voteMask) != 0) {
              _firstGenerator.addTerms(indexOf(_docConcepts.at(i)), _conceptData[i]);
            }
          }
          return 1;
        }
      }
    }
    return 2;
  }