/**
  * Converts a SolrQueryResponse (a Solr-internal data format) into a SolrDocumentList (a
  * solrj format). Inside the solrj API the same conversion is done using the
  * BinaryResponseWriter and a rather complex unfolding process via
  * org.apache.solr.common.util.JavaBinCodec.marshal.
  *
  * @param req the Solr query request that produced the response
  * @param rsp the Solr query response to convert
  * @return the documents of the response as a solrj SolrDocumentList
  */
 public SolrDocumentList SolrQueryResponse2SolrDocumentList(
     final SolrQueryRequest req, final SolrQueryResponse rsp) {
   SolrDocumentList sdl = new SolrDocumentList();
   NamedList<?> nl = rsp.getValues();
   ResultContext resultContext = (ResultContext) nl.get("response");
   DocList response =
       resultContext == null
           ? new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f)
           : resultContext.docs;
   sdl.setNumFound(response == null ? 0 : response.matches());
   sdl.setStart(response == null ? 0 : response.offset());
   String originalName = Thread.currentThread().getName();
   if (response != null) {
     try {
       SolrIndexSearcher searcher = req.getSearcher();
       final int responseCount = response.size();
       DocIterator iterator = response.iterator();
       for (int i = 0; i < responseCount; i++) {
         int docid = iterator.nextDoc();
         Thread.currentThread()
             .setName("EmbeddedSolrConnector.SolrQueryResponse2SolrDocumentList: " + docid);
         Document responsedoc = searcher.doc(docid, (Set<String>) null);
          SolrDocument solrDoc = doc2SolrDoc(responsedoc);
          sdl.add(solrDoc);
       }
     } catch (IOException e) {
       ConcurrentLog.logException(e);
     }
   }
   Thread.currentThread().setName(originalName);
   return sdl;
 }
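  // For contrast with the direct conversion above, a minimal sketch of the solrj-internal
  // path mentioned in the Javadoc (hedged: stream handling is simplified and an open
  // req/rsp pair is assumed; BinaryResponseWriter calls JavaBinCodec.marshal internally):
  //
  //   ByteArrayOutputStream out = new ByteArrayOutputStream();
  //   new BinaryResponseWriter().write(out, req, rsp);
  //   NamedList<Object> decoded = (NamedList<Object>)
  //       new JavaBinCodec().unmarshal(new ByteArrayInputStream(out.toByteArray()));
  //   SolrDocumentList viaCodec = (SolrDocumentList) decoded.get("response");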
 /**
  * Checks whether a document identified by the given URL hash as document id exists.
  *
  * @param id the URL hash used as document id
  * @return the load time metadata if an entry exists in Solr, null otherwise
  * @throws IOException if the Solr lookup fails
  */
 @Override
 public LoadTimeURL getLoadTimeURL(String id) throws IOException {
   int responseCount = 0;
   DocListSearcher docListSearcher = null;
   try {
     docListSearcher =
         new DocListSearcher(
             "{!cache=false raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id,
             null,
             0,
             1,
             CollectionSchema.id.getSolrFieldName(),
             CollectionSchema.load_date_dt.getSolrFieldName());
     responseCount = docListSearcher.response.size();
     if (responseCount == 0) return null;
     SolrIndexSearcher searcher = docListSearcher.request.getSearcher();
     DocIterator iterator = docListSearcher.response.iterator();
      Document doc =
          searcher.doc(iterator.nextDoc(), AbstractSolrConnector.SOLR_ID_and_LOAD_DATE_FIELDS);
      if (doc == null) return null;
      return AbstractSolrConnector.getLoadTimeURL(doc);
    } catch (Throwable e) {
      ConcurrentLog.logException(e);
      throw new IOException(e.getMessage(), e);
   } finally {
     if (docListSearcher != null) docListSearcher.close();
   }
 }
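  // A minimal caller sketch (the connector variable and hash value are illustrative):
  // null signals "not indexed", otherwise the entry carries the stored load date.
  //
  //   LoadTimeURL ltu = connector.getLoadTimeURL("AbCdEfGhIjKl");
  //   if (ltu != null) {
  //     long loadDate = ltu.date; // field name assumed from AbstractSolrConnector.LoadTimeURL
  //   }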
  int collect(DocSet docs, int slot) throws IOException {
    int count = 0;
    SolrIndexSearcher searcher = fcontext.searcher;

    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
      final int doc = docsIt.nextDoc();
      if (doc >= adjustedMax) {
        do {
          ctx = ctxIt.next();
          if (ctx == null) {
            // should be impossible
            throw new RuntimeException("INTERNAL FACET ERROR");
          }
          segBase = ctx.docBase;
          segMax = ctx.reader().maxDoc();
          adjustedMax = segBase + segMax;
        } while (doc >= adjustedMax);
        assert doc >= ctx.docBase;
        setNextReader(ctx);
      }
      count++;
      collect(doc - segBase, slot); // per-seg collectors
    }
    return count;
  }
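  // The loop above maps a global docid to (segment, local id) by walking the leaves in
  // ascending order. For a single lookup, Lucene ships the same mapping as a binary
  // search; a hedged equivalent:
  //
  //   int leafIdx = org.apache.lucene.index.ReaderUtil.subIndex(doc, leaves);
  //   LeafReaderContext leaf = leaves.get(leafIdx);
  //   int localDoc = doc - leaf.docBase;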
  public InvertResult invertScan(IndexSchema schema, InvertParams params) throws Exception {
    InvertResult rtn = new InvertResult();
    rtn.setParams(schema, params);
    DocSet docset = params.getDocset();
    DocSet[] subdocset = new DocSet[subReaders.length];
    if (subdocset.length == 1) {
      subdocset[0] = docset;
    } else {
      for (int i = 0; i < subReaders.length; i++) {
        subdocset[i] = new BitDocSet();
      }

      int index = 0;
      int end = this.getend(index);
      DocIterator iter = docset.iterator();
      while (iter.hasNext()) {
        int doc = iter.nextDoc();
        if (doc >= end) {
          index = this.readerIndex(doc);
          end = this.getend(index);
        }
        subdocset[index].add(doc - this.starts[index]);
      }
    }
    for (int i = 0; i < subReaders.length; i++) {
      params.setDocset(subdocset[i]);
      rtn.merge(subReaders[i].invertScan(schema, params));
    }
    return rtn;
  }
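  // Hedged sketch of the readerIndex lookup the loop above depends on (not shown in this
  // class; assumes this.starts holds the ascending docBase of each sub-reader):
  //
  //   private int readerIndex(int doc) {
  //     int i = java.util.Arrays.binarySearch(this.starts, doc);
  //     return i >= 0 ? i : -i - 2; // insertion point minus one is the owning segment
  //   }
  //
  // The single forward-moving `end` bound works only because DocIterator yields docids in
  // ascending order.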
  @Override
  public void writeDocList(String name, DocList ids, Set<String> fields, Map otherFields)
      throws IOException {
    boolean includeScore = false;
    if (fields != null) {
      includeScore = fields.contains("score");
      if (fields.size() == 0 || (fields.size() == 1 && includeScore) || fields.contains("*")) {
        fields = null; // null means return all stored fields
      }
    }

    int sz = ids.size();

    writeMapOpener(includeScore ? 4 : 3);
    incLevel();
    writeKey("numFound", false);
    writeInt(null, ids.matches());
    writeMapSeparator();
    writeKey("start", false);
    writeInt(null, ids.offset());

    if (includeScore) {
      writeMapSeparator();
      writeKey("maxScore", false);
      writeFloat(null, ids.maxScore());
    }
    writeMapSeparator();
    // indent();
    writeKey("docs", false);
    writeArrayOpener(sz);

    incLevel();
    boolean first = true;

    SolrIndexSearcher searcher = req.getSearcher();
    DocIterator iterator = ids.iterator();
    for (int i = 0; i < sz; i++) {
      int id = iterator.nextDoc();
      Document doc = searcher.doc(id, fields);

      if (first) {
        first = false;
      } else {
        writeArraySeparator();
      }
      indent();
      writeDoc(null, doc, fields, (includeScore ? iterator.score() : 0.0f), includeScore);
    }
    decLevel();
    writeArrayCloser();

    if (otherFields != null) {
      writeMap(null, otherFields, true, false);
    }

    decLevel();
    indent();
    writeMapCloser();
  }
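  // Shape of the JSON emitted above (values illustrative; maxScore appears only when the
  // "score" pseudo-field was requested):
  //
  //   {"numFound":2,"start":0,"maxScore":1.3,"docs":[{"id":"1"},{"id":"2"}]}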
  public static SolrReaderSetScorer createReaderSetScorer(
      Weight weight,
      AtomicReaderContext context,
      Bits acceptDocs,
      SolrIndexSearcher searcher,
      String authorities,
      AtomicReader reader)
      throws IOException {

    DocSet readableDocSet =
        (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities);

    if (readableDocSet == null) {

      String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

      readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

      BooleanQuery bQuery = new BooleanQuery();
      for (String current : auths) {
        bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD);
      }

      DocSet aclDocs = searcher.getDocSet(bQuery);

      BooleanQuery aQuery = new BooleanQuery();
      for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/ ) {
        int docID = it.nextDoc();
        // Obtain the ACL ID for this ACL doc.
        long aclID =
            searcher.getAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID).get(docID);
        SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID);
        Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID));
        aQuery.add(query, Occur.SHOULD);

        if ((aQuery.clauses().size() > 999) || !it.hasNext()) {
          DocSet docsForAclId = searcher.getDocSet(aQuery);
          readableDocSet = readableDocSet.union(docsForAclId);

          aQuery = new BooleanQuery();
        }
      }
      // Exclude the ACL docs from the results, we only want real docs that match.
      // Probably not very efficient, what we really want is remove(docID)
      readableDocSet = readableDocSet.andNot(aclDocs);
      searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet);
    }

    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE,
    // authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrReaderSetScorer(weight, readableDocSet, context, acceptDocs, searcher);
  }
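  // The 999-clause flush above keeps each ACL query below Lucene's default
  // BooleanQuery.maxClauseCount of 1024. A hedged illustration of the failure it avoids:
  //
  //   BooleanQuery q = new BooleanQuery();
  //   for (long aclId = 0; aclId <= 1024; aclId++)
  //     q.add(new TermQuery(new Term("ACLID", Long.toString(aclId))), Occur.SHOULD);
  //   // the 1025th add(...) throws BooleanQuery.TooManyClauses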
  /**
   * Generates a list of highlighted query fragments for each item in a list of documents, or
   * returns null if highlighting is disabled.
   *
   * @param docs query results
   * @param query the query
   * @param req the current request
   * @param defaultFields default list of fields to summarize
   * @return NamedList containing a NamedList for each document, which in turn contains a set of
   *     (field, summary) pairs.
   */
  @Override
  @SuppressWarnings("unchecked")
  public NamedList<Object> doHighlighting(
      DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params)) return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    NamedList fragments = new SimpleOrderedMap();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> fset = new HashSet<String>();

    {
      // pre-fetch documents using the Searcher's doc cache
      for (String f : fieldNames) {
        fset.add(f);
      }
      // fetch unique key if one exists.
      SchemaField keyField = schema.getUniqueKeyField();
      if (null != keyField) fset.add(keyField.getName());
    }

    // get FastVectorHighlighter instance out of the processing loop
    FastVectorHighlighter fvh =
        new FastVectorHighlighter(
            // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false));
    fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

    // Highlight each document
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
      int docId = iterator.nextDoc();
      Document doc = searcher.doc(docId, fset);
      NamedList docSummaries = new SimpleOrderedMap();
      for (String fieldName : fieldNames) {
        fieldName = fieldName.trim();
        if (useFastVectorHighlighter(params, schema, fieldName))
          doHighlightingByFastVectorHighlighter(
              fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
        else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
      }
      String printId = schema.printableUniqueKey(doc);
      fragments.add(printId == null ? null : printId, docSummaries);
    }
    return fragments;
  }
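  // Typical request parameters consumed above (values illustrative):
  //   hl=true&hl.fl=title,content&hl.usePhraseHighlighter=true&hl.requireFieldMatch=false
  // The returned NamedList maps each document's printable unique key to a per-field
  // NamedList of highlight fragments.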
  protected void processIds(
      ResponseBuilder rb, DocList dl, IndexSchema schema, SolrIndexSearcher searcher)
      throws IOException {

    StringBuilder sb = new StringBuilder();

    Set<String> fields = Collections.singleton(schema.getUniqueKeyField().getName());
    for (DocIterator iter = dl.iterator(); iter.hasNext(); ) {

      sb.append(schema.printableUniqueKey(searcher.doc(iter.nextDoc(), fields))).append(',');
    }
    if (sb.length() > 0) {
      rb.rsp.addToLog("responseLog", sb.substring(0, sb.length() - 1));
    }
  }
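  // Resulting log entry (ids illustrative): responseLog=id1,id2,id3
  // i.e. the unique keys of all documents in the DocList, comma-joined with the trailing
  // comma trimmed off.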
 @SuppressWarnings({"rawtypes", "unchecked"})
 private Iterator<Integer> docIterator(ResponseBuilder rb) {
   if (rb.grouping()) {
     List<Integer> docList = new ArrayList<>();
     NamedList values = rb.rsp.getValues();
     NamedList grouped = (NamedList) values.get("grouped");
     for (String field : rb.getGroupingSpec().getFields()) {
       NamedList fieldResults = (NamedList) grouped.get(field);
       if (rb.getGroupingSpec().getResponseFormat() == Grouping.Format.grouped) {
         List<NamedList> groups = (List<NamedList>) fieldResults.get("groups");
         for (NamedList group : groups) {
           for (DocIterator it = ((DocList) group.get("doclist")).iterator(); it.hasNext(); ) {
             docList.add(it.nextDoc());
           }
         }
       } else {
         for (DocIterator it = ((DocList) fieldResults.get("doclist")).iterator();
             it.hasNext(); ) {
           docList.add(it.nextDoc());
         }
       }
     }
     return docList.iterator();
   } else {
     return rb.getResults().docList.iterator();
   }
 }
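  // Grouped response layout the method walks (keys taken from the code above, structure
  // hedged):
  //   "grouped" -> { <field> -> { "groups": [ { "doclist": DocList }, ... ] } } // Format.grouped
  //   "grouped" -> { <field> -> { "doclist": DocList } }                        // Format.simple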
  public static SolrCachingAuxDocScorer createAuxDocScorer(
      SolrIndexSearcher searcher, Similarity similarity, Query query, SolrIndexReader reader)
      throws IOException {
    // Get hold of solr top level searcher
    // Execute query with caching
    // translate results to leaf docs
    // build ordered doc list

    DocSet auxDocSet = searcher.getDocSet(query);

    CacheEntry[] indexedByDocId =
        (CacheEntry[])
            searcher.cacheLookup(
                AlfrescoSolrEventListener.ALFRESCO_CACHE,
                AlfrescoSolrEventListener.KEY_DBID_LEAF_PATH_BY_DOC_ID);

    // List<ScoreDoc> auxDocs = pathCollector.getDocs();
    OpenBitSet translated = new OpenBitSet();

    if (auxDocSet instanceof BitDocSet) {
      BitDocSet source = (BitDocSet) auxDocSet;
      OpenBitSet openBitSet = source.getBits();
      int current = -1;
      while ((current = openBitSet.nextSetBit(current + 1)) != -1) {
        CacheEntry entry = indexedByDocId[current];
        translated.set(entry.getLeaf());
      }
    } else {
      for (DocIterator it = auxDocSet.iterator(); it.hasNext(); /* */ ) {
        CacheEntry entry = indexedByDocId[it.nextDoc()];
        translated.set(entry.getLeaf());
      }
    }

    return new SolrCachingAuxDocScorer(similarity, new BitDocSet(translated), reader);
  }
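  // Net effect (hedged summary): every aux doc matched by the query is replaced by the
  // leaf docid recorded for it in the KEY_DBID_LEAF_PATH_BY_DOC_ID cache, i.e. per doc:
  //
  //   translated.set(indexedByDocId[auxDocId].getLeaf());
  //
  // The BitDocSet branch is just the iterator-free fast path over the same mapping.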
  public NamedList get(String[] fields, DocSet baseDocs) throws IOException, ParseException {
    if (this.crcget == null) {
      this.container =
          this.parse.createContainer(fields, baseDocs, this.reader, this.searcher, this.req);
      DocIterator iter = baseDocs.iterator();
      this.recordCount.inc(baseDocs.size());
      Doclist res = new Doclist(this.parse.limit_offset);
      int doc = -1;
      while (iter.hasNext()) {
        doc = iter.nextDoc();
        res.add(doc);
        if (res.index >= this.parse.limit_offset) {
          break;
        }
      }
      PriorityQueue<SelectDetailRow> topItems = this.transGroupValue(res, fields);
      this.container.free();
      return this.toNameList(topItems);
    }
    String hostkey = String.valueOf(this.getkeyCrc());

    ConcurrentHashMap<Long, String> cache =
        MdrillUtils.CRC_CACHE_SIZE.remove(crcget + "@" + hostkey);

    NamedList rtn = new NamedList();
    Map<Long, String> crcvalue = new HashMap<Long, String>();
    rtn.add("fdtcre", crcvalue);
    if (cache != null) {
      MapFieldSelector selector = new MapFieldSelector(fields);
      FieldType[] ftlist = new FieldType[fields.length];
      IndexSchema schema = this.searcher.getSchema();

      for (int j = 0; j < fields.length; j++) {
        ftlist[j] = schema.getFieldType(fields[j]);
      }

      String crcliststr = params.get("mdrill.crc.key.get.crclist");
      if (crcliststr != null) {
        String[] crclist = crcliststr.split(",");
        for (String s : crclist) {
          Long crc = Long.parseLong(s);
          String v = cache.get(crc);
          if (v != null) {
            String[] cols = v.split(UniqConfig.GroupJoinString(), -1);
            if (cols.length >= 2) {
              int doc = Integer.parseInt(cols[0]);

              SortGroupVal buff = new SortGroupVal();
              buff.groupbuff.append("-");
              buff.groupbuff.append(UniqConfig.GroupJoinString());
              buff.groupbuff.append("-");
              Document docfields = this.reader.document(doc, selector);
              if (docfields == null) {
                for (int j = 0; j < fields.length; j++) {
                  buff.groupbuff.append(UniqConfig.GroupJoinString());
                  buff.groupbuff.append(EncodeUtils.encode("-"));
                }
                if (!crcvalue.containsKey(crc)) {
                  crcvalue.put(crc, buff.groupbuff.toString());
                }
              } else {

                for (int j = 0; j < fields.length; j++) {
                  buff.groupbuff.append(UniqConfig.GroupJoinString());

                  Fieldable fv = docfields.getFieldable(fields[j]);

                  if (fv != null) {
                    buff.groupbuff.append(ftlist[j].toExternal(fv));
                  } else {
                    buff.groupbuff.append(EncodeUtils.encode("-"));
                  }
                }
                crcvalue.put(crc, buff.groupbuff.toString());
              }
            }
          }
        }
      }
    }
    return rtn;
  }
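  // Request-side conventions the cache path above relies on (inferred from the parsing
  // code; mdrill-specific, values illustrative):
  //   mdrill.crc.key.get.crclist=123456789,987654321   // comma-joined CRC keys to resolve
  // with cached values of the form "<docid><JOIN><col1><JOIN>...", where <JOIN> is
  // UniqConfig.GroupJoinString().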
  protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher)
      throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;

    // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
    // currently have an option to return sort field values.  Because of this, we
    // take the documents given and re-derive the sort values.
    boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
    if (fsv) {
      Sort sort = rb.getSortSpec().getSort();
      SortField[] sortFields =
          sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
      NamedList sortVals = new NamedList(); // order is important for the sort fields
      Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field

      SolrIndexReader reader = searcher.getReader();
      SolrIndexReader[] readers = reader.getLeafReaders();
      SolrIndexReader subReader = reader;
      if (readers.length == 1) {
        // if there is a single segment, use that subReader and avoid looking up each time
        subReader = readers[0];
        readers = null;
      }
      int[] offsets = reader.getLeafOffsets();

      for (SortField sortField : sortFields) {
        int type = sortField.getType();
        if (type == SortField.SCORE || type == SortField.DOC) continue;

        FieldComparator comparator = null;
        FieldComparator[] comparators =
            (readers == null) ? null : new FieldComparator[readers.length];

        String fieldname = sortField.getField();
        FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);

        DocList docList = rb.getResults().docList;
        ArrayList<Object> vals = new ArrayList<Object>(docList.size());
        DocIterator it = rb.getResults().docList.iterator();

        int offset = 0;
        int idx = 0;

        while (it.hasNext()) {
          int doc = it.nextDoc();
          if (readers != null) {
            idx = SolrIndexReader.readerIndex(doc, offsets);
            subReader = readers[idx];
            offset = offsets[idx];
            comparator = comparators[idx];
          }

          if (comparator == null) {
            comparator = sortField.getComparator(1, 0);
            comparator = comparator.setNextReader(subReader, offset);
            if (comparators != null) comparators[idx] = comparator;
          }

          doc -= offset; // adjust for what segment this is in
          comparator.copy(0, doc);
          Object val = comparator.value(0);

          // Sortable float, double, int, long types all just use a string
          // comparator. For these, we need to put the type into a readable
          // format.  One reason for this is that XML can't represent all
          // string values (or even all unicode code points).
          // indexedToReadable() should be a no-op and should
          // thus be harmless anyway (for all current ways anyway)
          if (val instanceof String) {
            field.setValue((String) val);
            val = ft.toObject(field);
          }

          // Must do the same conversion when sorting by a
          // String field in Lucene, which returns the terms
          // data as BytesRef:
          if (val instanceof BytesRef) {
            field.setValue(((BytesRef) val).utf8ToString());
            val = ft.toObject(field);
          }

          vals.add(val);
        }

        sortVals.add(fieldname, vals);
      }

      rsp.add("sort_values", sortVals);
    }
  }
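  // Resulting response section (illustrative, for sort=price asc,name asc):
  //   "sort_values" -> { "price": [4.5, 9.99, ...], "name": ["a", "b", ...] }
  // one re-derived value per document in DocList order; score and doc sorts are skipped.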