Exemplo n.º 1
0
  /**
   * Find words for a more-like-this query former.
   *
   * @param docNum the id of the lucene document from which to find terms
   */
  private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();
    for (String fieldName : fieldNames) {
      final Fields vectors = ir.getTermVectors(docNum);
      final Terms vector;
      if (vectors != null) {
        vector = vectors.terms(fieldName);
      } else {
        vector = null;
      }

      // field does not store term vector info
      if (vector == null) {
        StoredDocument d = ir.document(docNum);
        StorableField[] fields = d.getFields(fieldName);
        for (StorableField field : fields) {
          final String stringValue = field.stringValue();
          if (stringValue != null) {
            addTermFrequencies(new StringReader(stringValue), termFreqMap, fieldName);
          }
        }
      } else {
        addTermFrequencies(termFreqMap, vector);
      }
    }

    return createQueue(termFreqMap);
  }
Exemplo n.º 2
0
  public void testKeepsLastFilter() throws Throwable {
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    df.setKeepMode(DuplicateFilter.KeepMode.KM_USE_LAST_OCCURRENCE);
    ScoreDoc[] hits = searcher.search(tq, df, 1000).scoreDocs;
    assertTrue("Filtered searching should have found some matches", hits.length > 0);
    for (ScoreDoc hit : hits) {
      StoredDocument d = searcher.doc(hit.doc);
      String url = d.get(KEY_FIELD);
      DocsEnum td =
          _TestUtil.docs(
              random(),
              reader,
              KEY_FIELD,
              new BytesRef(url),
              MultiFields.getLiveDocs(reader),
              null,
              0);

      int lastDoc = 0;
      while (td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        lastDoc = td.docID();
      }
      assertEquals("Duplicate urls should return last doc", lastDoc, hit.doc);
    }
  }
Exemplo n.º 3
0
  public void testDefaultFilter() throws Throwable {
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    HashSet<String> results = new HashSet<String>();
    ScoreDoc[] hits = searcher.search(tq, df, 1000).scoreDocs;

    for (ScoreDoc hit : hits) {
      StoredDocument d = searcher.doc(hit.doc);
      String url = d.get(KEY_FIELD);
      assertFalse("No duplicate urls should be returned", results.contains(url));
      results.add(url);
    }
  }
Exemplo n.º 4
0
  /**
   * Match up search results and add corresponding data for each result (if we have query results
   * available).
   */
  @Override
  @SuppressWarnings({"rawtypes", "unchecked"})
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(getName(), false)) {
      return;
    }

    XJoinResults<?> results = (XJoinResults<?>) rb.req.getContext().get(getResultsTag());
    if (results == null || rb.getResults() == null) {
      return;
    }

    // general results
    FieldAppender appender =
        new FieldAppender(
            (String) params.get(getName() + "." + XJoinParameters.RESULTS_FIELD_LIST, "*"));
    NamedList general = appender.addNamedList(rb.rsp.getValues(), getName(), results);

    // per join id results
    FieldAppender docAppender =
        new FieldAppender(
            (String) params.get(getName() + "." + XJoinParameters.DOC_FIELD_LIST, "*"));
    Set<String> joinFields = new HashSet<>();
    joinFields.add(joinField);

    List<String> joinIds = new ArrayList<>();
    for (Iterator<Integer> it = docIterator(rb); it.hasNext(); ) {
      StoredDocument doc = rb.req.getSearcher().doc(it.next(), joinFields);
      for (String joinId : doc.getValues(joinField)) {
        if (!joinIds.contains(joinId)) {
          joinIds.add(joinId);
        }
      }
    }

    for (String joinId : joinIds) {
      Object object = results.getResult(joinId);
      if (object == null) continue;
      NamedList external = new NamedList<>();
      general.add("external", external);
      external.add("joinId", joinId);
      if (object instanceof Iterable) {
        for (Object item : (Iterable) object) {
          docAppender.addNamedList(external, "doc", item);
        }
      } else {
        docAppender.addNamedList(external, "doc", object);
      }
    }
  }
Exemplo n.º 5
0
  public void testNoFilter() throws Throwable {
    HashSet<String> results = new HashSet<String>();
    ScoreDoc[] hits = searcher.search(tq, null, 1000).scoreDocs;
    assertTrue("Default searching should have found some matches", hits.length > 0);
    boolean dupsFound = false;

    for (ScoreDoc hit : hits) {
      StoredDocument d = searcher.doc(hit.doc);
      String url = d.get(KEY_FIELD);
      if (!dupsFound) dupsFound = results.contains(url);
      results.add(url);
    }
    assertTrue("Default searching should have found duplicate urls", dupsFound);
  }
Exemplo n.º 6
0
  private static SolrDocument toSolrDoc(SolrInputDocument sdoc, IndexSchema schema) {
    // TODO: do something more performant than this double conversion
    Document doc = DocumentBuilder.toDocument(sdoc, schema);

    // copy the stored fields only
    StoredDocument out = new StoredDocument();
    for (IndexableField f : doc.getFields()) {
      if (f.fieldType().stored()) {
        out.add((StorableField) f);
      }
    }

    return toSolrDoc(out, schema);
  }
Exemplo n.º 7
0
  public void testFastFilter() throws Throwable {
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    df.setProcessingMode(DuplicateFilter.ProcessingMode.PM_FAST_INVALIDATION);
    HashSet<String> results = new HashSet<String>();
    ScoreDoc[] hits = searcher.search(tq, df, 1000).scoreDocs;
    assertTrue("Filtered searching should have found some matches", hits.length > 0);

    for (ScoreDoc hit : hits) {
      StoredDocument d = searcher.doc(hit.doc);
      String url = d.get(KEY_FIELD);
      assertFalse("No duplicate urls should be returned", results.contains(url));
      results.add(url);
    }
    assertEquals("Two urls found", 2, results.size());
  }
  protected NamedList serializeTopDocs(QueryCommandResult result) throws IOException {
    NamedList<Object> queryResult = new NamedList<>();
    queryResult.add("matches", result.getMatches());
    queryResult.add("totalHits", result.getTopDocs().totalHits);
    if (rb.getGroupingSpec().isNeedScore()) {
      queryResult.add("maxScore", result.getTopDocs().getMaxScore());
    }
    List<NamedList> documents = new ArrayList<>();
    queryResult.add("documents", documents);

    final IndexSchema schema = rb.req.getSearcher().getSchema();
    SchemaField uniqueField = schema.getUniqueKeyField();
    CharsRef spare = new CharsRef();
    for (ScoreDoc scoreDoc : result.getTopDocs().scoreDocs) {
      NamedList<Object> document = new NamedList<>();
      documents.add(document);

      StoredDocument doc = retrieveDocument(uniqueField, scoreDoc.doc);
      document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
      if (rb.getGroupingSpec().isNeedScore()) {
        document.add("score", scoreDoc.score);
      }
      if (!FieldDoc.class.isInstance(scoreDoc)) {
        continue;
      }

      FieldDoc fieldDoc = (FieldDoc) scoreDoc;
      Object[] convertedSortValues = new Object[fieldDoc.fields.length];
      for (int j = 0; j < fieldDoc.fields.length; j++) {
        Object sortValue = fieldDoc.fields[j];
        Sort groupSort = rb.getGroupingSpec().getGroupSort();
        SchemaField field =
            groupSort.getSort()[j].getField() != null
                ? schema.getFieldOrNull(groupSort.getSort()[j].getField())
                : null;
        if (field != null) {
          FieldType fieldType = field.getType();
          if (sortValue != null) {
            sortValue = fieldType.marshalSortValue(sortValue);
          }
        }
        convertedSortValues[j] = sortValue;
      }
      document.add("sortValues", convertedSortValues);
    }

    return queryResult;
  }
  @Test
  public void testReturnedDocID() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter iw =
        new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

    int num = Math.min(1000, atLeast(10));
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      document.add(new SuggestField("suggest_field", "abc_" + i, num));
      document.add(new StoredField("int_field", i));
      iw.addDocument(document);

      if (random().nextBoolean()) {
        iw.commit();
      }
    }

    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query =
        new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
    TopSuggestDocs suggest = indexSearcher.suggest(query, num);
    assertEquals(num, suggest.totalHits);
    for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
      String key = suggestScoreDoc.key.toString();
      assertTrue(key.startsWith("abc_"));
      String substring = key.substring(4);
      int fieldValue = Integer.parseInt(substring);
      StoredDocument doc = reader.document(suggestScoreDoc.doc);
      assertEquals(doc.getField("int_field").numericValue().intValue(), fieldValue);
    }

    reader.close();
    iw.close();
  }
Exemplo n.º 10
0
  private static SolrInputDocument toSolrInputDocument(StoredDocument doc, IndexSchema schema) {
    SolrInputDocument out = new SolrInputDocument();
    for (StorableField f : doc.getFields()) {
      String fname = f.name();
      SchemaField sf = schema.getFieldOrNull(f.name());
      Object val = null;
      if (sf != null) {
        if (!sf.stored() || schema.isCopyFieldTarget(sf)) continue;
        val = sf.getType().toObject(f); // object or external string?
      } else {
        val = f.stringValue();
        if (val == null) val = f.numericValue();
        if (val == null) val = f.binaryValue();
        if (val == null) val = f;
      }

      // todo: how to handle targets of copy fields (including polyfield sub-fields)?
      out.addField(fname, val);
    }
    return out;
  }
Exemplo n.º 11
0
  private static SolrDocument toSolrDoc(StoredDocument doc, IndexSchema schema) {
    SolrDocument out = new SolrDocument();
    for (StorableField f : doc.getFields()) {
      // Make sure multivalued fields are represented as lists
      Object existing = out.get(f.name());
      if (existing == null) {
        SchemaField sf = schema.getFieldOrNull(f.name());

        // don't return copyField targets
        if (sf != null && schema.isCopyFieldTarget(sf)) continue;

        if (sf != null && sf.multiValued()) {
          List<Object> vals = new ArrayList<Object>();
          vals.add(f);
          out.setField(f.name(), vals);
        } else {
          out.setField(f.name(), f);
        }
      } else {
        out.addField(f.name(), f);
      }
    }
    return out;
  }
  protected NamedList serializeTopGroups(TopGroups<BytesRef> data, SchemaField groupField)
      throws IOException {
    NamedList<Object> result = new NamedList<>();
    result.add("totalGroupedHitCount", data.totalGroupedHitCount);
    result.add("totalHitCount", data.totalHitCount);
    if (data.totalGroupCount != null) {
      result.add("totalGroupCount", data.totalGroupCount);
    }
    CharsRef spare = new CharsRef();

    final IndexSchema schema = rb.req.getSearcher().getSchema();
    SchemaField uniqueField = schema.getUniqueKeyField();
    for (GroupDocs<BytesRef> searchGroup : data.groups) {
      NamedList<Object> groupResult = new NamedList<>();
      groupResult.add("totalHits", searchGroup.totalHits);
      if (!Float.isNaN(searchGroup.maxScore)) {
        groupResult.add("maxScore", searchGroup.maxScore);
      }

      List<NamedList<Object>> documents = new ArrayList<>();
      for (int i = 0; i < searchGroup.scoreDocs.length; i++) {
        NamedList<Object> document = new NamedList<>();
        documents.add(document);

        StoredDocument doc = retrieveDocument(uniqueField, searchGroup.scoreDocs[i].doc);
        document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
        if (!Float.isNaN(searchGroup.scoreDocs[i].score)) {
          document.add("score", searchGroup.scoreDocs[i].score);
        }
        if (!(searchGroup.scoreDocs[i] instanceof FieldDoc)) {
          continue;
        }

        FieldDoc fieldDoc = (FieldDoc) searchGroup.scoreDocs[i];
        Object[] convertedSortValues = new Object[fieldDoc.fields.length];
        for (int j = 0; j < fieldDoc.fields.length; j++) {
          Object sortValue = fieldDoc.fields[j];
          Sort sortWithinGroup = rb.getGroupingSpec().getSortWithinGroup();
          SchemaField field =
              sortWithinGroup.getSort()[j].getField() != null
                  ? schema.getFieldOrNull(sortWithinGroup.getSort()[j].getField())
                  : null;
          if (field != null) {
            FieldType fieldType = field.getType();
            if (sortValue != null) {
              sortValue = fieldType.marshalSortValue(sortValue);
            }
          }
          convertedSortValues[j] = sortValue;
        }
        document.add("sortValues", convertedSortValues);
      }
      groupResult.add("documents", documents);
      String groupValue =
          searchGroup.groupValue != null
              ? groupField.getType().indexedToReadable(searchGroup.groupValue.utf8ToString())
              : null;
      result.add(groupValue, groupResult);
    }

    return result;
  }