Пример #1
0
  private void runTest(Class<? extends Lookup> lookupClass, boolean supportsExactWeights)
      throws Exception {

    // Add all input keys.
    Lookup lookup = lookupClass.newInstance();
    TermFreq[] keys = new TermFreq[this.keys.length];
    for (int i = 0; i < keys.length; i++) keys[i] = new TermFreq(this.keys[i], i);
    lookup.build(new TermFreqArrayIterator(keys));

    // Store the suggester.
    File storeDir = TEMP_DIR;
    lookup.store(new FileOutputStream(new File(storeDir, "lookup.dat")));

    // Re-read it from disk.
    lookup = lookupClass.newInstance();
    lookup.load(new FileInputStream(new File(storeDir, "lookup.dat")));

    // Assert validity.
    Random random = random();
    long previous = Long.MIN_VALUE;
    for (TermFreq k : keys) {
      List<LookupResult> list =
          lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
      assertEquals(1, list.size());
      LookupResult lookupResult = list.get(0);
      assertNotNull(k.term.utf8ToString(), lookupResult.key);

      if (supportsExactWeights) {
        assertEquals(k.term.utf8ToString(), k.v, lookupResult.value);
      } else {
        assertTrue(lookupResult.value + ">=" + previous, lookupResult.value >= previous);
        previous = lookupResult.value;
      }
    }
  }
Пример #2
0
  @Override
  public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    LOG.info("build()");
    if (sourceLocation == null) {
      reader = searcher.getIndexReader();
      dictionary = new HighFrequencyDictionary(reader, field, threshold);
    } else {
      try {

        final String fileDelim = ",";
        if (sourceLocation.contains(fileDelim)) {
          String[] files = sourceLocation.split(fileDelim);
          Reader[] readers = new Reader[files.length];
          for (int i = 0; i < files.length; i++) {
            Reader reader =
                new InputStreamReader(
                    core.getResourceLoader().openResource(files[i]), IOUtils.CHARSET_UTF_8);
            readers[i] = reader;
          }
          dictionary = new MultipleFileDictionary(readers);
        } else {
          dictionary =
              new FileDictionary(
                  new InputStreamReader(
                      core.getResourceLoader().openResource(sourceLocation),
                      IOUtils.CHARSET_UTF_8));
        }
      } catch (UnsupportedEncodingException e) {
        // should not happen
        LOG.error("should not happen", e);
      }
    }

    lookup.build(dictionary);
    if (storeDir != null) {
      File target = new File(storeDir, factory.storeFileName());
      if (!lookup.store(new FileOutputStream(target))) {
        if (sourceLocation == null) {
          assert reader != null && field != null;
          LOG.error(
              "Store Lookup build from index on field: "
                  + field
                  + " failed reader has: "
                  + reader.maxDoc()
                  + " docs");
        } else {
          LOG.error("Store Lookup build from sourceloaction: " + sourceLocation + " failed");
        }
      } else {
        LOG.info("Stored suggest data to: " + target.getAbsolutePath());
      }
    }
  }
  @Test
  public void testCompletionPostingsFormat() throws IOException {
    AnalyzingCompletionLookupProviderV1 providerV1 =
        new AnalyzingCompletionLookupProviderV1(true, false, true, true);
    AnalyzingCompletionLookupProvider currentProvider =
        new AnalyzingCompletionLookupProvider(true, false, true, true);
    List<Completion090PostingsFormat.CompletionLookupProvider> providers =
        Lists.newArrayList(providerV1, currentProvider);

    Completion090PostingsFormat.CompletionLookupProvider randomProvider =
        providers.get(getRandom().nextInt(providers.size()));
    RAMDirectory dir = new RAMDirectory();
    writeData(dir, randomProvider);

    IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
    LookupFactory load = currentProvider.load(input);
    PostingsFormat format = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
    NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer());
    Lookup lookup =
        load.getLookup(
            new CompletionFieldMapper(
                new Names("foo"),
                analyzer,
                analyzer,
                format,
                null,
                true,
                true,
                true,
                Integer.MAX_VALUE,
                indexSettings,
                AbstractFieldMapper.MultiFields.empty(),
                null,
                ContextMapping.EMPTY_MAPPING),
            new CompletionSuggestionContext(null));
    List<LookupResult> result = lookup.lookup("ge", false, 10);
    assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
    assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
    dir.close();
  }
Пример #4
0
  @Override
  public String init(NamedList config, SolrCore core) {
    LOG.info("init: " + config);
    String name = super.init(config, core);
    threshold =
        config.get(THRESHOLD_TOKEN_FREQUENCY) == null
            ? 0.0f
            : (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
    sourceLocation = (String) config.get(LOCATION);
    lookupImpl = (String) config.get(LOOKUP_IMPL);

    IndexSchema schema = core.getLatestSchema();
    suggestionAnalyzerFieldTypeName = (String) config.get(SUGGESTION_ANALYZER_FIELDTYPE);
    if (schema.getFieldTypes().containsKey(suggestionAnalyzerFieldTypeName)) {
      FieldType fieldType = schema.getFieldTypes().get(suggestionAnalyzerFieldTypeName);
      suggestionAnalyzer = fieldType.getQueryAnalyzer();
    }

    // support the old classnames without -Factory for config file backwards compatibility.
    if (lookupImpl == null
        || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl)) {
      lookupImpl = JaspellLookupFactory.class.getName();
    } else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) {
      lookupImpl = TSTLookupFactory.class.getName();
    } else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) {
      lookupImpl = FSTLookupFactory.class.getName();
    }

    factory = core.getResourceLoader().newInstance(lookupImpl, LookupFactory.class);

    lookup = factory.create(config, core);
    String store = (String) config.get(STORE_DIR);
    if (store != null) {
      storeDir = new File(store);
      if (!storeDir.isAbsolute()) {
        storeDir = new File(core.getDataDir() + File.separator + storeDir);
      }
      if (!storeDir.exists()) {
        storeDir.mkdirs();
      } else {
        // attempt reload of the stored lookup
        try {
          lookup.load(new FileInputStream(new File(storeDir, factory.storeFileName())));
        } catch (IOException e) {
          LOG.warn("Loading stored lookup data failed", e);
        }
      }
    }
    return name;
  }
  public List<String> autocomplete(String query) {
    List<String> result = new ArrayList<String>();

    String prefix = "";

    query = query.trim();

    if (query.contains(" ")) {
      prefix = query.substring(0, query.lastIndexOf(' ')) + ' ';
      query = query.substring(query.lastIndexOf(' ') + 1);
    }

    try {
      Lookup lookup = new TSTLookup();
      Directory indexPathDir = FSDirectory.open(new File(SearcherConfiguration.getIndexPath()));
      IndexReader ir = DirectoryReader.open(indexPathDir);

      Dictionary dictionary = new LuceneDictionary(ir, WebPage.TITLE);

      lookup.build(dictionary);

      List<LookupResult> resultsList = lookup.lookup(query, false, 30);

      for (LookupResult lr : resultsList) {
        String v = lr.key.toString();
        if (!v.equals(query)) {
          result.add(prefix + v);
        }
      }

    } catch (IOException e) {
      e.printStackTrace();
    }

    return result;
  }
Пример #6
0
 @Override
 public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
   LOG.info("reload()");
   if (dictionary == null && storeDir != null) {
     // this may be a firstSearcher event, try loading it
     FileInputStream is = new FileInputStream(new File(storeDir, factory.storeFileName()));
     try {
       if (lookup.load(is)) {
         return; // loaded ok
       }
     } finally {
       IOUtils.closeWhileHandlingException(is);
     }
     LOG.debug("load failed, need to build Lookup again");
   }
   // loading was unsuccessful - build it again
   build(core, searcher);
 }
Пример #7
0
  @Override
  protected Suggest.Suggestion<
          ? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
      innerExecute(
          String name,
          CompletionSuggestionContext suggestionContext,
          IndexReader indexReader,
          CharsRef spare)
          throws IOException {
    if (suggestionContext.mapper() == null
        || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
      throw new ElasticsearchException(
          "Field [" + suggestionContext.getField() + "] is not a completion suggest field");
    }

    CompletionSuggestion completionSuggestion =
        new CompletionSuggestion(name, suggestionContext.getSize());
    UnicodeUtil.UTF8toUTF16(suggestionContext.getText(), spare);

    CompletionSuggestion.Entry completionSuggestEntry =
        new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
    completionSuggestion.addTerm(completionSuggestEntry);

    String fieldName = suggestionContext.getField();
    Map<String, CompletionSuggestion.Entry.Option> results =
        Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
    for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
      AtomicReader atomicReader = atomicReaderContext.reader();
      Terms terms = atomicReader.fields().terms(fieldName);
      if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
        final Completion090PostingsFormat.CompletionTerms lookupTerms =
            (Completion090PostingsFormat.CompletionTerms) terms;
        final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
        if (lookup == null) {
          // we don't have a lookup for this segment.. this might be possible if a merge dropped all
          // docs from the segment that had a value in this segment.
          continue;
        }
        List<Lookup.LookupResult> lookupResults =
            lookup.lookup(spare, false, suggestionContext.getSize());
        for (Lookup.LookupResult res : lookupResults) {

          final String key = res.key.toString();
          final float score = res.value;
          final Option value = results.get(key);
          if (value == null) {
            final Option option =
                new CompletionSuggestion.Entry.Option(
                    new StringText(key),
                    score,
                    res.payload == null ? null : new BytesArray(res.payload));
            results.put(key, option);
          } else if (value.getScore() < score) {
            value.setScore(score);
            value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
          }
        }
      }
    }
    final List<CompletionSuggestion.Entry.Option> options =
        new ArrayList<CompletionSuggestion.Entry.Option>(results.values());
    CollectionUtil.introSort(options, scoreComparator);

    int optionCount = Math.min(suggestionContext.getSize(), options.size());
    for (int i = 0; i < optionCount; i++) {
      completionSuggestEntry.addOption(options.get(i));
    }

    return completionSuggestion;
  }
Пример #8
0
  @Override
  public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
      LOG.info("Lookup is null - invoke spellchecker.build first");
      return EMPTY_RESULT;
    }
    SpellingResult res = new SpellingResult();
    CharsRef scratch = new CharsRef();

    for (Token currentToken : options.tokens) {
      scratch.chars = currentToken.buffer();
      scratch.offset = 0;
      scratch.length = currentToken.length();
      boolean onlyMorePopular =
          (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
              && !(lookup instanceof WFSTCompletionLookup)
              && !(lookup instanceof AnalyzingSuggester);

      // get more than the requested suggestions as a lot get collapsed by the corrections
      List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10);
      if (suggestions == null || suggestions.size() == 0) {
        continue;
      }

      if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
        Collections.sort(suggestions);
      }

      final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>();
      for (LookupResult lr : suggestions) {
        String suggestion = lr.key.toString();
        if (this.suggestionAnalyzer != null) {
          String correction = getAnalyzerResult(suggestion);
          // multiple could map to the same, so don't repeat suggestions
          if (!isStringNullOrEmpty(correction)) {
            if (lhm.containsKey(correction)) {
              lhm.put(correction, lhm.get(correction) + (int) lr.value);
            } else {
              lhm.put(correction, (int) lr.value);
            }
          }
        } else {
          lhm.put(suggestion, (int) lr.value);
        }

        if (lhm.size() >= options.count) {
          break;
        }
      }

      // sort by new doc frequency
      Map<String, Integer> orderedMap = null;
      if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
        // retain the sort order from above
        orderedMap = lhm;
      } else {
        orderedMap =
            new TreeMap<String, Integer>(
                new Comparator<String>() {
                  @Override
                  public int compare(String s1, String s2) {
                    return lhm.get(s2).compareTo(lhm.get(s1));
                  }
                });
        orderedMap.putAll(lhm);
      }

      for (Map.Entry<String, Integer> entry : orderedMap.entrySet()) {
        res.add(currentToken, entry.getKey(), entry.getValue());
      }
    }
    return res;
  }
  @Test
  public void testDuellCompletions()
      throws IOException, NoSuchFieldException, SecurityException, IllegalArgumentException,
          IllegalAccessException {
    final boolean preserveSeparators = getRandom().nextBoolean();
    final boolean preservePositionIncrements = getRandom().nextBoolean();
    final boolean usePayloads = getRandom().nextBoolean();
    final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;

    XAnalyzingSuggester reference =
        new XAnalyzingSuggester(
            new StandardAnalyzer(),
            null,
            new StandardAnalyzer(),
            options,
            256,
            -1,
            preservePositionIncrements,
            null,
            false,
            1,
            XAnalyzingSuggester.SEP_LABEL,
            XAnalyzingSuggester.PAYLOAD_SEP,
            XAnalyzingSuggester.END_BYTE,
            XAnalyzingSuggester.HOLE_CHARACTER);
    LineFileDocs docs = new LineFileDocs(getRandom());
    int num = scaledRandomIntBetween(150, 300);
    final String[] titles = new String[num];
    final long[] weights = new long[num];
    for (int i = 0; i < titles.length; i++) {
      Document nextDoc = docs.nextDoc();
      IndexableField field = nextDoc.getField("title");
      titles[i] = field.stringValue();
      weights[i] = between(0, 100);
    }
    docs.close();
    final InputIterator primaryIter =
        new InputIterator() {
          int index = 0;
          long currentWeight = -1;

          @Override
          public BytesRef next() throws IOException {
            if (index < titles.length) {
              currentWeight = weights[index];
              return new BytesRef(titles[index++]);
            }
            return null;
          }

          @Override
          public long weight() {
            return currentWeight;
          }

          @Override
          public BytesRef payload() {
            return null;
          }

          @Override
          public boolean hasPayloads() {
            return false;
          }

          @Override
          public Set<BytesRef> contexts() {
            return null;
          }

          @Override
          public boolean hasContexts() {
            return false;
          }
        };
    InputIterator iter;
    if (usePayloads) {
      iter =
          new InputIterator() {
            @Override
            public long weight() {
              return primaryIter.weight();
            }

            @Override
            public BytesRef next() throws IOException {
              return primaryIter.next();
            }

            @Override
            public BytesRef payload() {
              return new BytesRef(Long.toString(weight()));
            }

            @Override
            public boolean hasPayloads() {
              return true;
            }

            @Override
            public Set<BytesRef> contexts() {
              return null;
            }

            @Override
            public boolean hasContexts() {
              return false;
            }
          };
    } else {
      iter = primaryIter;
    }
    reference.build(iter);
    PostingsFormat provider = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);

    NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer());
    final CompletionFieldMapper mapper =
        new CompletionFieldMapper(
            new Names("foo"),
            namedAnalzyer,
            namedAnalzyer,
            provider,
            null,
            usePayloads,
            preserveSeparators,
            preservePositionIncrements,
            Integer.MAX_VALUE,
            indexSettings,
            AbstractFieldMapper.MultiFields.empty(),
            null,
            ContextMapping.EMPTY_MAPPING);
    Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);
    Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    field.setAccessible(true);
    Field refField = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    refField.setAccessible(true);
    assertThat(refField.get(reference), equalTo(field.get(buildAnalyzingLookup)));

    for (int i = 0; i < titles.length; i++) {
      int res = between(1, 10);
      final StringBuilder builder = new StringBuilder();
      SuggestUtils.analyze(
          namedAnalzyer.tokenStream("foo", titles[i]),
          new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
              if (builder.length() == 0) {
                builder.append(this.charTermAttr.toString());
              }
            }
          });
      String firstTerm = builder.toString();
      String prefix =
          firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
      List<LookupResult> refLookup = reference.lookup(prefix, false, res);
      List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
      assertThat(refLookup.toString(), lookup.size(), equalTo(refLookup.size()));
      for (int j = 0; j < refLookup.size(); j++) {
        assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
        assertThat(
            "prefix: "
                + prefix
                + " "
                + j
                + " -- missmatch cost: "
                + lookup.get(j).key
                + " - "
                + lookup.get(j).value
                + " | "
                + refLookup.get(j).key
                + " - "
                + refLookup.get(j).value,
            lookup.get(j).value,
            equalTo(refLookup.get(j).value));
        assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
        if (usePayloads) {
          assertThat(
              lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
        }
      }
    }
  }