@Override
  public void build(InputIterator tfit) throws IOException {
    if (tfit.hasPayloads()) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    root = new TernaryTreeNode();
    // buffer first
    if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
      // make sure it's sorted and the comparator uses UTF16 sort order
      tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
    }

    ArrayList<String> tokens = new ArrayList<String>();
    ArrayList<Number> vals = new ArrayList<Number>();
    BytesRef spare;
    CharsRef charsSpare = new CharsRef();
    while ((spare = tfit.next()) != null) {
      charsSpare.grow(spare.length);
      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
      tokens.add(charsSpare.toString());
      vals.add(Long.valueOf(tfit.weight()));
    }
    autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
  }
Beispiel #2
0
  @Override
  public void build(InputIterator iterator) throws IOException {
    if (iterator.hasPayloads()) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.hasContexts()) {
      throw new IllegalArgumentException("this suggester doesn't support contexts");
    }
    count = 0;
    trie = new JaspellTernarySearchTrie();
    trie.setMatchAlmostDiff(editDistance);
    BytesRef spare;
    final CharsRefBuilder charsSpare = new CharsRefBuilder();

    while ((spare = iterator.next()) != null) {
      final long weight = iterator.weight();
      if (spare.length == 0) {
        continue;
      }
      charsSpare.copyUTF8Bytes(spare);
      trie.put(charsSpare.toString(), Long.valueOf(weight));
      count++;
    }
  }
  @Override
  public void build(InputIterator iter) throws IOException {

    if (searcherMgr != null) {
      searcherMgr.close();
      searcherMgr = null;
    }

    if (writer != null) {
      writer.close();
      writer = null;
    }

    AtomicReader r = null;
    boolean success = false;
    try {
      // First pass: build a temporary normal Lucene index,
      // just indexing the suggestions as they iterate:
      writer =
          new IndexWriter(
              dir,
              getIndexWriterConfig(
                  matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
      BytesRef text;
      Document doc = new Document();
      FieldType ft = getTextFieldType();
      Field textField = new Field(TEXT_FIELD_NAME, "", ft);
      doc.add(textField);

      Field textGramField = new Field("textgrams", "", ft);
      doc.add(textGramField);

      Field exactTextField = new StringField(EXACT_TEXT_FIELD_NAME, "", Field.Store.NO);
      doc.add(exactTextField);

      Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef());
      doc.add(textDVField);

      // TODO: use threads...?
      Field weightField = new NumericDocValuesField("weight", 0L);
      doc.add(weightField);

      Field payloadField;
      if (iter.hasPayloads()) {
        payloadField = new BinaryDocValuesField("payloads", new BytesRef());
        doc.add(payloadField);
      } else {
        payloadField = null;
      }
      // long t0 = System.nanoTime();
      while ((text = iter.next()) != null) {
        String textString = text.utf8ToString();
        textField.setStringValue(textString);
        exactTextField.setStringValue(textString);
        textGramField.setStringValue(textString);
        textDVField.setBytesValue(text);
        weightField.setLongValue(iter.weight());
        if (iter.hasPayloads()) {
          payloadField.setBytesValue(iter.payload());
        }
        writer.addDocument(doc);
      }
      // System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");

      searcherMgr = new SearcherManager(writer, true, null);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(r);
      } else {
        IOUtils.closeWhileHandlingException(writer, r);
        writer = null;
      }
    }
  }