@Override public void build(InputIterator tfit) throws IOException { if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } root = new TernaryTreeNode(); // buffer first if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { // make sure it's sorted and the comparator uses UTF16 sort order tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); } ArrayList<String> tokens = new ArrayList<String>(); ArrayList<Number> vals = new ArrayList<Number>(); BytesRef spare; CharsRef charsSpare = new CharsRef(); while ((spare = tfit.next()) != null) { charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); tokens.add(charsSpare.toString()); vals.add(Long.valueOf(tfit.weight())); } autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); }
/**
 * Rebuilds the Jaspell ternary search trie from the given input.
 *
 * <p>The entry {@code count} is reset to zero and a new trie is created and configured with
 * {@code editDistance}. Each non-empty term is decoded from UTF-8 and stored with its weight;
 * zero-length terms are skipped and not counted.
 *
 * @param iterator source of terms and weights
 * @throws IllegalArgumentException if {@code iterator} reports payloads or contexts
 * @throws IOException declared by the iterator operations used here
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  count = 0;
  trie = new JaspellTernarySearchTrie();
  trie.setMatchAlmostDiff(editDistance);

  final CharsRefBuilder utf16 = new CharsRefBuilder();
  for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
    // The weight is read for every term, including the empty ones skipped below.
    final long weight = iterator.weight();
    if (term.length != 0) {
      utf16.copyUTF8Bytes(term);
      trie.put(utf16.toString(), Long.valueOf(weight));
      count++;
    }
  }
}
@Override public void build(InputIterator iter) throws IOException { if (searcherMgr != null) { searcherMgr.close(); searcherMgr = null; } if (writer != null) { writer.close(); writer = null; } AtomicReader r = null; boolean success = false; try { // First pass: build a temporary normal Lucene index, // just indexing the suggestions as they iterate: writer = new IndexWriter( dir, getIndexWriterConfig( matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE)); BytesRef text; Document doc = new Document(); FieldType ft = getTextFieldType(); Field textField = new Field(TEXT_FIELD_NAME, "", ft); doc.add(textField); Field textGramField = new Field("textgrams", "", ft); doc.add(textGramField); Field exactTextField = new StringField(EXACT_TEXT_FIELD_NAME, "", Field.Store.NO); doc.add(exactTextField); Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef()); doc.add(textDVField); // TODO: use threads...? Field weightField = new NumericDocValuesField("weight", 0L); doc.add(weightField); Field payloadField; if (iter.hasPayloads()) { payloadField = new BinaryDocValuesField("payloads", new BytesRef()); doc.add(payloadField); } else { payloadField = null; } // long t0 = System.nanoTime(); while ((text = iter.next()) != null) { String textString = text.utf8ToString(); textField.setStringValue(textString); exactTextField.setStringValue(textString); textGramField.setStringValue(textString); textDVField.setBytesValue(text); weightField.setLongValue(iter.weight()); if (iter.hasPayloads()) { payloadField.setBytesValue(iter.payload()); } writer.addDocument(doc); } // System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec"); searcherMgr = new SearcherManager(writer, true, null); success = true; } finally { if (success) { IOUtils.close(r); } else { IOUtils.closeWhileHandlingException(writer, r); writer = null; } } }