/**
 * Recovers the original source data ("src data") for every hit in {@code hits} — and,
 * recursively, for each hit's group hits — and stores it on the hit via
 * {@code hit.setSrcData(...)}.
 *
 * Lookup order per hit:
 *   1. hit.getStoredValue()
 *   2. the stored Document's AbstractZoieIndexable.DOCUMENT_STORE_FIELD binary value
 *   3. the Document's SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME binary value
 *   4. the Document's SenseiSchema.SRC_DATA_FIELD_NAME binary value — this one is
 *      decoded directly as UTF-8 and is deliberately NOT run through decompression
 * Bytes found via steps 1-3 are decompressed best-effort; when decompression throws,
 * the raw bytes are used as-is.
 *
 * Any per-hit failure is logged and appended to {@code res} as a BrokerGatherError so
 * one bad hit does not abort recovery for the remaining hits.
 *
 * @param res                 result object that collects per-hit errors
 * @param hits                hits to process; null is treated as a no-op
 * @param isFetchStoredFields when false, stored fields are cleared from each hit after
 *                            recovery because the caller did not request them
 */
public static void recoverSrcData(SenseiResult res, SenseiHit[] hits, boolean isFetchStoredFields) {
  if (hits != null) {
    for (SenseiHit hit : hits) {
      try {
        byte[] dataBytes = hit.getStoredValue();
        if (dataBytes == null || dataBytes.length == 0) {
          Document doc = hit.getStoredFields();
          if (doc != null) {
            dataBytes = doc.getBinaryValue(AbstractZoieIndexable.DOCUMENT_STORE_FIELD);
            if (dataBytes == null || dataBytes.length == 0) {
              dataBytes = doc.getBinaryValue(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME);
              if (dataBytes == null || dataBytes.length == 0) {
                dataBytes = doc.getBinaryValue(SenseiSchema.SRC_DATA_FIELD_NAME);
                if (dataBytes != null && dataBytes.length > 0) {
                  // Uncompressed field: decode directly.
                  hit.setSrcData(new String(dataBytes, "UTF-8"));
                  dataBytes = null; // set to null to avoid gunzip.
                }
              }
              // Strip the raw source fields from the document either way; the src data
              // (if any) now lives on the hit itself.
              doc.removeFields(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME);
              doc.removeFields(SenseiSchema.SRC_DATA_FIELD_NAME);
            }
          }
        }
        if (dataBytes != null && dataBytes.length > 0) {
          byte[] data;
          try {
            // Best effort: the bytes may or may not actually be compressed; fall back
            // to the raw bytes when decompression fails.
            data = DefaultJsonSchemaInterpreter.decompress(dataBytes);
          } catch (Exception ex) {
            data = dataBytes;
          }
          hit.setSrcData(new String(data, "UTF-8"));
        }
      } catch (Exception e) {
        // Record the failure on the result instead of failing the whole request.
        logger.error(e.getMessage(), e);
        res.getErrors().add(new SenseiError(e.getMessage(), ErrorType.BrokerGatherError));
      }
      // Group hits carry their own src data; recover those recursively.
      recoverSrcData(res, hit.getSenseiGroupHits(), isFetchStoredFields);
      // Remove stored fields since the user is not requesting:
      if (!isFetchStoredFields) hit.setStoredFields(null);
    }
  }
}
@Override public void collect(int doc) throws IOException { String id = fieldData.stringValue(doc); // the _source is the query Document document = reader.document(doc, SourceFieldSelector.INSTANCE); byte[] source = document.getBinaryValue(SourceFieldMapper.NAME); try { queries.put(id, percolator.parseQuery(id, source, 0, source.length)); } catch (Exception e) { logger.warn("failed to add query [{}]", e, id); } }
@Override public void collect(int doc) throws IOException { // the _source is the query Document document = reader.document(doc, new UidAndSourceFieldSelector()); String id = Uid.createUid(document.get(UidFieldMapper.NAME)).id(); byte[] source = document.getBinaryValue(SourceFieldMapper.NAME); try { queries.put(id, percolator.parseQuery(id, source, 0, source.length)); } catch (Exception e) { logger.warn("failed to add query [{}]", e, id); } }
/**
 * Fetches the stored payload for the given UID from the current index reader.
 *
 * Fix: the original re-checked {@code docid >= 0} after the early return had already
 * guaranteed it — a dead condition — and read the {@code _currentReaderData} field
 * twice; both are removed here.
 *
 * @param uid external unique id
 * @return the raw stored bytes of {@code _field}, or null when the uid is unmapped,
 *         no reader is available, or the document has no such field
 * @throws IOException if reading the stored document fails
 */
@Override
protected byte[] getFromStore(long uid) throws IOException {
  int docid = mapDocId(uid);
  if (docid < 0) {
    return null; // uid not present in the current docid mapping
  }
  // Read the (possibly swappable) reader holder once.
  IndexReader reader = (_currentReaderData == null) ? null : _currentReaderData.reader;
  if (reader == null) {
    return null;
  }
  Document doc = reader.document(docid);
  return doc == null ? null : doc.getBinaryValue(_field);
}
/** * @param args * @throws IOException * @throws ParseException */ public static void main(String[] args) throws IOException, ParseException { if (args.length > 0) { Config.basePath = new File(args[0]); } IndexReader reader = DirectoryReader.open(NIOFSDirectory.open(new File(Config.basePath, "lucene"))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); QueryParser parser = new QueryParser(Version.LUCENE_44, "city", analyzer); long tm = System.currentTimeMillis(); for (int i = 0; i < RUNS; i++) { long cn = (long) (Math.random() * FillLucene.CAMERAS); String qs = "city" + cn; Query query = parser.parse(qs); TopDocs results = searcher.search(query, 5); ScoreDoc[] hits = results.scoreDocs; if (results.totalHits != 1) { System.out.println("Wrong results num: " + results.totalHits + " Query: " + qs); continue; } Document doc = searcher.doc(hits[0].doc); // System.out.println("City: "+doc.get("city")); // System.out.println("Country: "+doc.get("country")); BytesRef data = doc.getBinaryValue("data"); Camera cam = Camera.load(ByteBuffer.wrap(data.bytes)); if (!cam.getCity().equals(qs)) System.out.println("Invalid result"); } tm = System.currentTimeMillis() - tm; System.out.println("Time: " + StringUtils.millisToString(tm) + " Rate: " + (RUNS * 1000 / tm)); /* Query query = parser.parse("city800000000"); TopDocs results = searcher.search(query, 5); ScoreDoc[] hits = results.scoreDocs; System.out.println("Results: "+results.totalHits); Document doc = searcher.doc(hits[0].doc); System.out.println("City: "+doc.get("city")); System.out.println("Country: "+doc.get("country")); BytesRef data = doc.getBinaryValue("data"); Camera cam = Camera.load( ByteBuffer.wrap(data.bytes) ); System.out.println("Camera: "+cam.getId()+" City: "+cam.getCity()+" Country: "+cam.getCountry()); */ }
/**
 * Nightly stress test: verifies stored fields remain readable when the stored-fields
 * data file (.fdt) grows past 4 GB — i.e. that stored-field offsets are not silently
 * truncated to 32 bits. Writes enough random, incompressible docs to exceed 2^32
 * bytes, force-merges to a single segment, then reads back the LAST document (whose
 * bytes live beyond the 4 GB boundary) and checks it round-trips exactly.
 *
 * @throws Exception on any index write/read failure (test fails)
 */
@Nightly
public void test() throws Exception {
  MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
  // Throttling would make an already-slow nightly test unbearable.
  dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  IndexWriter w = new IndexWriter(
      dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(256.0)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(false, 10))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  // Raise the max merged-segment size so forceMerge(1) below can actually produce
  // one >4GB segment.
  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
  }
  final Document doc = new Document();
  final FieldType ft = new FieldType();
  ft.setIndexed(false);
  ft.setStored(true);
  ft.freeze();
  final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20);
  final byte[] value = new byte[valueLength];
  for (int i = 0; i < valueLength; ++i) {
    // random so that even compressing codecs can't compress it
    value[i] = (byte) random().nextInt(256);
  }
  final Field f = new Field("fld", value, ft);
  doc.add(f);
  // Enough docs that the total stored payload exceeds 2^32 bytes, plus slack.
  final int numDocs = (int) ((1L << 32) / valueLength + 100);
  for (int i = 0; i < numDocs; ++i) {
    w.addDocument(doc);
    if (VERBOSE && i % (numDocs / 10) == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  // Sanity report: confirm at least one .fdt file actually crossed 4GB, otherwise the
  // test exercised nothing interesting.
  if (VERBOSE) {
    boolean found = false;
    for (String file : dir.listAll()) {
      if (file.endsWith(".fdt")) {
        final long fileLength = dir.fileLength(file);
        if (fileLength >= 1L << 32) {
          found = true;
        }
        System.out.println("File length of " + file + " : " + fileLength);
      }
    }
    if (!found) {
      System.out.println("No .fdt file larger than 4GB, test bug?");
    }
  }
  DirectoryReader rd = DirectoryReader.open(dir);
  // The last document's stored bytes sit past the 4GB boundary.
  Document sd = rd.document(numDocs - 1);
  assertNotNull(sd);
  assertEquals(1, sd.getFields().size());
  BytesRef valueRef = sd.getBinaryValue("fld");
  assertNotNull(valueRef);
  assertEquals(new BytesRef(value), valueRef);
  rd.close();
  dir.close();
}
/**
 * Indexes documents carrying a varying number of random binary stored fields while
 * periodically switching codecs mid-stream (forcing merges across codec formats),
 * then deletes random id ranges, force-merges with deletions, and verifies that every
 * surviving document still round-trips its stored bytes exactly.
 *
 * @throws IOException on any index write/read failure (test fails)
 */
public void testWriteReadMerge() throws IOException {
  // get another codec, other than the default: so we are merging segments across different codecs
  final Codec otherCodec;
  if ("SimpleText".equals(Codec.getDefault().getName())) {
    otherCodec = new Lucene46Codec();
  } else {
    otherCodec = new SimpleTextCodec();
  }
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());
  final int docCount = atLeast(200);
  // data[i][j] holds the expected bytes of field "bytes<j>" of document i.
  final byte[][][] data = new byte[docCount][][];
  for (int i = 0; i < docCount; ++i) {
    // Mostly few/short fields; rarely many/long ones, to hit varied storage paths.
    final int fieldCount = rarely() ? RandomInts.randomIntBetween(random(), 1, 500) : RandomInts.randomIntBetween(random(), 1, 5);
    data[i] = new byte[fieldCount][];
    for (int j = 0; j < fieldCount; ++j) {
      final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
      // max=2 yields highly compressible bytes; max=256 yields incompressible ones.
      final int max = rarely() ? 256 : 2;
      data[i][j] = randomByteArray(length, max);
    }
  }
  final FieldType type = new FieldType(StringField.TYPE_STORED);
  type.setIndexed(false);
  type.freeze();
  // A single reused IntField instance; its value is reset per document below.
  IntField id = new IntField("id", 0, Store.YES);
  for (int i = 0; i < data.length; ++i) {
    Document doc = new Document();
    doc.add(id);
    id.setIntValue(i);
    for (int j = 0; j < data[i].length; ++j) {
      Field f = new Field("bytes" + j, data[i][j], type);
      doc.add(f);
    }
    iw.w.addDocument(doc);
    // Occasionally close the writer and reopen with the other codec so later merges
    // span segments written by different codecs.
    if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
      iw.w.close();
      // test merging against a non-compressing codec
      if (iwConf.getCodec() == otherCodec) {
        iwConf.setCodec(Codec.getDefault());
      } else {
        iwConf.setCodec(otherCodec);
      }
      iw = new RandomIndexWriter(random(), dir, iwConf.clone());
    }
  }
  // Delete a handful of random id ranges so the merge below must skip deleted docs.
  for (int i = 0; i < 10; ++i) {
    final int min = random().nextInt(data.length);
    final int max = min + random().nextInt(20);
    iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
  }
  iw.forceMerge(2); // force merges with deletions
  iw.commit();
  final DirectoryReader ir = DirectoryReader.open(dir);
  assertTrue(ir.numDocs() > 0);
  int numDocs = 0;
  for (int i = 0; i < ir.maxDoc(); ++i) {
    final Document doc = ir.document(i);
    if (doc == null) {
      continue;
    }
    ++numDocs;
    final int docId = doc.getField("id").numericValue().intValue();
    // +1 accounts for the "id" field alongside the bytesN fields.
    assertEquals(data[docId].length + 1, doc.getFields().size());
    for (int j = 0; j < data[docId].length; ++j) {
      final byte[] arr = data[docId][j];
      final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
      // Copy out the exact slice; the BytesRef may reference a shared backing array.
      final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
      assertArrayEquals(arr, arr2);
    }
  }
  assertTrue(ir.numDocs() <= numDocs);
  ir.close();
  // Final smoke test: a full delete + merge down to one segment must also succeed.
  iw.deleteAll();
  iw.commit();
  iw.forceMerge(1);
  iw.close();
  dir.close();
}
/**
 * Returns the raw bytes stored in the DATA field of the given document.
 *
 * @param documentId Lucene internal document id
 * @return the binary value of the DATA field
 * @throws IOException if the stored document cannot be read
 */
public byte[] toData(int documentId) throws IOException {
  // Fetch the stored document and extract its binary DATA payload in one expression.
  return searcher.doc(documentId).getBinaryValue(FieldNames.DATA);
}