Example #1
  public static void recoverSrcData(
      SenseiResult res, SenseiHit[] hits, boolean isFetchStoredFields) {
    if (hits != null) {
      for (SenseiHit hit : hits) {
        try {
          byte[] dataBytes = hit.getStoredValue();
          if (dataBytes == null || dataBytes.length == 0) {
            Document doc = hit.getStoredFields();
            if (doc != null) {
              dataBytes = doc.getBinaryValue(AbstractZoieIndexable.DOCUMENT_STORE_FIELD);

              if (dataBytes == null || dataBytes.length == 0) {
                dataBytes = doc.getBinaryValue(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME);

                if (dataBytes == null || dataBytes.length == 0) {
                  dataBytes = doc.getBinaryValue(SenseiSchema.SRC_DATA_FIELD_NAME);
                  if (dataBytes != null && dataBytes.length > 0) {
                    hit.setSrcData(new String(dataBytes, "UTF-8"));
                    dataBytes = null; // set to null to avoid gunzip.
                  }
                }
                doc.removeFields(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME);
                doc.removeFields(SenseiSchema.SRC_DATA_FIELD_NAME);
              }
            }
          }
          if (dataBytes != null && dataBytes.length > 0) {
            byte[] data;
            try {
              data = DefaultJsonSchemaInterpreter.decompress(dataBytes);
            } catch (Exception ex) {
              // Decompression failed (e.g. the bytes were stored uncompressed); use them as-is.
              data = dataBytes;
            }
            hit.setSrcData(new String(data, "UTF-8"));
          }
        } catch (Exception e) {
          logger.error(e.getMessage(), e);
          res.getErrors().add(new SenseiError(e.getMessage(), ErrorType.BrokerGatherError));
        }

        recoverSrcData(res, hit.getSenseiGroupHits(), isFetchStoredFields);

        // Remove stored fields since the user did not request them:
        if (!isFetchStoredFields) hit.setStoredFields(null);
      }
    }
  }
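The decompress call above goes through DefaultJsonSchemaInterpreter.decompress, which is not shown in this excerpt; the "avoid gunzip" comment suggests the compressed source field is gzip-encoded. A minimal sketch of such a helper, assuming plain GZIP (the real implementation may differ in framing or buffer handling):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;

// Hypothetical gunzip helper standing in for DefaultJsonSchemaInterpreter.decompress.
public static byte[] gunzip(byte[] compressed) throws IOException {
  GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(compressed));
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  byte[] buf = new byte[4096];
  for (int n = in.read(buf); n != -1; n = in.read(buf)) {
    out.write(buf, 0, n);
  }
  in.close();
  return out.toByteArray();
}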
Example #2
 @Override
 public void collect(int doc) throws IOException {
   String id = fieldData.stringValue(doc);
   // the _source is the query
   Document document = reader.document(doc, SourceFieldSelector.INSTANCE);
   byte[] source = document.getBinaryValue(SourceFieldMapper.NAME);
   try {
     queries.put(id, percolator.parseQuery(id, source, 0, source.length));
   } catch (Exception e) {
     logger.warn("failed to add query [{}]", e, id);
   }
 }
Example #3
 @Override
 public void collect(int doc) throws IOException {
   // the _source is the query
   Document document = reader.document(doc, new UidAndSourceFieldSelector());
   String id = Uid.createUid(document.get(UidFieldMapper.NAME)).id();
   byte[] source = document.getBinaryValue(SourceFieldMapper.NAME);
   try {
     queries.put(id, percolator.parseQuery(id, source, 0, source.length));
   } catch (Exception e) {
     logger.warn("failed to add query [{}]", e, id);
   }
 }
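Both collect variants cache one parsed query per matching document; the same pattern can be applied as a bulk reload over a whole reader. A sketch against the Lucene 3.x-era API these snippets target (the "_id"/"_source" field names and the QueryParserCallback interface are assumptions, not the actual Elasticsearch types):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;

// Hypothetical bulk reload over a whole reader, mirroring the collectors above.
public final class QueryReloader {

  // Stands in for the percolator object used in the collectors above.
  public interface QueryParserCallback {
    Query parseQuery(String id, byte[] source, int offset, int length);
  }

  public static Map<String, Query> reloadQueries(IndexReader reader, QueryParserCallback percolator)
      throws IOException {
    Map<String, Query> queries = new HashMap<String, Query>();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      if (reader.isDeleted(doc)) continue; // skip deleted documents
      Document document = reader.document(doc);
      String id = document.get("_id"); // assumed id field name
      byte[] source = document.getBinaryValue("_source"); // stored query source
      if (id != null && source != null) {
        queries.put(id, percolator.parseQuery(id, source, 0, source.length));
      }
    }
    return queries;
  }
}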
Example #4
 @Override
 protected byte[] getFromStore(long uid) throws IOException {
   int docid = mapDocId(uid);
   if (docid < 0) return null;
   IndexReader reader = null;
   if (_currentReaderData != null) {
     reader = _currentReaderData.reader;
   }
    if (reader != null) { // docid >= 0 is guaranteed by the early return above
     Document doc = reader.document(docid);
     if (doc != null) {
       return doc.getBinaryValue(_field);
     }
   }
   return null;
 }
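A hypothetical convenience wrapper for callers that prefer an empty payload over null (illustrative only, not part of the original class):

 // Hypothetical wrapper: treat "uid not mapped" and "no stored bytes" alike.
 protected byte[] getFromStoreOrEmpty(long uid) throws IOException {
   byte[] stored = getFromStore(uid);
   return stored != null ? stored : new byte[0];
 }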
Example #5
  /**
   * Runs a lookup benchmark against the index produced by FillLucene.
   *
   * @param args optional; args[0] overrides Config.basePath
   * @throws IOException if the index cannot be opened or read
   * @throws ParseException if a generated query cannot be parsed
   */
  public static void main(String[] args) throws IOException, ParseException {
    if (args.length > 0) {
      Config.basePath = new File(args[0]);
    }

    IndexReader reader =
        DirectoryReader.open(NIOFSDirectory.open(new File(Config.basePath, "lucene")));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

    QueryParser parser = new QueryParser(Version.LUCENE_44, "city", analyzer);

    long tm = System.currentTimeMillis();

    for (int i = 0; i < RUNS; i++) {
      long cn = (long) (Math.random() * FillLucene.CAMERAS);

      String qs = "city" + cn;

      Query query = parser.parse(qs);

      TopDocs results = searcher.search(query, 5);
      ScoreDoc[] hits = results.scoreDocs;

      if (results.totalHits != 1) {
        System.out.println("Wrong results num: " + results.totalHits + " Query: " + qs);
        continue;
      }

      Document doc = searcher.doc(hits[0].doc);

      //   System.out.println("City: "+doc.get("city"));
      //   System.out.println("Country: "+doc.get("country"));

      BytesRef data = doc.getBinaryValue("data");

      // Wrap with offset/length: the BytesRef may point into a shared buffer.
      Camera cam = Camera.load(ByteBuffer.wrap(data.bytes, data.offset, data.length));

      if (!cam.getCity().equals(qs)) System.out.println("Invalid result");
    }

    tm = System.currentTimeMillis() - tm;

    System.out.println("Time: " + StringUtils.millisToString(tm) + " Rate: " + (RUNS * 1000 / tm));

    /*
    Query query = parser.parse("city800000000");

    TopDocs results = searcher.search(query, 5);
    ScoreDoc[] hits = results.scoreDocs;

    System.out.println("Results: "+results.totalHits);

    Document doc = searcher.doc(hits[0].doc);

    System.out.println("City: "+doc.get("city"));
    System.out.println("Country: "+doc.get("country"));

    BytesRef data =  doc.getBinaryValue("data");

    Camera cam = Camera.load( ByteBuffer.wrap(data.bytes) );

    System.out.println("Camera: "+cam.getId()+" City: "+cam.getCity()+" Country: "+cam.getCountry());
    */
  }
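The ByteBuffer.wrap call above has to pass data.offset and data.length because a BytesRef is a view into a possibly shared byte[]: the value does not necessarily start at index 0. When a standalone array is needed, a small copy helper (illustrative, not from the original) keeps that explicit:

import java.util.Arrays;
import org.apache.lucene.util.BytesRef;

// Copy a BytesRef's value into a fresh byte[], honoring offset and length.
public static byte[] toByteArray(BytesRef ref) {
  return Arrays.copyOfRange(ref.bytes, ref.offset, ref.offset + ref.length);
}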
Example #6
  @Nightly
  public void test() throws Exception {
    MockDirectoryWrapper dir =
        new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }

    final Document doc = new Document();
    final FieldType ft = new FieldType();
    ft.setIndexed(false);
    ft.setStored(true);
    ft.freeze();
    final int valueLength = RandomInts.randomIntBetween(random(), 1 << 13, 1 << 20);
    final byte[] value = new byte[valueLength];
    for (int i = 0; i < valueLength; ++i) {
      // random so that even compressing codecs can't compress it
      value[i] = (byte) random().nextInt(256);
    }
    final Field f = new Field("fld", value, ft);
    doc.add(f);

    final int numDocs = (int) ((1L << 32) / valueLength + 100);
    for (int i = 0; i < numDocs; ++i) {
      w.addDocument(doc);
      if (VERBOSE && i % (numDocs / 10) == 0) {
        System.out.println(i + " of " + numDocs + "...");
      }
    }
    w.forceMerge(1);
    w.close();
    if (VERBOSE) {
      boolean found = false;
      for (String file : dir.listAll()) {
        if (file.endsWith(".fdt")) {
          final long fileLength = dir.fileLength(file);
          if (fileLength >= 1L << 32) {
            found = true;
          }
          System.out.println("File length of " + file + " : " + fileLength);
        }
      }
      if (!found) {
        System.out.println("No .fdt file larger than 4GB, test bug?");
      }
    }

    DirectoryReader rd = DirectoryReader.open(dir);
    Document sd = rd.document(numDocs - 1);
    assertNotNull(sd);
    assertEquals(1, sd.getFields().size());
    BytesRef valueRef = sd.getBinaryValue("fld");
    assertNotNull(valueRef);
    assertEquals(new BytesRef(value), valueRef);
    rd.close();

    dir.close();
  }
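For the same store/merge/read-back cycle without the multi-gigabyte scale or the test harness, a down-scaled sketch against plain Lucene 4.x APIs (the class name, field name, and LUCENE_46 version constant are illustrative choices):

import java.util.Random;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

public class StoredBinaryRoundTrip {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));
    byte[] value = new byte[16];
    new Random(42).nextBytes(value);
    Document doc = new Document();
    doc.add(new StoredField("fld", value)); // stored-only binary field
    w.addDocument(doc);
    w.forceMerge(1);
    w.close();

    DirectoryReader r = DirectoryReader.open(dir);
    BytesRef back = r.document(0).getBinaryValue("fld");
    if (!new BytesRef(value).equals(back)) {
      throw new AssertionError("stored value did not round-trip");
    }
    r.close();
    dir.close();
  }
}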
Example #7
  public void testWriteReadMerge() throws IOException {
    // get another codec, other than the default: so we are merging segments across different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = new Lucene46Codec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf.clone());

    final int docCount = atLeast(200);
    final byte[][][] data = new byte[docCount][][];
    for (int i = 0; i < docCount; ++i) {
      final int fieldCount =
          rarely()
              ? RandomInts.randomIntBetween(random(), 1, 500)
              : RandomInts.randomIntBetween(random(), 1, 5);
      data[i] = new byte[fieldCount][];
      for (int j = 0; j < fieldCount; ++j) {
        final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
        final int max = rarely() ? 256 : 2;
        data[i][j] = randomByteArray(length, max);
      }
    }

    final FieldType type = new FieldType(StringField.TYPE_STORED);
    type.setIndexed(false);
    type.freeze();
    IntField id = new IntField("id", 0, Store.YES);
    for (int i = 0; i < data.length; ++i) {
      Document doc = new Document();
      doc.add(id);
      id.setIntValue(i);
      for (int j = 0; j < data[i].length; ++j) {
        Field f = new Field("bytes" + j, data[i][j], type);
        doc.add(f);
      }
      iw.w.addDocument(doc);
      if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
        iw.w.close();
        // test merging against a non-compressing codec
        if (iwConf.getCodec() == otherCodec) {
          iwConf.setCodec(Codec.getDefault());
        } else {
          iwConf.setCodec(otherCodec);
        }
        iw = new RandomIndexWriter(random(), dir, iwConf.clone());
      }
    }

    for (int i = 0; i < 10; ++i) {
      final int min = random().nextInt(data.length);
      final int max = min + random().nextInt(20);
      iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
    }

    iw.forceMerge(2); // force merges with deletions

    iw.commit();

    final DirectoryReader ir = DirectoryReader.open(dir);
    assertTrue(ir.numDocs() > 0);
    int numDocs = 0;
    for (int i = 0; i < ir.maxDoc(); ++i) {
      final Document doc = ir.document(i);
      if (doc == null) {
        continue;
      }
      ++numDocs;
      final int docId = doc.getField("id").numericValue().intValue();
      assertEquals(data[docId].length + 1, doc.getFields().size());
      for (int j = 0; j < data[docId].length; ++j) {
        final byte[] arr = data[docId][j];
        final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
        final byte[] arr2 =
            Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
        assertArrayEquals(arr, arr2);
      }
    }
    assertTrue(ir.numDocs() <= numDocs);
    ir.close();

    iw.deleteAll();
    iw.commit();
    iw.forceMerge(1);

    iw.close();
    dir.close();
  }
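The randomByteArray helper is not shown in this excerpt. A minimal reconstruction consistent with its call sites (length bytes, each drawn from [0, max), so max == 2 yields highly compressible data), using LuceneTestCase's random():

  // Hypothetical reconstruction of the randomByteArray(length, max) helper used above.
  private static byte[] randomByteArray(int length, int max) {
    final byte[] result = new byte[length];
    for (int i = 0; i < length; ++i) {
      result[i] = (byte) random().nextInt(max);
    }
    return result;
  }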
Example #8
 public byte[] toData(int documentId) throws IOException {
   Document document = searcher.doc(documentId);
   return document.getBinaryValue(FieldNames.DATA);
 }
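A hypothetical caller that resolves a document by id before fetching its payload; FieldNames.ID is an assumed companion constant (only FieldNames.DATA appears in the original):

 // Hypothetical id-based lookup; FieldNames.ID is assumed, not from the original.
 public byte[] toData(String id) throws IOException {
   TopDocs top = searcher.search(new TermQuery(new Term(FieldNames.ID, id)), 1);
   return top.scoreDocs.length == 0 ? null : toData(top.scoreDocs[0].doc);
 }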