Java BytesRef Examples, BytesRef Java Examples

Example #1

0

Show file

File: ByteBlockPool.java Project: szitnik/SoftwareAnalysis

 /**
  * Rewrites {@code bytes} in place so that it points into the pool block containing its current
  * offset. On entry the offset is read as an absolute position within this {@link ByteBlockPool}.
  *
  * @param bytes reference whose {@code offset} holds an absolute pool position; its {@code bytes}
  *     and {@code offset} fields are overwritten, its {@code length} is not touched
  * @return the same {@code bytes} instance, now holding the block array and a block-relative offset
  */
 public final BytesRef deref(BytesRef bytes) {
   final int absolute = bytes.offset;
   // High bits select the block, low bits the position inside it.
   bytes.bytes = buffers[absolute >> BYTE_BLOCK_SHIFT];
   bytes.offset = absolute & BYTE_BLOCK_MASK;
   return bytes;
 }

Example #2

0

Show file

File: ByteBlockPool.java Project: szitnik/SoftwareAnalysis

 /**
  * Points {@code term} at the length-prefixed byte sequence stored at {@code textStart}.
  *
  * <p>The prefix occupies one byte when the high bit of the first byte is clear. Otherwise it
  * occupies two bytes: the low seven bits of the first byte, plus the second byte shifted left by
  * seven.
  *
  * @param term reference to fill in; its {@code bytes}, {@code offset} and {@code length} are set
  * @param textStart absolute pool position of the length prefix
  * @return {@code term}
  */
 public final BytesRef setBytesRef(BytesRef term, int textStart) {
   final byte[] block = buffers[textStart >> BYTE_BLOCK_SHIFT];
   term.bytes = block;
   final int start = textStart & BYTE_BLOCK_MASK;
   final int first = block[start];
   final boolean singleByteLength = (first & 0x80) == 0;
   if (singleByteLength) {
     term.length = first;
     term.offset = start + 1;
   } else {
     final int second = block[start + 1] & 0xff;
     term.length = (first & 0x7f) + (second << 7);
     term.offset = start + 2;
   }
   assert term.length >= 0;
   return term;
 }

Example #3

0

Show file

File: ByteBlockPool.java Project: szitnik/SoftwareAnalysis

 /**
  * Copies {@code length} bytes from the pool, starting at the absolute pool position {@code
  * offset}, into {@code bytes} at offset {@code 0} and returns it.
  *
  * <p>The source range may cross any number of block boundaries. Each pass copies at most the
  * bytes between the current read position and the end of the current block, so no single copy
  * reads past a block's end.
  *
  * @param bytes destination; grown to hold {@code length} bytes, after which its {@code offset}
  *     is {@code 0} and its {@code length} is {@code length}
  * @param offset absolute position in the pool of the first byte to copy
  * @param length number of bytes to copy; must not be negative
  * @return {@code bytes}
  * @throws IllegalArgumentException if {@code length} is negative
  */
 public final BytesRef copyFrom(final BytesRef bytes, final int offset, final int length) {
   if (length < 0) {
     throw new IllegalArgumentException("length must be >= 0, got " + length);
   }
   bytes.offset = 0;
   bytes.grow(length);
   bytes.length = length;

   int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
   int pos = offset & BYTE_BLOCK_MASK;
   int destPos = 0;
   int remaining = length;
   while (remaining > 0) {
     // Bound each copy by what is left in the current block. Deriving the chunk size from the
     // total length is only correct for the first block and overruns on longer ranges.
     final int chunk = Math.min(remaining, BYTE_BLOCK_SIZE - pos);
     System.arraycopy(buffers[bufferIndex++], pos, bytes.bytes, destPos, chunk);
     destPos += chunk;
     remaining -= chunk;
     // Every block after the first is read from its start.
     pos = 0;
   }
   return bytes;
 }

Example #4

0

Show file

File: TestMultiFields.java Project: joseerlang/Lucene-solr

  /**
   * Indexes documents with random terms, committing and deleting at random points, then checks
   * that the live docs and per-term postings read back from the reader match the bookkeeping kept
   * while indexing.
   */
  public void testRandom() throws Exception {

    final int iterations = atLeast(2);
    for (int iter = 0; iter < iterations; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      // Bookkeeping: doc ids indexed under each term, doc ids deleted so far, and the list of
      // terms to sample from (a term is appended every time a fresh random string is drawn).
      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);

      // One Document instance is reused; only the two field values change per iteration.
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int docUpto = 0; docUpto < numDocs; docUpto++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // pick a term that was already indexed
          BytesRef existing = terms.get(random().nextInt(terms.size()));
          docs.get(existing).add(docUpto);
          f.setStringValue(existing.utf8ToString());
        } else {
          String text = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef fresh = new BytesRef(text);
          List<Integer> postings = docs.get(fresh);
          if (postings == null) {
            postings = new ArrayList<Integer>();
            docs.put(fresh, postings);
          }
          postings.add(docUpto);
          terms.add(fresh);
          uniqueTerms.add(fresh);
          f.setStringValue(text);
        }
        id.setStringValue(String.valueOf(docUpto));
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (docUpto > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(docUpto);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", String.valueOf(delID)));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> sortedTerms = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(sortedTerms, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : sortedTerms) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            System.out.println("    " + docID + (deleted.contains(docID) ? " (deleted)" : ""));
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      // Every deleted doc must be reported as not live.
      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      // Sample terms and check their postings list exactly the non-deleted docs, in order.
      for (int probe = 0; probe < 100; probe++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (deleted.contains(docID)) {
            continue;
          }
          assertEquals(docID, docsEnum.nextDoc());
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }

Example #5

0

Show file

File: FSTCompletionLookup.java Project: sdgdsffdsfff/bookcodes

  /**
   * Builds the completion automata from the given term/weight iterator.
   *
   * <p>Each term is written to a temporary file prefixed with its encoded weight, the file is
   * sorted, and the sorted entries are assigned to buckets by their position in the file before
   * being handed to the {@link FSTCompletionBuilder}. Both temporary files are deleted on exit.
   *
   * @param tfit source of terms and weights
   * @throws IllegalArgumentException if {@code tfit} is a {@link TermFreqPayloadIterator}
   * @throws IOException if reading, writing or sorting the temporary files fails
   */
  @Override
  public void build(TermFreqIterator tfit) throws IOException {
    if (tfit instanceof TermFreqPayloadIterator) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    final String tempPrefix = FSTCompletionLookup.class.getSimpleName();
    File tempInput = File.createTempFile(tempPrefix, ".input", Sort.defaultTempDir());
    File tempSorted = File.createTempFile(tempPrefix, ".sorted", Sort.defaultTempDir());

    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
    Sort.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;

    // The encoded weight is written ahead of the term bytes so that sorting the records orders
    // them by weight first. Weights are assumed non-negative; negative values would need extra
    // work to get a usable byte order.
    boolean success = false;
    try {
      byte[] buffer = new byte[0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      for (BytesRef spare = tfit.next(); spare != null; spare = tfit.next()) {
        // 4 extra bytes for the int weight written in front of the term.
        final int needed = spare.length + 4;
        if (needed >= buffer.length) {
          buffer = ArrayUtil.grow(buffer, needed);
        }

        output.reset(buffer);
        output.writeInt(encodeWeight(tfit.weight()));
        output.writeBytes(spare.bytes, spare.offset, spare.length);
        writer.write(buffer, 0, output.getPosition());
      }
      writer.close();

      // The score distribution is unknown, so sort everything and split the sorted sequence
      // into equally sized buckets.
      SortInfo info = new Sort().sort(tempInput, tempSorted);
      tempInput.delete();
      sorter = new ExternalRefSorter(new Sort());
      FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, sharedTailLength);

      final int inputLines = info.lines;
      reader = new Sort.ByteSequencesReader(tempSorted);
      int previousBucket = 0;
      int previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
      BytesRef record = new BytesRef();
      BytesRef termOnly = new BytesRef();
      for (long line = 0; reader.read(record); line++) {
        input.reset(record.bytes);
        final int currentScore = input.readInt();

        // Entries with the same score as their predecessor stay in the predecessor's bucket.
        final int bucket =
            (line > 0 && currentScore == previousScore)
                ? previousBucket
                : (int) (line * buckets / inputLines);
        previousScore = currentScore;
        previousBucket = bucket;

        // Skip past the weight: only the term bytes go into the automaton.
        final int termStart = input.getPosition();
        termOnly.bytes = record.bytes;
        termOnly.offset = termStart;
        termOnly.length = record.length - termStart;
        builder.add(termOnly, bucket);
      }

      // Both FSTCompletion instances are backed by the same automaton.
      this.higherWeightsCompletion = builder.build();
      this.normalCompletion =
          new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);

      success = true;
    } finally {
      if (success) {
        IOUtils.close(reader, writer, sorter);
      } else {
        IOUtils.closeWhileHandlingException(reader, writer, sorter);
      }

      tempInput.delete();
      tempSorted.delete();
    }
  }

Example #6

0

Show file

File: TestStressIndexing2.java Project: kushal256/heliosearch

  /**
   * Asserts that two readers expose equivalent content, matching documents by the terms of
   * {@code idField}.
   *
   * <p>Phase one walks the {@code idField} terms of {@code r1}. For each term with a live document
   * it expects exactly one live document in each reader, records the r2-to-r1 doc id mapping, and
   * compares the two documents and their term vectors through the overloaded {@code
   * verifyEquals} methods. Phase two walks all fields and terms of both readers in parallel and
   * compares the (doc id, freq) pairs of every term after translating r2 doc ids into r1 doc ids.
   *
   * @param r1 first reader
   * @param r2 second reader
   * @param idField name of the field whose terms identify documents
   * @throws Throwable any assertion failure or error raised while reading; diagnostic output is
   *     printed to stdout before a document or term-vector mismatch is rethrown
   */
  public void verifyEquals(DirectoryReader r1, DirectoryReader r2, String idField)
      throws Throwable {
    if (VERBOSE) {
      System.out.println("\nr1 docs:");
      printDocs(r1);
      System.out.println("\nr2 docs:");
      printDocs(r2);
    }
    // Use a framework assertion rather than a bare `assert`, which is skipped when JVM
    // assertions are disabled.
    assertEquals(
        "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs(),
        r1.numDocs(),
        r2.numDocs());
    boolean hasDeletes = !(r1.maxDoc() == r2.maxDoc() && r1.numDocs() == r1.maxDoc());

    int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping

    // create mapping from id2 space to id1 space based on idField
    final Fields f1 = MultiFields.getFields(r1);
    if (f1 == null) {
      // make sure r2 is empty
      assertNull(MultiFields.getFields(r2));
      return;
    }
    final Terms terms1 = f1.terms(idField);
    if (terms1 == null) {
      assertTrue(
          MultiFields.getFields(r2) == null || MultiFields.getFields(r2).terms(idField) == null);
      return;
    }
    final TermsEnum termsEnum = terms1.iterator(null);

    final Bits liveDocs1 = MultiFields.getLiveDocs(r1);
    final Bits liveDocs2 = MultiFields.getLiveDocs(r2);

    Fields fields = MultiFields.getFields(r2);
    if (fields == null) {
      // make sure r1 is in fact empty (eg has only all
      // deleted docs):
      Bits liveDocs = MultiFields.getLiveDocs(r1);
      DocsEnum docs = null;
      while (termsEnum.next() != null) {
        docs = TestUtil.docs(random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE);
        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          fail("r1 is not empty but r2 is");
        }
      }
      return;
    }
    Terms terms2 = fields.terms(idField);
    // Report a missing id field as a test failure with context instead of a NullPointerException.
    assertNotNull("r1 has terms for field " + idField + " but r2 does not", terms2);
    TermsEnum termsEnum2 = terms2.iterator(null);

    DocsEnum termDocs1 = null;
    DocsEnum termDocs2 = null;

    while (true) {
      BytesRef term = termsEnum.next();
      // System.out.println("TEST: match id term=" + term);
      if (term == null) {
        break;
      }

      termDocs1 = TestUtil.docs(random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE);
      if (termsEnum2.seekExact(term)) {
        termDocs2 = TestUtil.docs(random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE);
      } else {
        termDocs2 = null;
      }

      if (termDocs1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        // This doc is deleted and wasn't replaced
        assertTrue(termDocs2 == null || termDocs2.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        continue;
      }

      // Exactly one live doc per id term is expected in r1 ...
      int id1 = termDocs1.docID();
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocs1.nextDoc());

      // ... and in r2. termDocs2 is null when r2 does not contain the term at all.
      assertNotNull("id term " + term + " has a live doc in r1 but is missing from r2", termDocs2);
      assertTrue(termDocs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      int id2 = termDocs2.docID();
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocs2.nextDoc());

      r2r1[id2] = id1;

      // verify stored fields are equivalent
      try {
        verifyEquals(r1.document(id1), r2.document(id2));
      } catch (Throwable t) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
        System.out.println("  d1=" + r1.document(id1));
        System.out.println("  d2=" + r2.document(id2));
        throw t;
      }

      try {
        // verify term vectors are equivalent
        verifyEquals(r1.getTermVectors(id1), r2.getTermVectors(id2));
      } catch (Throwable e) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
        Fields tv1 = r1.getTermVectors(id1);
        System.out.println("  d1=" + tv1);
        if (tv1 != null) {
          dumpTermVectors(tv1);
        }

        Fields tv2 = r2.getTermVectors(id2);
        System.out.println("  d2=" + tv2);
        if (tv2 != null) {
          dumpTermVectors(tv2);
        }

        throw e;
      }
    }

    // System.out.println("TEST: done match id");

    // Verify postings
    // System.out.println("TEST: create te1");
    final Fields fields1 = MultiFields.getFields(r1);
    final Iterator<String> fields1Enum = fields1.iterator();
    final Fields fields2 = MultiFields.getFields(r2);
    final Iterator<String> fields2Enum = fields2.iterator();

    String field1 = null, field2 = null;
    TermsEnum termsEnum1 = null;
    termsEnum2 = null;
    DocsEnum docs1 = null, docs2 = null;

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.numDocs()];
    long[] info2 = new long[r2.numDocs()];

    for (; ; ) {
      BytesRef term1 = null, term2 = null;

      // iterate until we get some docs
      int len1;
      for (; ; ) {
        len1 = 0;
        if (termsEnum1 == null) {
          if (!fields1Enum.hasNext()) {
            break;
          }
          field1 = fields1Enum.next();
          Terms terms = fields1.terms(field1);
          if (terms == null) {
            continue;
          }
          termsEnum1 = terms.iterator(null);
        }
        term1 = termsEnum1.next();
        if (term1 == null) {
          // no more terms in this field
          termsEnum1 = null;
          continue;
        }

        // System.out.println("TEST: term1=" + term1);
        docs1 = TestUtil.docs(random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS);
        while (docs1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          int d = docs1.docID();
          int f = docs1.freq();
          // doc id in the high 32 bits, freq in the low 32 bits
          info1[len1] = (((long) d) << 32) | f;
          len1++;
        }
        if (len1 > 0) break;
      }

      // iterate until we get some docs
      int len2;
      for (; ; ) {
        len2 = 0;
        if (termsEnum2 == null) {
          if (!fields2Enum.hasNext()) {
            break;
          }
          field2 = fields2Enum.next();
          Terms terms = fields2.terms(field2);
          if (terms == null) {
            continue;
          }
          termsEnum2 = terms.iterator(null);
        }
        term2 = termsEnum2.next();
        if (term2 == null) {
          // no more terms in this field
          termsEnum2 = null;
          continue;
        }

        // System.out.println("TEST: term2=" + term2);
        docs2 = TestUtil.docs(random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS);
        while (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          // translate the r2 doc id into r1's id space so the packed values are comparable
          int d = r2r1[docs2.docID()];
          int f = docs2.freq();
          info2[len2] = (((long) d) << 32) | f;
          len2++;
        }
        if (len2 > 0) break;
      }

      assertEquals(len1, len2);
      if (len1 == 0) break; // no more terms

      assertEquals(field1, field2);
      assertTrue(term1.bytesEquals(term2));

      if (!hasDeletes) assertEquals(termsEnum1.docFreq(), termsEnum2.docFreq());

      assertEquals("len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes, term1, term2);

      // sort info2 to get it into ascending docid
      Arrays.sort(info2, 0, len2);

      // now compare
      for (int i = 0; i < len1; i++) {
        assertEquals(
            "i="
                + i
                + " len="
                + len1
                + " d1="
                + (info1[i] >>> 32)
                + " f1="
                + (info1[i] & Integer.MAX_VALUE)
                + " d2="
                + (info2[i] >>> 32)
                + " f2="
                + (info2[i] & Integer.MAX_VALUE)
                + " field="
                + field1
                + " term="
                + term1.utf8ToString(),
            info1[i],
            info2[i]);
      }
    }
  }

  /**
   * Prints every field and term of one document's term vectors to stdout, with the doc id and
   * freq of each term and, when a positions enum is available, each position.
   *
   * @param vectors term vectors of a single document; must not be {@code null}
   */
  private void dumpTermVectors(Fields vectors) throws Exception {
    DocsAndPositionsEnum dpEnum = null;
    DocsEnum dEnum = null;
    for (String field : vectors) {
      System.out.println("    " + field + ":");
      Terms terms3 = vectors.terms(field);
      assertNotNull(terms3);
      TermsEnum termsEnum3 = terms3.iterator(null);
      BytesRef term2;
      while ((term2 = termsEnum3.next()) != null) {
        System.out.println(
            "      " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
        dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
        if (dpEnum != null) {
          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          final int freq = dpEnum.freq();
          System.out.println("        doc=" + dpEnum.docID() + " freq=" + freq);
          for (int posUpto = 0; posUpto < freq; posUpto++) {
            System.out.println("          pos=" + dpEnum.nextPosition());
          }
        } else {
          // No positions enum for this term; fall back to docs and freqs only.
          dEnum = TestUtil.docs(random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
          assertNotNull(dEnum);
          assertTrue(dEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          final int freq = dEnum.freq();
          System.out.println("        doc=" + dEnum.docID() + " freq=" + freq);
        }
      }
    }
  }