Java UTF8Sequence Examples

Programming Language: Java

Class/Type: UTF8Sequence

Examples at hotexamples.com: 4

Java UTF8Sequence - 4 examples found. These are the top-rated real-world Java examples of UTF8Sequence, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

byteAt(3)

numBits(2)

set(2)

Example #1

Show file

File: UTF32ToUTF8.java Project: RedbeardTheNinja/ClusterBasedRelevanceFeedback

 /**
  * Builds the necessary UTF-8 edges between {@code start} and {@code end}.
  *
  * <p>Loads the two code points into the shared {@code startUTF8} and {@code endUTF8} sequences
  * via {@code set}, then delegates to {@code build}, beginning at byte index 0.
  *
  * @param start state the generated edges leave from
  * @param end state the generated edges lead to
  * @param startCodePoint code point loaded into {@code startUTF8} (presumably the inclusive
  *     lower bound of the range -- confirm against the caller)
  * @param endCodePoint code point loaded into {@code endUTF8} (presumably the inclusive upper
  *     bound of the range -- confirm against the caller)
  */
 void convertOneEdge(State start, State end, int startCodePoint, int endCodePoint) {
   startUTF8.set(startCodePoint);
   endUTF8.set(endCodePoint);
   // Debug aid, intentionally disabled: prints both sequences after they are set.
   // System.out.println("start = " + startUTF8);
   // System.out.println("  end = " + endUTF8);
   build(start, end, startUTF8, endUTF8, 0);
 }

Example #2

Show file

File: UTF32ToUTF8.java Project: RedbeardTheNinja/ClusterBasedRelevanceFeedback

 /**
  * Adds the transitions for the trailing ("end") side of a range, handling the byte of
  * {@code utf8} at index {@code upto} and recursing over the bytes after it.
  *
  * @param start state that receives the transitions for this byte position
  * @param end state targeted once the last byte of {@code utf8} has been handled
  * @param utf8 byte sequence being walked
  * @param upto index into {@code utf8} of the byte handled by this call
  * @param doAll when true, the lead-byte values below the current byte are also covered by a
  *     call to {@code all}; recursive calls always pass true
  */
 private void end(State start, State end, UTF8Sequence utf8, int upto, boolean doAll) {
   if (upto == utf8.len - 1) {
     // Final byte, recursion stops: a single ranged transition to `end`, running from the
     // byte AND-ed with the complement of its MASKS entry up to the byte itself.
     final int lowestByte = utf8.byteAt(upto) & (~MASKS[utf8.numBits(upto) - 1]);
     start.addTransition(new Transition(lowestByte, utf8.byteAt(upto), end)); // type=end
     return;
   }

   // Special case when numBits is 5: begin at 194 (0xC2) rather than at the masked byte, to
   // avoid creating unused edges (utf8 doesn't accept certain byte sequences). There are
   // other cases that could be optimized too.
   final int firstCode =
       utf8.numBits(upto) == 5
           ? 194
           : utf8.byteAt(upto) & (~MASKS[utf8.numBits(upto) - 1]);

   if (doAll) {
     if (utf8.byteAt(upto) != firstCode) {
       // Cover every lead byte strictly below the current one.
       final int lastBelow = utf8.byteAt(upto) - 1;
       final int bytesAfter = utf8.len - upto - 1;
       all(start, end, firstCode, lastBelow, bytesAfter);
     }
   }

   // Exact-match edge for the current byte, then continue with the next byte.
   State next = newUTF8State();
   start.addTransition(new Transition(utf8.byteAt(upto), next)); // type=end
   end(next, end, utf8, 1 + upto, true);
 }

Example #3

Show file

File: UTF32ToUTF8.java Project: RedbeardTheNinja/ClusterBasedRelevanceFeedback

 /**
  * Adds the transitions for the leading ("start") side of a range, handling the byte of
  * {@code utf8} at index {@code upto} and recursing over the bytes after it.
  *
  * @param start state that receives the transitions for this byte position
  * @param end state targeted once the last byte of {@code utf8} has been handled
  * @param utf8 byte sequence being walked
  * @param upto index into {@code utf8} of the byte handled by this call
  * @param doAll when true, the lead-byte values above the current byte are also covered by a
  *     call to {@code all}; recursive calls always pass true
  */
 private void start(State start, State end, UTF8Sequence utf8, int upto, boolean doAll) {
   if (upto == utf8.len - 1) {
     // Final byte, recursion stops: a single ranged transition to `end`, running from the
     // byte itself up to the byte OR-ed with its MASKS entry.
     start.addTransition(
         new Transition(
             utf8.byteAt(upto), utf8.byteAt(upto) | MASKS[utf8.numBits(upto) - 1], end)); // type=start
     return;
   }

   // Exact-match edge for the current byte, then handle the remaining bytes first.
   State next = newUTF8State();
   start.addTransition(new Transition(utf8.byteAt(upto), next)); // type=start
   start(next, end, utf8, 1 + upto, true);

   final int lastCode = utf8.byteAt(upto) | MASKS[utf8.numBits(upto) - 1];
   if (!doAll || utf8.byteAt(upto) == lastCode) {
     // Either the caller did not ask for the rest, or nothing lies above the current byte.
     return;
   }

   // Cover every lead byte strictly above the current one.
   final int firstAbove = utf8.byteAt(upto) + 1;
   final int bytesAfter = utf8.len - upto - 1;
   all(start, end, firstAbove, lastCode, bytesAfter);
 }

Example #4

Show file

File: UTF32ToUTF8.java Project: RedbeardTheNinja/ClusterBasedRelevanceFeedback

  /**
   * Recursively adds the transitions leading from {@code start} to {@code end} for the range
   * bounded by {@code startUTF8} and {@code endUTF8}, looking at the byte at index {@code upto}
   * of both sequences. The work is broken into a start part, an optional middle part and an end
   * part.
   *
   * @param start state that receives the transitions for this byte position
   * @param end state targeted once a full sequence has been handled
   * @param startUTF8 byte sequence forming one bound of the range
   * @param endUTF8 byte sequence forming the other bound of the range
   * @param upto index of the byte under consideration in both sequences
   */
  private void build(
      State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto) {

    final boolean sameByteHere = startUTF8.byteAt(upto) == endUTF8.byteAt(upto);
    if (sameByteHere) {
      // Degenerate case: both sequences lead with the same byte.
      final boolean bothOnLastByte = upto == startUTF8.len - 1 && upto == endUTF8.len - 1;
      if (bothOnLastByte) {
        // Fully degenerate: a single edge for one UTF8 byte.
        start.addTransition(new Transition(startUTF8.byteAt(upto), endUTF8.byteAt(upto), end));
        return;
      }

      assert startUTF8.len > upto + 1;
      assert endUTF8.len > upto + 1;
      State next = newUTF8State();

      // One single-value edge for the shared byte ...
      start.addTransition(new Transition(startUTF8.byteAt(upto), next)); // type=single

      // ... then recurse on the remaining bytes.
      build(next, end, startUTF8, endUTF8, 1 + upto);
      return;
    }

    if (startUTF8.len == endUTF8.len) {
      // Different byte here, but both sequences have the same length.
      if (upto == startUTF8.len - 1) {
        // type=startend
        start.addTransition(new Transition(startUTF8.byteAt(upto), endUTF8.byteAt(upto), end));
        return;
      }

      start(start, end, startUTF8, upto, false);
      final int byteGap = endUTF8.byteAt(upto) - startUTF8.byteAt(upto);
      if (byteGap > 1) {
        // There is a middle: the byte values strictly between the two bounds.
        final int firstMiddle = startUTF8.byteAt(upto) + 1;
        final int lastMiddle = endUTF8.byteAt(upto) - 1;
        all(start, end, firstMiddle, lastMiddle, startUTF8.len - upto - 1);
      }
      end(start, end, endUTF8, upto, false);
      return;
    }

    // The two sequences differ in length.

    // start
    start(start, end, startUTF8, upto, true);

    // possibly a middle, spanning the byte counts between the two lengths
    final int limit = endUTF8.len - upto;
    for (int byteCount = 1 + startUTF8.len - upto; byteCount < limit; byteCount++) {
      // wasteful: only the first byte is needed, and it should be statically encoded
      tmpUTF8a.set(startCodes[byteCount - 1]);
      tmpUTF8b.set(endCodes[byteCount - 1]);
      all(start, end, tmpUTF8a.byteAt(0), tmpUTF8b.byteAt(0), tmpUTF8a.len - 1);
    }

    // end
    end(start, end, endUTF8, upto, true);
  }