// Builds necessary utf8 edges between start & end void convertOneEdge(State start, State end, int startCodePoint, int endCodePoint) { startUTF8.set(startCodePoint); endUTF8.set(endCodePoint); // System.out.println("start = " + startUTF8); // System.out.println(" end = " + endUTF8); build(start, end, startUTF8, endUTF8, 0); }
private void build( State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto) { // Break into start, middle, end: if (startUTF8.byteAt(upto) == endUTF8.byteAt(upto)) { // Degen case: lead with the same byte: if (upto == startUTF8.len - 1 && upto == endUTF8.len - 1) { // Super degen: just single edge, one UTF8 byte: start.addTransition(new Transition(startUTF8.byteAt(upto), endUTF8.byteAt(upto), end)); return; } else { assert startUTF8.len > upto + 1; assert endUTF8.len > upto + 1; State n = newUTF8State(); // Single value leading edge start.addTransition(new Transition(startUTF8.byteAt(upto), n)); // type=single // Recurse for the rest build(n, end, startUTF8, endUTF8, 1 + upto); } } else if (startUTF8.len == endUTF8.len) { if (upto == startUTF8.len - 1) { start.addTransition( new Transition(startUTF8.byteAt(upto), endUTF8.byteAt(upto), end)); // type=startend } else { start(start, end, startUTF8, upto, false); if (endUTF8.byteAt(upto) - startUTF8.byteAt(upto) > 1) { // There is a middle all( start, end, startUTF8.byteAt(upto) + 1, endUTF8.byteAt(upto) - 1, startUTF8.len - upto - 1); } end(start, end, endUTF8, upto, false); } } else { // start start(start, end, startUTF8, upto, true); // possibly middle, spanning multiple num bytes int byteCount = 1 + startUTF8.len - upto; final int limit = endUTF8.len - upto; while (byteCount < limit) { // wasteful: we only need first byte, and, we should // statically encode this first byte: tmpUTF8a.set(startCodes[byteCount - 1]); tmpUTF8b.set(endCodes[byteCount - 1]); all(start, end, tmpUTF8a.byteAt(0), tmpUTF8b.byteAt(0), tmpUTF8a.len - 1); byteCount++; } // end end(start, end, endUTF8, upto, true); } }