/**
 * Checks that each given byte sequence is reported as malformed with the expected length when
 * decoded with the given charset.
 *
 * <p>Each entry of {@code malformed} holds the expected malformed length in its first byte; the
 * remaining bytes are the input handed to {@code decodeCR}. Every entry is checked twice, once
 * with {@code direct == false} and once with {@code direct == true}. Failures are printed to
 * standard output and reported collectively at the end.
 *
 * @param cs charset to decode with
 * @param malformed test entries (expected length followed by the input bytes)
 * @throws RuntimeException if at least one entry was not reported as malformed, or was reported
 *     with a different length
 * @throws Exception declared by this method; presumably propagated from {@code decodeCR}
 */
static void checkMalformed(Charset cs, byte[][] malformed) throws Exception {
  final String charsetName = cs.name();
  System.out.printf("Check malformed <%s>...%n", charsetName);

  boolean anyFailure = false;
  final boolean[] modes = {false, true};
  for (int m = 0; m < modes.length; m++) {
    final boolean direct = modes[m];
    for (int k = 0; k < malformed.length; k++) {
      final byte[] entry = malformed[k];
      // first byte: expected malformed length; rest: bytes to decode
      final int expectedLength = entry[0];
      final byte[] input = Arrays.copyOfRange(entry, 1, entry.length);
      final CoderResult result = decodeCR(input, cs, direct);

      // space-separated hex dump of the input, used in failure messages
      final StringBuilder hex = new StringBuilder();
      for (final byte b : input) {
        if (hex.length() > 0) {
          hex.append(' ');
        }
        hex.append(Integer.toString(b & 0xff, 16));
      }
      final String hexDump = hex.toString();

      if (!result.isMalformed()) {
        System.out.printf(
            " FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, hexDump, result.toString());
        anyFailure = true;
        continue;
      }

      // the length is only queried once the result is known to be malformed
      if (result.length() != expectedLength) {
        System.out.printf(
            " FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, hexDump, result.length());
        anyFailure = true;
      }
    }
  }

  if (anyFailure) {
    throw new RuntimeException("Check malformed failed " + charsetName);
  }
}
/**
 * Strips leading and trailing whitespace bytes from the specified token. A byte counts as
 * whitespace if its value lies between 0 and the space character (inclusive); negative bytes
 * are never treated as whitespace.
 *
 * @param token token to be trimmed
 * @return the token itself if nothing had to be removed, the shared empty token if only
 *     whitespace was found, or a trimmed copy otherwise
 */
public static byte[] trim(final byte[] token) {
  final int length = token.length;

  // skip whitespace from the left
  int from = 0;
  while (from < length && token[from] >= 0 && token[from] <= ' ') {
    from++;
  }

  // skip whitespace from the right, never crossing the left boundary
  int to = length;
  while (to > from && token[to - 1] >= 0 && token[to - 1] <= ' ') {
    to--;
  }

  // nothing trimmed: hand back the original array
  if (from == 0 && to == length) {
    return token;
  }
  if (from == to) {
    return EMPTY;
  }
  return Arrays.copyOfRange(token, from, to);
}
/**
 * Returns a partial token. Positions are byte offsets; unlike {@link #substring}, the result is
 * aligned to the character steps reported by {@code cl}: a start position that lies inside a
 * character is moved forward to the next character start, and the end is extended to the end of
 * the last character that begins before the end position.
 *
 * @param token input text
 * @param start start position (values below 0 are treated as 0)
 * @param end end position (values beyond the token length are treated as the token length)
 * @return the token itself if the whole range is requested, the shared empty token if the range
 *     is empty, or a copy of the aligned range otherwise
 */
public static byte[] subtoken(final byte[] token, final int start, final int end) {
  int s = Math.max(0, start);
  final int e = Math.min(end, token.length);
  if (s == 0 && e == token.length) return token;
  if (s >= e) return EMPTY;

  // Walk forward from up to 4 bytes before the start, one character at a time
  // (presumably 4 is the maximum byte length of a character; verify against cl()).
  // s < e holds here, so the walk stays below the end limit while t < s.
  int t = Math.max(0, s - 4);
  while (t < s) t += cl(token, t);

  // The walk stepped over the start position: snap the start forward exactly once.
  // Previously the start kept being reassigned on every following step, so only the
  // last character of the range was returned. If the overshoot already reaches the
  // end limit, the start is left untouched (unchanged behavior).
  if (t > s && t < e) s = t;

  // extend the end to the next character boundary at or after the end limit
  while (t < e) t += cl(token, t);
  return Arrays.copyOfRange(token, s, t);
}
/**
 * Returns the bytes of the specified token between two byte positions. Note that this method
 * splits purely by byte offset and may therefore cut a multi-byte UTF-8 character in half; use
 * {@link #subtoken} if that matters.
 *
 * @param token input token
 * @param start start position (values below 0 are treated as 0)
 * @param end end position (values beyond the token length are treated as the token length)
 * @return the token itself if the whole range is requested, the shared empty token if the range
 *     is empty, or a copy of the requested range otherwise
 */
public static byte[] substring(final byte[] token, final int start, final int end) {
  final int length = token.length;
  final int from = start < 0 ? 0 : start;
  final int to = end > length ? length : end;

  // whole token requested: no copy needed
  if (from == 0 && to == length) {
    return token;
  }
  if (from >= to) {
    return EMPTY;
  }
  return Arrays.copyOfRange(token, from, to);
}