@NotNull private static Pair<CharSequence, String> convertLineSeparators(@NotNull CharBuffer buffer) { int dst = 0; char prev = ' '; int crCount = 0; int lfCount = 0; int crlfCount = 0; final int length = buffer.length(); final char[] bufferArray = CharArrayUtil.fromSequenceWithoutCopying(buffer); for (int src = 0; src < length; src++) { char c = bufferArray != null ? bufferArray[src] : buffer.charAt(src); switch (c) { case '\r': if (bufferArray != null) bufferArray[dst++] = '\n'; else buffer.put(dst++, '\n'); crCount++; break; case '\n': if (prev == '\r') { crCount--; crlfCount++; } else { if (bufferArray != null) bufferArray[dst++] = '\n'; else buffer.put(dst++, '\n'); lfCount++; } break; default: if (bufferArray != null) bufferArray[dst++] = c; else buffer.put(dst++, c); break; } prev = c; } String detectedLineSeparator = null; if (crlfCount > crCount && crlfCount > lfCount) { detectedLineSeparator = "\r\n"; } else if (crCount > lfCount) { detectedLineSeparator = "\r"; } else if (lfCount > 0) { detectedLineSeparator = "\n"; } CharSequence result; if (buffer.length() == dst) { result = buffer; } else { // in Mac JDK CharBuffer.subSequence() signature differs from Oracle's // more than that, the signature has changed between jd6 and jdk7, // so use more generic CharSequence.subSequence() just in case @SuppressWarnings("UnnecessaryLocalVariable") CharSequence seq = buffer; result = seq.subSequence(0, dst); } return Pair.create(result, detectedLineSeparator); }
private void checkSplitSequence() { int len = buffer.position(); buffer.position(0); // Search for the first & or \ in the last 10 (or less) characters prevBuf = null; int j = 0; for (int i = len - 1; ((i >= 0) && (j < 10)); i--) { if ((buffer.charAt(i) == '&') || (buffer.charAt(i) == '\\')) { prevBuf = buffer.subSequence(i, len).toString(); len = i; break; } j++; } buffer.position(len); buffer.limit(len); }
/** * Check if the first X characters of a byte stream match a String. * * @param data The byte array to process * @param pattern The String to match * @return True if the pattern was found, false otherwise */ private static boolean bytesEqualsString(byte[] data, String pattern) { byte[] bytes = new byte[pattern.length()]; Charset csets = Charset.forName("US-ASCII"); boolean fin = false; int currChar = 0; // remove any CR and/or LF characters at the beginning of the article // data while (!fin) { if (currChar >= data.length) break; byte in = data[currChar]; ByteBuffer bb = ByteBuffer.wrap(new byte[] {(byte) in}); CharBuffer cb = csets.decode(bb); char c = cb.charAt(0); if (data.length > 0 && (c == '\n' || c == '\r')) currChar++; else fin = true; if (data.length == 0) fin = true; } // extract bytes (chars) to check from article data for (int i = 0; i < bytes.length && i < data.length; i++, currChar++) { byte in = data[currChar]; bytes[i] = (byte) in; } // decode byte data to characters ByteBuffer bb = ByteBuffer.wrap(bytes); CharBuffer cb = csets.decode(bb); // compare these characters to the pattern String for (int i = 0; i < pattern.length(); i++) if (cb.charAt(i) != pattern.charAt(i)) return false; return true; }
/**
 * Runs the slice checks for a {@code CharBuffer} that wraps a String.
 *
 * <p>Covers, in order: slicing at position 0, at a moved position and with a non-zero
 * initial position (each via the {@code test} helper), relative and absolute {@code get}
 * on a slice, slicing a slice repeatedly, and {@code toString}, {@code subSequence} and
 * {@code duplicate} on a slice. A progress line is printed before each step and the first
 * failed check aborts the run with a {@code RuntimeException}.
 *
 * @param args ignored
 * @throws Exception declared for the helper calls; the checks here throw RuntimeException
 */
public static void main(String[] args) throws Exception {
  System.out.println(">>> StringCharBufferSliceTest-main: testing the slice method...");

  final String source = "for testing";

  System.out.println(">>> StringCharBufferSliceTest-main: testing with the position 0.");
  CharBuffer buffer = CharBuffer.wrap(source);
  test(buffer, buffer.slice());

  System.out.println(">>> StringCharBufferSliceTest-main: testing with new position.");
  buffer.position(2);
  test(buffer, buffer.slice());

  System.out.println(
      ">>> StringCharBufferSliceTest-main: testing with non zero initial position.");
  buffer = CharBuffer.wrap(source, 3, source.length());
  test(buffer, buffer.slice());

  System.out.println(">>> StringCharBufferSliceTest-main: testing slice result with get()");
  buffer.position(4);
  buffer.limit(7);
  CharBuffer sliced = buffer.slice();
  int remainingReads = 3;
  while (remainingReads-- > 0) {
    // Relative reads: both buffers advance by one character per iteration.
    final char fromSlice = sliced.get();
    final char fromBuffer = buffer.get();
    if (fromSlice != fromBuffer) {
      throw new RuntimeException("Wrong characters in slice result.");
    }
  }

  System.out.println(">>> StringCharBufferSliceTest-main: testing slice result with get(int)");
  buffer.position(4);
  buffer.limit(7);
  sliced = buffer.slice();
  for (int offset = 0; offset < 3; offset++) {
    // Index 0 of the slice corresponds to index 4 of the sliced buffer.
    final char fromSlice = sliced.get(offset);
    final char fromBuffer = buffer.get(4 + offset);
    if (fromSlice != fromBuffer) {
      throw new RuntimeException("Wrong characters in slice result.");
    }
  }

  System.out.println(">>> StringCharBufferSliceTest-main: testing slice with result of slice");
  buffer.position(0);
  buffer.limit(buffer.capacity());
  sliced = buffer.slice();
  for (int step = 0; step < 4; step++) {
    sliced.position(step);
    final CharBuffer nested = sliced.slice();
    if (nested.position() != 0) {
      throw new RuntimeException("New buffer's position should be zero");
    }
    if (!nested.equals(sliced)) {
      throw new RuntimeException("New buffer should be equal");
    }
    sliced = nested;
  }

  System.out.println(">>> StringCharBufferSliceTest-main: testing toString.");
  buffer.position(4);
  buffer.limit(7);
  sliced = buffer.slice();
  if (!"tes".equals(sliced.toString())) {
    throw new RuntimeException("bad toString() after slice(): " + sliced.toString());
  }

  System.out.println(">>> StringCharBufferSliceTest-main: testing subSequence.");
  buffer.position(4);
  buffer.limit(8);
  sliced = buffer.slice();
  final CharSequence middle = sliced.subSequence(1, 3);
  final boolean middleOk = middle.charAt(0) == 'e' && middle.charAt(1) == 's';
  if (!middleOk) {
    throw new RuntimeException("bad subSequence() after slice(): '" + middle + "'");
  }

  System.out.println(">>> StringCharBufferSliceTest-main: testing duplicate.");
  buffer.position(4);
  buffer.limit(8);
  sliced = buffer.slice();
  final CharBuffer copy = sliced.duplicate();
  final boolean copyOk =
      copy.charAt(0) == 't'
          && copy.charAt(1) == 'e'
          && copy.charAt(2) == 's'
          && copy.charAt(3) == 't';
  if (!copyOk) {
    throw new RuntimeException("bad duplicate() after slice(): '" + copy + "'");
  }

  System.out.println(">>> StringCharBufferSliceTest-main: done!");
}