/**
 * Reports a surrogate-related input error and blanks out the offending character.
 * <p>
 * An {@link InputError} with code {@link CoreErrorCode#TTEG05} is registered with the
 * session context, carrying the hexadecimal value of the character and its index.
 * The character at {@code index} is then overwritten with a single space.
 *
 * @param inputData buffer being checked; modified in place at {@code index}
 * @param index position of the offending character within {@code inputData}
 * @param codePoint the offending 16-bit char value (reported in hexadecimal)
 * @throws SnuggleParseException declared by this method; presumably propagated from
 *   the error registration call - confirm against the session context
 */
private void recordSurrogateError(StringBuilder inputData, int index, char codePoint)
        throws SnuggleParseException {
    String hexValue = Integer.toHexString(codePoint);
    Integer position = Integer.valueOf(index);
    InputError surrogateError = new InputError(CoreErrorCode.TTEG05, null, hexValue, position);
    sessionContext.registerError(surrogateError);

    /* Replace the bad character so that later processing sees a harmless space */
    inputData.setCharAt(index, ' ');
}
/**
 * Makes a single pass over the input, collecting the index of every newline character
 * and checking each character for validity.
 * <p>
 * The following problems are registered as errors and the offending char is replaced
 * in place with a space:
 * <ul>
 *   <li>A high surrogate not followed by a low surrogate, or a low surrogate not
 *       preceded by a high surrogate (reported via {@code recordSurrogateError}).</li>
 *   <li>An ISO control character other than {@code '\r'}, {@code '\n'} or {@code '\t'}
 *       (reported as {@link CoreErrorCode#TTEG02}).</li>
 * </ul>
 * A character that follows an unpaired high surrogate is still checked in its own right,
 * so a disallowed control character in that position is reported as well.
 *
 * @param inputData buffer to scan; bad characters are overwritten in place
 * @return array whose first entry is -1, followed by the index of each {@code '\n'}
 *   in the order encountered
 * @throws SnuggleParseException declared by this method; presumably propagated from
 *   the error registration calls - confirm against the session context
 */
private int[] calculateNewlineIndicesAndCheckCodePoints(StringBuilder inputData)
        throws SnuggleParseException {
    List<Integer> newlineIndicesBuilder = new ArrayList<Integer>();
    newlineIndicesBuilder.add(Integer.valueOf(-1));

    char lastChar = 0;
    char thisChar; /* (16 bit char only) */
    int codePoint; /* (Full Unicode code point) */
    for (int i = 0, length = inputData.length(); i < length; i++, lastChar = thisChar) {
        thisChar = inputData.charAt(i);
        if (thisChar == '\n') {
            newlineIndicesBuilder.add(Integer.valueOf(i));
        }
        if (Character.isHighSurrogate(lastChar) && Character.isLowSurrogate(thisChar)) {
            /* Well-formed surrogate pair */
            codePoint = Character.toCodePoint(lastChar, thisChar);
        }
        else {
            if (Character.isHighSurrogate(lastChar)) {
                /* Error: last was bad surrogate character. We deliberately carry on
                 * afterwards so that the current character still gets validated. */
                recordSurrogateError(inputData, i - 1, lastChar);
            }
            if (Character.isLowSurrogate(thisChar)) {
                /* Error: this is bad surrogate character */
                recordSurrogateError(inputData, i, thisChar);
                continue;
            }
            codePoint = thisChar;
        }
        /* Check that we allow this codepoint */
        if (Character.isISOControl(codePoint)
                && !(codePoint == '\r' || codePoint == '\n' || codePoint == '\t')) {
            sessionContext.registerError(
                    new InputError(
                            CoreErrorCode.TTEG02,
                            null,
                            Integer.toHexString(codePoint),
                            Integer.valueOf(i)));
            inputData.setCharAt(i, ' ');
        }
    }
    /* Make sure last character wasn't surrogate pair starter */
    if (Character.isHighSurrogate(lastChar)) {
        recordSurrogateError(inputData, inputData.length() - 1, lastChar);
    }

    /* Finally store newline information */
    int[] calculatedNewlineIndices = new int[newlineIndicesBuilder.size()];
    for (int i = 0; i < calculatedNewlineIndices.length; i++) {
        calculatedNewlineIndices[i] = newlineIndicesBuilder.get(i);
    }
    return calculatedNewlineIndices;
}