Esempio n. 1
0
 /**
  * This method ensures that the output String has only valid XML unicode characters as specified
  * by the XML 1.0 standard. For reference, please see the standard. This method will return an
  * empty String if the input is null or empty.
  *
  * @author Donoiu Cristian, GPL The String whose non-valid characters we want to remove. in
  *     String, stripped of non-valid characters.
  */
 public static String removeInvalidXMLCharacters(String s) {
   StringBuilder out = new StringBuilder(); // Used to hold the output.
   int codePoint; // Used to reference the current character.
   // String ss = "\ud801\udc00";                           // This is actualy one unicode
   // character, represented by two code units!!!.
   // System.out.println(ss.codePointCount(0, ss.length()));// See: 1
   int i = 0;
   while (i < s.length()) {
     // System.out.println("i=" + i);
     codePoint = s.codePointAt(i); // This is the unicode code of the character.
     if ((codePoint == 0x9)
         || // Consider testing larger ranges first to improve speed.
         (codePoint == 0xA)
         || (codePoint == 0xD)
         || ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
         || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
         || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
       out.append(Character.toChars(codePoint));
     }
     i +=
         Character.charCount(
             codePoint); // Increment with the number of code units(java chars) needed to represent
                         // a Unicode char.
   }
   return out.toString();
 }