/**
 * Convert a well-formed (but not necessarily valid) XML string into a JSONObject.
 *
 * <p>Some information may be lost in this transformation because JSON is a data format and XML
 * is a document format. XML uses elements, attributes, and content text, while JSON uses
 * unordered collections of name/value pairs and arrays of values. JSON does not like to
 * distinguish between elements and attributes. Sequences of similar elements are represented as
 * JSONArrays. Content text may be placed in a "content" member. Comments, prologs, DTDs, and
 * {@code <[ [ ]]>} are ignored.
 *
 * @param string The source string.
 * @return A JSONObject containing the structured data from the XML string.
 * @throws JSONException propagated from the tokener or from the element parser.
 */
public static JSONObject toJSONObject(String string) throws JSONException {
    JSONObject result = new JSONObject();
    XMLTokener tokener = new XMLTokener(string);
    for (;;) {
        // Stop once the input is exhausted or no further '<' can be found.
        if (!tokener.more() || !tokener.skipPast("<")) {
            return result;
        }
        // The tokener now sits just after a '<'; parse whatever construct it opens.
        parse(tokener, result, null);
    }
}
/**
 * Convert a well-formed (but not necessarily valid) XML string into a JSONObject.
 *
 * <p>Some information may be lost in this transformation because JSON is a data format and XML
 * is a document format. XML uses elements, attributes, and content text, while JSON uses
 * unordered collections of name/value pairs and arrays of values. JSON does not like to
 * distinguish between elements and attributes. Sequences of similar elements are represented as
 * JSONArrays. Content text may be placed in a "content" member. Comments, prologs, DTDs, and
 * {@code <[ [ ]]>} are ignored.
 *
 * @param string The source string.
 * @return A JSONObject containing the structured data from the XML string.
 * @throws JSONException propagated from the tokener or from the element parser.
 */
public static JSONObject toJSONObject(final String string) throws JSONException {
    final JSONObject result = new JSONObject();
    final XMLTokener tokener = new XMLTokener(string);

    // Each iteration consumes input up to and including the next '<', then hands the
    // construct that follows to the element parser.
    boolean atTag = tokener.more() && tokener.skipPast("<");
    while (atTag) {
        parse(tokener, result, null);
        atTag = tokener.more() && tokener.skipPast("<");
    }
    return result;
}
/** * Parse XML values and store them in a JSONArray. * * @param x The XMLTokener containing the source string. * @param arrayForm true if array form, false if object form. * @param ja The JSONArray that is containing the current tag or null if we are at the outermost * level. * @param keepStrings Don't type-convert text nodes and attibute values * @return A JSONArray if the value is the outermost tag, otherwise null. * @throws JSONException */ private static Object parse(XMLTokener x, boolean arrayForm, JSONArray ja, boolean keepStrings) throws JSONException { String attribute; char c; String closeTag = null; int i; JSONArray newja = null; JSONObject newjo = null; Object token; String tagName = null; // Test for and skip past these forms: // <!-- ... --> // <![ ... ]]> // <! ... > // <? ... ?> while (true) { if (!x.more()) { throw x.syntaxError("Bad XML"); } token = x.nextContent(); if (token == XML.LT) { token = x.nextToken(); if (token instanceof Character) { if (token == XML.SLASH) { // Close tag </ token = x.nextToken(); if (!(token instanceof String)) { throw new JSONException("Expected a closing name instead of '" + token + "'."); } if (x.nextToken() != XML.GT) { throw x.syntaxError("Misshaped close tag"); } return token; } else if (token == XML.BANG) { // <! c = x.next(); if (c == '-') { if (x.next() == '-') { x.skipPast("-->"); } else { x.back(); } } else if (c == '[') { token = x.nextToken(); if (token.equals("CDATA") && x.next() == '[') { if (ja != null) { ja.put(x.nextCDATA()); } } else { throw x.syntaxError("Expected 'CDATA['"); } } else { i = 1; do { token = x.nextMeta(); if (token == null) { throw x.syntaxError("Missing '>' after '<!'."); } else if (token == XML.LT) { i += 1; } else if (token == XML.GT) { i -= 1; } } while (i > 0); } } else if (token == XML.QUEST) { // <? 
x.skipPast("?>"); } else { throw x.syntaxError("Misshaped tag"); } // Open tag < } else { if (!(token instanceof String)) { throw x.syntaxError("Bad tagName '" + token + "'."); } tagName = (String) token; newja = new JSONArray(); newjo = new JSONObject(); if (arrayForm) { newja.put(tagName); if (ja != null) { ja.put(newja); } } else { newjo.put("tagName", tagName); if (ja != null) { ja.put(newjo); } } token = null; for (; ; ) { if (token == null) { token = x.nextToken(); } if (token == null) { throw x.syntaxError("Misshaped tag"); } if (!(token instanceof String)) { break; } // attribute = value attribute = (String) token; if (!arrayForm && ("tagName".equals(attribute) || "childNode".equals(attribute))) { throw x.syntaxError("Reserved attribute."); } token = x.nextToken(); if (token == XML.EQ) { token = x.nextToken(); if (!(token instanceof String)) { throw x.syntaxError("Missing value"); } newjo.accumulate(attribute, keepStrings ? token : XML.stringToValue((String) token)); token = null; } else { newjo.accumulate(attribute, ""); } } if (arrayForm && newjo.length() > 0) { newja.put(newjo); } // Empty tag <.../> if (token == XML.SLASH) { if (x.nextToken() != XML.GT) { throw x.syntaxError("Misshaped tag"); } if (ja == null) { if (arrayForm) { return newja; } else { return newjo; } } // Content, between <...> and </...> } else { if (token != XML.GT) { throw x.syntaxError("Misshaped tag"); } closeTag = (String) parse(x, arrayForm, newja, keepStrings); if (closeTag != null) { if (!closeTag.equals(tagName)) { throw x.syntaxError("Mismatched '" + tagName + "' and '" + closeTag + "'"); } tagName = null; if (!arrayForm && newja.length() > 0) { newjo.put("childNodes", newja); } if (ja == null) { if (arrayForm) { return newja; } else { return newjo; } } } } } } else { if (ja != null) { ja.put( token instanceof String ? keepStrings ? token : XML.stringToValue((String) token) : token); } } } }
/** * Scan the content following the named tag, attaching it to the context. * * @param x The XMLTokener containing the source string. * @param context The JSONObject that will include the new material. * @param name The tag name. * @return true if the close tag is processed. * @throws JSONException */ private static boolean parse(XMLTokener x, JSONObject context, String name) throws JSONException { char c; int i; JSONObject jsonobject = null; String string; String tagName; Object token; // Test for and skip past these forms: // <!-- ... --> // <! ... > // <![ ... ]]> // <? ... ?> // Report errors for these forms: // <> // <= // << token = x.nextToken(); // <! if (token == BANG) { c = x.next(); if (c == '-') { if (x.next() == '-') { x.skipPast("-->"); return false; } x.back(); } else if (c == '[') { token = x.nextToken(); if ("CDATA".equals(token)) { if (x.next() == '[') { string = x.nextCDATA(); if (string.length() > 0) { context.accumulate("content", string); } return false; } } throw x.syntaxError("Expected 'CDATA['"); } i = 1; do { token = x.nextMeta(); if (token == null) { throw x.syntaxError("Missing '>' after '<!'."); } else if (token == LT) { i += 1; } else if (token == GT) { i -= 1; } } while (i > 0); return false; } else if (token == QUEST) { // <? 
x.skipPast("?>"); return false; } else if (token == SLASH) { // Close tag </ token = x.nextToken(); if (name == null) { throw x.syntaxError("Mismatched close tag " + token); } if (!token.equals(name)) { throw x.syntaxError("Mismatched " + name + " and " + token); } if (x.nextToken() != GT) { throw x.syntaxError("Misshaped close tag"); } return true; } else if (token instanceof Character) { throw x.syntaxError("Misshaped tag"); // Open tag < } else { tagName = (String) token; token = null; jsonobject = new JSONObject(); for (; ; ) { if (token == null) { token = x.nextToken(); } // attribute = value if (token instanceof String) { string = (String) token; token = x.nextToken(); if (token == EQ) { token = x.nextToken(); if (!(token instanceof String)) { throw x.syntaxError("Missing value"); } jsonobject.accumulate(string, JSONObject.stringToValue((String) token)); token = null; } else { jsonobject.accumulate(string, ""); } } else if (token == SLASH) { // Empty tag <.../> if (x.nextToken() != GT) { throw x.syntaxError("Misshaped tag"); } if (jsonobject.length() > 0) { context.accumulate(tagName, jsonobject); } else { context.accumulate(tagName, ""); } return false; } else if (token == GT) { // Content, between <...> and </...> for (; ; ) { token = x.nextContent(); if (token == null) { if (tagName != null) { throw x.syntaxError("Unclosed tag " + tagName); } return false; } else if (token instanceof String) { string = (String) token; if (string.length() > 0) { jsonobject.accumulate("content", JSONObject.stringToValue(string)); } } else if (token == LT) { // Nested element if (parse(x, jsonobject, tagName)) { if (jsonobject.length() == 0) { context.accumulate(tagName, ""); } else if (jsonobject.length() == 1 && jsonobject.opt("content") != null) { context.accumulate(tagName, jsonobject.opt("content")); } else { context.accumulate(tagName, jsonobject); } return false; } } } } else { throw x.syntaxError("Misshaped tag"); } } } }
/**
 * Parse XML content into nested JSONArrays, one array per element.
 *
 * <p>Each element becomes a JSONArray whose first entry is the tag name, followed by a
 * JSONObject of attributes (only when there is at least one), followed by its content. Text
 * content is appended to {@code ja}. Comments, {@code <! ... >} declarations, {@code <? ... ?>}
 * instructions and CDATA sections are consumed without being stored.
 *
 * @param x The XMLTokener containing the source string.
 * @param ja The JSONArray of the enclosing element, or null at the outermost level.
 * @return The array for the first complete element when {@code ja} is null; null when the
 *     close tag matching {@code ja.get(0)} has been consumed.
 * @throws JSONException if the input ends before the element is complete, a tag is malformed,
 *     or a close tag does not match its open tag.
 */
private static JSONArray parse(XMLTokener x, JSONArray ja) throws JSONException {
    while (true) {
        // Without this guard the loop never terminates on truncated input
        // (presumably nextContent() keeps returning null at end of input - confirm).
        if (!x.more()) {
            throw x.syntaxError("Bad XML");
        }

        Object t = x.nextContent();
        if (t != XML.LT) {
            // Plain content between tags.
            if (ja != null) {
                ja.put(t);
            }
            continue;
        }

        t = x.nextToken();
        if (t instanceof Character) {
            if (t == XML.BANG) {
                // <!
                char c = x.next();
                if (c == '-') {
                    if (x.next() == '-') {
                        x.skipPast("-->");
                    } else {
                        // Only un-read the character when it was not part of "<!--".
                        // Backing up after skipPast would re-read the last character it
                        // consumed.
                        x.back();
                    }
                } else if (c == '[') {
                    t = x.nextToken();
                    if (t.equals("CDATA") && x.next() == '[') {
                        // The CDATA text is consumed but not stored.
                        x.nextCDATA();
                    } else {
                        throw x.syntaxError("Expected 'CDATA['");
                    }
                } else {
                    // Skip a <! ... > declaration, balancing nested '<' and '>'.
                    int i = 1;
                    do {
                        t = x.nextMeta();
                        if (t == null) {
                            throw x.syntaxError("Missing '>' after '<!'.");
                        }
                        if (t == XML.LT) {
                            i++;
                        } else if (t == XML.GT) {
                            i--;
                        }
                    } while (i > 0);
                }
            } else if (t == XML.QUEST) {
                // <?
                x.skipPast("?>");
            } else if (t == XML.SLASH) {
                // Close tag </
                t = x.nextToken();
                if (ja == null) {
                    throw x.syntaxError("Mismatched close tag '" + t + "'");
                }
                if (!t.equals(ja.get(0))) {
                    throw x.syntaxError("Mismatched '" + ja.get(0) + "' and '" + t + "'");
                }
                if (x.nextToken() != XML.GT) {
                    throw x.syntaxError("Misshaped close tag");
                }
                return null;
            } else {
                throw x.syntaxError("Misshaped tag");
            }
            continue;
        }

        // Open tag <
        JSONArray newja = new JSONArray();
        JSONObject attributes = new JSONObject();
        if (ja != null) {
            ja.put(newja);
        }
        newja.put(t);
        t = null;
        while (true) {
            if (t == null) {
                t = x.nextToken();
            }
            if (t == null) {
                throw x.syntaxError("Misshaped tag");
            }
            if (!(t instanceof String)) {
                break;
            }

            // attribute = value
            String s = (String) t;
            t = x.nextToken();
            if (t == XML.EQ) {
                t = x.nextToken();
                if (!(t instanceof String)) {
                    throw x.syntaxError("Missing value");
                }
                attributes.accumulate(s, t);
                t = null;
            } else {
                // Valueless attribute; the token just read is examined next iteration.
                attributes.accumulate(s, "");
            }
        }
        if (attributes.length() > 0) {
            newja.put(attributes);
        }

        if (t == XML.SLASH) {
            // Empty tag <.../>
            if (x.nextToken() != XML.GT) {
                throw x.syntaxError("Misshaped tag");
            }
            if (ja == null) {
                return newja;
            }
        } else {
            // Content, between <...> and </...>
            if (t != XML.GT) {
                throw x.syntaxError("Misshaped tag");
            }
            parse(x, newja);
            if (ja == null) {
                return newja;
            }
        }
        // A nested element was read successfully: keep scanning the parent's content
        // instead of falling through to a "Misshaped tag" error.
    }
}
/** * Scan the content following the named tag, attaching it to the context. * * @param x The XMLTokener containing the source string. * @param context The JSONObject that will include the new material. * @param name The tag name. * @return true if the close tag is processed. * @throws JSONException */ private static boolean parse(final XMLTokener x, final JSONObject context, final String name) throws JSONException { char c; int i; String n; JSONObject o = null; String s; Object t; // Test for and skip past these forms: // <!-- ... --> // <! ... > // <![ ... ]]> // <? ... ?> // Report errors for these forms: // <> // <= // << t = x.nextToken(); // <! if (t == BANG) { c = x.next(); if (c == '-') { if (x.next() == '-') { x.skipPast("-->"); return false; } x.back(); } else if (c == '[') { t = x.nextToken(); if (t.equals("CDATA")) { if (x.next() == '[') { s = x.nextCDATA(); if (s.length() > 0) { context.accumulate("content", s); } return false; } } throw x.syntaxError("Expected 'CDATA['"); } i = 1; do { t = x.nextMeta(); if (t == null) { throw x.syntaxError("Missing '>' after '<!'."); } else if (t == LT) { i += 1; } else if (t == GT) { i -= 1; } } while (i > 0); return false; } else if (t == QUEST) { // <? 
x.skipPast("?>"); return false; } else if (t == SLASH) { // Close tag </ t = x.nextToken(); if (name == null) { throw x.syntaxError("Mismatched close tag" + t); } if (!t.equals(name)) { throw x.syntaxError("Mismatched " + name + " and " + t); } if (x.nextToken() != GT) { throw x.syntaxError("Misshaped close tag"); } return true; } else if (t instanceof Character) { throw x.syntaxError("Misshaped tag"); // Open tag < } else { n = (String) t; t = null; o = new JSONObject(); for (; ; ) { if (t == null) { t = x.nextToken(); } // attribute = value if (t instanceof String) { s = (String) t; t = x.nextToken(); if (t == EQ) { t = x.nextToken(); if (!(t instanceof String)) { throw x.syntaxError("Missing value"); } o.accumulate(s, t); t = null; } else { o.accumulate(s, ""); } // Empty tag <.../> } else if (t == SLASH) { if (x.nextToken() != GT) { throw x.syntaxError("Misshaped tag"); } context.accumulate(n, o); return false; // Content, between <...> and </...> } else if (t == GT) { for (; ; ) { t = x.nextContent(); if (t == null) { if (n != null) { throw x.syntaxError("Unclosed tag " + n); } return false; } else if (t instanceof String) { s = (String) t; if (s.length() > 0) { o.accumulate("content", s); } // Nested element } else if (t == LT) { if (parse(x, o, n)) { if (o.length() == 0) { context.accumulate(n, ""); } else if (o.length() == 1 && o.opt("content") != null) { context.accumulate(n, o.opt("content")); } else { context.accumulate(n, o); } return false; } } } } else { throw x.syntaxError("Misshaped tag"); } } } }