/** * Transforms the given XML string using the XSL string. * * @param xmlString The String containg the XML to transform * @param xslString The XML Stylesheet String containing the transform instructions * @return String representation of the transformation */ public static String transform(String xmlString, String xslString) { String shortString = new String(xmlString); if (shortString.length() > 100) shortString = shortString.substring(0, 100) + "..."; try { TransformerFactory tFactory = TransformerFactory.newInstance(); logger.logComment("Transforming string: " + shortString); StreamSource xslFileSource = new StreamSource(new StringReader(xslString)); Transformer transformer = tFactory.newTransformer(xslFileSource); StringWriter writer = new StringWriter(); transformer.transform( new StreamSource(new StringReader(xmlString)), new StreamResult(writer)); String shortResult = writer.toString(); if (shortResult.length() > 100) shortResult = shortResult.substring(0, 100) + "..."; logger.logComment("Result: " + shortResult); return writer.toString(); } catch (TransformerException e) { GuiUtils.showErrorMessage(logger, "Error when transforming the XML: " + shortString, e, null); return null; } }
/** * Transforms the given XML file using the specified XSL file. * * @param origXmlFile The file containg the XML to transform * @param xslString The XML Stylesheet conntaining the transform instructions * @return String representation of the transformation */ public static String transform(File origXmlFile, String xslString) { if (!origXmlFile.exists()) { GuiUtils.showErrorMessage( logger, "Warning, XML file: " + origXmlFile + " doesn't exist", null, null); return null; } try { TransformerFactory tFactory = TransformerFactory.newInstance(); StreamSource xslFileSource = new StreamSource(new StringReader(xslString)); Transformer transformer = tFactory.newTransformer(xslFileSource); StringWriter writer = new StringWriter(); transformer.transform(new StreamSource(origXmlFile), new StreamResult(writer)); return writer.toString(); } catch (Exception e) { GuiUtils.showErrorMessage(logger, "Error when loading the XML file: " + origXmlFile, e, null); return null; } }
/** * Transforms the given XML string using the specified XSL file. * * @param xmlString The String containg the XML to transform * @param xslFile The XML Stylesheet conntaining the transform instructions * @return String representation of the transformation */ public static String transform(String xmlString, File xslFile) { if (!xslFile.exists()) { GuiUtils.showErrorMessage( logger, "Warning, XSL file: " + xslFile + " doesn't exist", null, null); return null; } String shortString = new String(xmlString); if (shortString.length() > 100) shortString = shortString.substring(0, 100) + "..."; try { logger.logComment("The xslFile is " + xslFile.getAbsolutePath() + " *************"); TransformerFactory tFactory = TransformerFactory.newInstance(); logger.logComment("Transforming string: " + shortString); StreamSource xslFileSource = new StreamSource(xslFile); Transformer transformer = tFactory.newTransformer(xslFileSource); StringWriter writer = new StringWriter(); transformer.transform( new StreamSource(new StringReader(xmlString)), new StreamResult(writer)); String shortResult = writer.toString(); if (shortResult.length() > 100) shortResult = shortResult.substring(0, 100) + "..."; logger.logComment("Result: " + shortResult); return writer.toString(); } catch (TransformerException e) { GuiUtils.showErrorMessage(logger, "Error when transforming the XML: " + shortString, e, null); return null; } }
/** * Format this XML data as a String. * * @webref xml:method * @brief Formats XML data as a String * @param indent -1 for a single line (and no declaration), >= 0 for indents and newlines * @return the content * @see XML#toString() */ public String format(int indent) { try { // entities = doctype.getEntities() boolean useIndentAmount = false; TransformerFactory factory = TransformerFactory.newInstance(); if (indent != -1) { try { factory.setAttribute("indent-number", indent); } catch (IllegalArgumentException e) { useIndentAmount = true; } } Transformer transformer = factory.newTransformer(); // Add the XML declaration at the top if this node is the root and we're // not writing to a single line (indent = -1 means single line). if (indent == -1 || parent == null) { transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); } else { transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); } // transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "sample.dtd"); transformer.setOutputProperty(OutputKeys.METHOD, "xml"); // transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "yes"); // huh? // transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, // "-//W3C//DTD XHTML 1.0 Transitional//EN"); // transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, // "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"); // For Android, because (at least 2.3.3) doesn't like indent-number if (useIndentAmount) { transformer.setOutputProperty( "{http://xml.apache.org/xslt}indent-amount", String.valueOf(indent)); } // transformer.setOutputProperty(OutputKeys.ENCODING,"ISO-8859-1"); // transformer.setOutputProperty(OutputKeys.ENCODING,"UTF8"); transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); // transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS // Always indent, otherwise the XML declaration will just be jammed // onto the first line with the XML code as well. transformer.setOutputProperty(OutputKeys.INDENT, "yes"); // Properties p = transformer.getOutputProperties(); // for (Object key : p.keySet()) { // System.out.println(key + " -> " + p.get(key)); // } // If you smell something, that's because this code stinks. No matter // the settings of the Transformer object, if the XML document already // has whitespace elements, it won't bother re-indenting/re-formatting. // So instead, transform the data once into a single line string. // If indent is -1, then we're done. Otherwise re-run and the settings // of the factory will kick in. If you know a better way to do this, // please contribute. I've wasted too much of my Sunday on it. But at // least the Giants are getting blown out by the Falcons. final String decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; final String sep = System.getProperty("line.separator"); StringWriter tempWriter = new StringWriter(); StreamResult tempResult = new StreamResult(tempWriter); transformer.transform(new DOMSource(node), tempResult); String[] tempLines = PApplet.split(tempWriter.toString(), sep); // PApplet.println(tempLines); if (tempLines[0].startsWith("<?xml")) { // Remove XML declaration from the top before slamming into one line int declEnd = tempLines[0].indexOf("?>") + 2; // if (tempLines[0].length() == decl.length()) { if (tempLines[0].length() == declEnd) { // If it's all the XML declaration, remove it // PApplet.println("removing first line"); tempLines = PApplet.subset(tempLines, 1); } else { // PApplet.println("removing part of first line"); // If the first node has been moved to this line, be more careful // tempLines[0] = tempLines[0].substring(decl.length()); tempLines[0] = tempLines[0].substring(declEnd); } } String singleLine = PApplet.join(PApplet.trim(tempLines), ""); if (indent == -1) { return singleLine; } // Might just be whitespace, which won't be valid XML for parsing below. // https://github.com/processing/processing/issues/1796 // Since indent is not -1, that means they want valid XML, // so we'll give them the single line plus the decl... Lame? sure. if (singleLine.trim().length() == 0) { // You want whitespace? I've got your whitespace right here. return decl + sep + singleLine; } // Since the indent is not -1, bring back the XML declaration // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); StringWriter stringWriter = new StringWriter(); StreamResult xmlOutput = new StreamResult(stringWriter); // DOMSource source = new DOMSource(node); Source source = new StreamSource(new StringReader(singleLine)); transformer.transform(source, xmlOutput); String outgoing = stringWriter.toString(); // Add the XML declaration to the top if it's not there already if (outgoing.startsWith(decl)) { int declen = decl.length(); int seplen = sep.length(); if (outgoing.length() > declen + seplen && !outgoing.substring(declen, declen + seplen).equals(sep)) { // make sure there's a line break between the XML decl and the code return outgoing.substring(0, decl.length()) + sep + outgoing.substring(decl.length()); } return outgoing; } else { return decl + sep + outgoing; } } catch (Exception e) { e.printStackTrace(); } return null; }
public void CollectData(String link) { try { // Creating an empty XML Document DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); Document doc = docBuilder.newDocument(); int flag = 0; // create the root element and add it to the document Element movie = doc.createElement("movie"); doc.appendChild(movie); movie.setAttribute("id", String.valueOf(n)); n++; // create sub elements Element genres = doc.createElement("genres"); Element actors = doc.createElement("actors"); Element reviews = doc.createElement("reviews"); URL movieUrl = new URL(link); URL reviewsURL = new URL(link + "reviews/#type=top_critics"); BufferedWriter bw3 = new BufferedWriter(new FileWriter("movies.xml", true)); int count = -1; String auth = ""; BufferedReader br3 = new BufferedReader(new InputStreamReader(movieUrl.openStream())); String str2 = ""; String info = ""; while (null != (str2 = br3.readLine())) { // start reading the html document if (str2.isEmpty()) continue; if (count == 14) break; if (count == 12) { if (!str2.contains("<h3>Cast</h3>")) continue; else count++; } if (count == 13) { if (str2.contains(">ADVERTISEMENT</p>")) { count++; movie.appendChild(actors); continue; } else { if (str2.contains("itemprop=\"name\">")) { Element actor = doc.createElement("actor"); actors.appendChild(actor); Text text = doc.createTextNode(Jsoup.parse(str2.toString()).text()); actor.appendChild(text); } else continue; } } if (count <= 11) { switch (count) { case -1: { if (!str2.contains("property=\"og:image\"")) continue; else { Pattern image = Pattern.compile("http://.*.jpg", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); Matcher match = image.matcher(str2); while (match.find()) { Element imageLink = doc.createElement("imageLink"); movie.appendChild(imageLink); Text text = doc.createTextNode(match.group()); imageLink.appendChild(text); count++; } } break; } case 0: { if (str2.contains("<title>")) { Element name = doc.createElement("name"); movie.appendChild(name); Text text = doc.createTextNode( Jsoup.parse(str2.toString().replace(" - Rotten Tomatoes", "")).text()); name.appendChild(text); count++; } break; } case 1: { if (!str2.contains("itemprop=\"ratingValue\"")) break; else { Element score = doc.createElement("score"); movie.appendChild(score); Text text = doc.createTextNode(Jsoup.parse(str2.toString()).text()); score.appendChild(text); count++; } break; } case 2: { if (!str2.contains("itemprop=\"description\">")) continue; else count++; break; } case 3: { if (!str2.contains("itemprop=\"duration\"")) info = info.concat(str2); else { Element MovieInfo = doc.createElement("MovieInfo"); movie.appendChild(MovieInfo); Text text = doc.createTextNode(Jsoup.parse(info.toString()).text()); MovieInfo.appendChild(text); info = str2; count++; } break; } case 4: { if (!str2.contains("itemprop=\"genre\"")) info = info.concat(str2); else { Element duration = doc.createElement("duration"); movie.appendChild(duration); Text text = doc.createTextNode(Jsoup.parse(info.toString()).text()); duration.appendChild(text); info = str2; count++; } break; } case 5: { if (info.contains("itemprop=\"genre\"")) { Element genre = doc.createElement("genre"); genres.appendChild(genre); Text text = doc.createTextNode(Jsoup.parse(info.toString()).text()); genre.appendChild(text); info = ""; } if (str2.contains(">Directed By:<")) { count++; movie.appendChild(genres); continue; } else { if (str2.contains("itemprop=\"genre\"")) { Element genre = doc.createElement("genre"); genres.appendChild(genre); Text text = doc.createTextNode(Jsoup.parse(str2.toString()).text()); genre.appendChild(text); } else continue; } break; } case 6: { if (!str2.contains(">Written By:<")) { if (str2.contains(">In Theaters:<")) { Element director = doc.createElement("director"); movie.appendChild(director); Text text = doc.createTextNode( Jsoup.parse(info.toString().replace("Directed By: ", "")).text()); director.appendChild(text); info = str2; count += 2; break; } info = info.concat(str2); } else { Element director = doc.createElement("director"); movie.appendChild(director); Text text = doc.createTextNode( Jsoup.parse(info.toString().replace("Directed By: ", "")).text()); director.appendChild(text); info = ""; count++; } break; } case 7: { if (!str2.contains(">In Theaters:<")) { if (str2.contains(">On DVD:<")) { Element writer = doc.createElement("writer"); movie.appendChild(writer); Text text = doc.createTextNode(Jsoup.parse(info.toString()).text()); writer.appendChild(text); info = str2; count += 2; break; } info = info.concat(str2); } else { Element writer = doc.createElement("writer"); movie.appendChild(writer); Text text = doc.createTextNode(Jsoup.parse(info.toString()).text()); writer.appendChild(text); info = str2; count++; } break; } case 8: { if (!str2.contains(">On DVD:<")) info = info.concat(str2); else { Element TheatreRelease = doc.createElement("TheatreRelease"); movie.appendChild(TheatreRelease); Text text = doc.createTextNode( Jsoup.parse(info.toString().replace("In Theaters:", "")).text()); TheatreRelease.appendChild(text); info = str2; count++; } break; } case 9: { if (!str2.contains(">US Box Office:<")) { if (str2.contains("itemprop=\"productionCompany\"")) { Element DvdRelease = doc.createElement("DvdRelease"); movie.appendChild(DvdRelease); Text text = doc.createTextNode( Jsoup.parse(info.toString().replace("On DVD:", "")).text()); DvdRelease.appendChild(text); info = str2; count += 2; break; } info = info.concat(str2); } else { Element DvdRelease = doc.createElement("DvdRelease"); movie.appendChild(DvdRelease); Text text = doc.createTextNode( Jsoup.parse(info.toString().replace("On DVD:", "")).text()); DvdRelease.appendChild(text); info = str2; count++; } break; } case 10: { if (!str2.contains("itemprop=\"productionCompany\"")) info = info.concat(str2); else { Element BOCollection = doc.createElement("BOCollection"); movie.appendChild(BOCollection); Text text = doc.createTextNode( Jsoup.parse(info.toString().replace("US Box Office:", "")).text()); BOCollection.appendChild(text); info = str2; count++; } break; } case 11: { if (!str2.contains(">Official Site")) info = info.concat(str2); else { Element Production = doc.createElement("Production"); movie.appendChild(Production); Text text = doc.createTextNode(Jsoup.parse(info.toString()).text()); Production.appendChild(text); info = str2; count++; } break; } default: break; } } } BufferedReader br4 = new BufferedReader(new InputStreamReader(reviewsURL.openStream())); String str3 = ""; String info2 = ""; int count2 = 0; while (null != (str3 = br4.readLine())) { if (count2 == 0) { if (!str3.contains("<div class=\"reviewsnippet\">")) continue; else count2++; } if (count2 == 1) { if (!str3.contains("<p class=\"small subtle\">")) info2 = info2.concat(str3); else { Element review = doc.createElement("review"); reviews.appendChild(review); Text text = doc.createTextNode(Jsoup.parse(info2.toString()).text()); review.appendChild(text); info2 = ""; count2 = 0; } } } movie.appendChild(reviews); TransformerFactory transfac = TransformerFactory.newInstance(); Transformer trans = transfac.newTransformer(); trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); trans.setOutputProperty(OutputKeys.INDENT, "yes"); // create string from xml tree StringWriter sw = new StringWriter(); StreamResult result = new StreamResult(sw); DOMSource source = new DOMSource(doc); trans.transform(source, result); String xmlString = sw.toString(); bw3.write(xmlString); br3.close(); br4.close(); bw3.close(); } catch (Exception ex) { ex.printStackTrace(); } }