/**
   * Transforms the given XML string using the XSL string.
   *
   * @param xmlString The String containg the XML to transform
   * @param xslString The XML Stylesheet String containing the transform instructions
   * @return String representation of the transformation
   */
  public static String transform(String xmlString, String xslString) {

    String shortString = new String(xmlString);
    if (shortString.length() > 100) shortString = shortString.substring(0, 100) + "...";

    try {

      TransformerFactory tFactory = TransformerFactory.newInstance();

      logger.logComment("Transforming string: " + shortString);

      StreamSource xslFileSource = new StreamSource(new StringReader(xslString));

      Transformer transformer = tFactory.newTransformer(xslFileSource);

      StringWriter writer = new StringWriter();

      transformer.transform(
          new StreamSource(new StringReader(xmlString)), new StreamResult(writer));

      String shortResult = writer.toString();
      if (shortResult.length() > 100) shortResult = shortResult.substring(0, 100) + "...";

      logger.logComment("Result: " + shortResult);

      return writer.toString();
    } catch (TransformerException e) {
      GuiUtils.showErrorMessage(logger, "Error when transforming the XML: " + shortString, e, null);
      return null;
    }
  }
  /**
   * Transforms the given XML file using the specified XSL file.
   *
   * @param origXmlFile The file containg the XML to transform
   * @param xslString The XML Stylesheet conntaining the transform instructions
   * @return String representation of the transformation
   */
  public static String transform(File origXmlFile, String xslString) {
    if (!origXmlFile.exists()) {
      GuiUtils.showErrorMessage(
          logger, "Warning, XML file: " + origXmlFile + " doesn't exist", null, null);
      return null;
    }

    try {

      TransformerFactory tFactory = TransformerFactory.newInstance();

      StreamSource xslFileSource = new StreamSource(new StringReader(xslString));

      Transformer transformer = tFactory.newTransformer(xslFileSource);

      StringWriter writer = new StringWriter();

      transformer.transform(new StreamSource(origXmlFile), new StreamResult(writer));

      return writer.toString();
    } catch (Exception e) {
      GuiUtils.showErrorMessage(logger, "Error when loading the XML file: " + origXmlFile, e, null);
      return null;
    }
  }
  /**
   * Transforms the given XML string using the specified XSL file.
   *
   * @param xmlString The String containg the XML to transform
   * @param xslFile The XML Stylesheet conntaining the transform instructions
   * @return String representation of the transformation
   */
  public static String transform(String xmlString, File xslFile) {

    if (!xslFile.exists()) {
      GuiUtils.showErrorMessage(
          logger, "Warning, XSL file: " + xslFile + " doesn't exist", null, null);
      return null;
    }

    String shortString = new String(xmlString);
    if (shortString.length() > 100) shortString = shortString.substring(0, 100) + "...";

    try {
      logger.logComment("The xslFile is " + xslFile.getAbsolutePath() + " *************");

      TransformerFactory tFactory = TransformerFactory.newInstance();

      logger.logComment("Transforming string: " + shortString);

      StreamSource xslFileSource = new StreamSource(xslFile);

      Transformer transformer = tFactory.newTransformer(xslFileSource);

      StringWriter writer = new StringWriter();

      transformer.transform(
          new StreamSource(new StringReader(xmlString)), new StreamResult(writer));

      String shortResult = writer.toString();
      if (shortResult.length() > 100) shortResult = shortResult.substring(0, 100) + "...";

      logger.logComment("Result: " + shortResult);

      return writer.toString();
    } catch (TransformerException e) {
      GuiUtils.showErrorMessage(logger, "Error when transforming the XML: " + shortString, e, null);
      return null;
    }
  }
Beispiel #4
0
  /**
   * Format this XML data as a String.
   *
   * @webref xml:method
   * @brief Formats XML data as a String
   * @param indent -1 for a single line (and no declaration), >= 0 for indents and newlines
   * @return the content
   * @see XML#toString()
   */
  public String format(int indent) {
    try {
      // entities = doctype.getEntities()
      boolean useIndentAmount = false;
      TransformerFactory factory = TransformerFactory.newInstance();
      if (indent != -1) {
        try {
          factory.setAttribute("indent-number", indent);
        } catch (IllegalArgumentException e) {
          useIndentAmount = true;
        }
      }
      Transformer transformer = factory.newTransformer();

      // Add the XML declaration at the top if this node is the root and we're
      // not writing to a single line (indent = -1 means single line).
      if (indent == -1 || parent == null) {
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
      } else {
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
      }

      //      transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "sample.dtd");

      transformer.setOutputProperty(OutputKeys.METHOD, "xml");

      //      transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "yes");  // huh?

      //      transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC,
      //          "-//W3C//DTD XHTML 1.0 Transitional//EN");
      //      transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
      //          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");

      // For Android, because (at least 2.3.3) doesn't like indent-number
      if (useIndentAmount) {
        transformer.setOutputProperty(
            "{http://xml.apache.org/xslt}indent-amount", String.valueOf(indent));
      }

      //      transformer.setOutputProperty(OutputKeys.ENCODING,"ISO-8859-1");
      //      transformer.setOutputProperty(OutputKeys.ENCODING,"UTF8");
      transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
      //      transformer.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS

      // Always indent, otherwise the XML declaration will just be jammed
      // onto the first line with the XML code as well.
      transformer.setOutputProperty(OutputKeys.INDENT, "yes");

      //      Properties p = transformer.getOutputProperties();
      //      for (Object key : p.keySet()) {
      //        System.out.println(key + " -> " + p.get(key));
      //      }

      // If you smell something, that's because this code stinks. No matter
      // the settings of the Transformer object, if the XML document already
      // has whitespace elements, it won't bother re-indenting/re-formatting.
      // So instead, transform the data once into a single line string.
      // If indent is -1, then we're done. Otherwise re-run and the settings
      // of the factory will kick in. If you know a better way to do this,
      // please contribute. I've wasted too much of my Sunday on it. But at
      // least the Giants are getting blown out by the Falcons.

      final String decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
      final String sep = System.getProperty("line.separator");

      StringWriter tempWriter = new StringWriter();
      StreamResult tempResult = new StreamResult(tempWriter);
      transformer.transform(new DOMSource(node), tempResult);
      String[] tempLines = PApplet.split(tempWriter.toString(), sep);
      //      PApplet.println(tempLines);
      if (tempLines[0].startsWith("<?xml")) {
        // Remove XML declaration from the top before slamming into one line
        int declEnd = tempLines[0].indexOf("?>") + 2;
        // if (tempLines[0].length() == decl.length()) {
        if (tempLines[0].length() == declEnd) {
          // If it's all the XML declaration, remove it
          //          PApplet.println("removing first line");
          tempLines = PApplet.subset(tempLines, 1);
        } else {
          //          PApplet.println("removing part of first line");
          // If the first node has been moved to this line, be more careful
          // tempLines[0] = tempLines[0].substring(decl.length());
          tempLines[0] = tempLines[0].substring(declEnd);
        }
      }
      String singleLine = PApplet.join(PApplet.trim(tempLines), "");
      if (indent == -1) {
        return singleLine;
      }

      // Might just be whitespace, which won't be valid XML for parsing below.
      // https://github.com/processing/processing/issues/1796
      // Since indent is not -1, that means they want valid XML,
      // so we'll give them the single line plus the decl... Lame? sure.
      if (singleLine.trim().length() == 0) {
        // You want whitespace? I've got your whitespace right here.
        return decl + sep + singleLine;
      }

      // Since the indent is not -1, bring back the XML declaration
      // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");

      StringWriter stringWriter = new StringWriter();
      StreamResult xmlOutput = new StreamResult(stringWriter);
      //      DOMSource source = new DOMSource(node);
      Source source = new StreamSource(new StringReader(singleLine));
      transformer.transform(source, xmlOutput);
      String outgoing = stringWriter.toString();

      // Add the XML declaration to the top if it's not there already
      if (outgoing.startsWith(decl)) {
        int declen = decl.length();
        int seplen = sep.length();
        if (outgoing.length() > declen + seplen
            && !outgoing.substring(declen, declen + seplen).equals(sep)) {
          // make sure there's a line break between the XML decl and the code
          return outgoing.substring(0, decl.length()) + sep + outgoing.substring(decl.length());
        }
        return outgoing;
      } else {
        return decl + sep + outgoing;
      }

    } catch (Exception e) {
      e.printStackTrace();
    }
    return null;
  }
  public void CollectData(String link) {

    try {
      // Creating an empty XML Document

      DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
      DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
      Document doc = docBuilder.newDocument();
      int flag = 0;
      // create the root element and add it to the document
      Element movie = doc.createElement("movie");
      doc.appendChild(movie);
      movie.setAttribute("id", String.valueOf(n));
      n++;
      // create sub elements
      Element genres = doc.createElement("genres");
      Element actors = doc.createElement("actors");
      Element reviews = doc.createElement("reviews");

      URL movieUrl = new URL(link);
      URL reviewsURL = new URL(link + "reviews/#type=top_critics");
      BufferedWriter bw3 = new BufferedWriter(new FileWriter("movies.xml", true));
      int count = -1;
      String auth = "";
      BufferedReader br3 = new BufferedReader(new InputStreamReader(movieUrl.openStream()));
      String str2 = "";
      String info = "";
      while (null != (str2 = br3.readLine())) {
        // start reading the html document
        if (str2.isEmpty()) continue;
        if (count == 14) break;
        if (count == 12) {
          if (!str2.contains("<h3>Cast</h3>")) continue;
          else count++;
        }
        if (count == 13) {
          if (str2.contains(">ADVERTISEMENT</p>")) {
            count++;
            movie.appendChild(actors);
            continue;
          } else {
            if (str2.contains("itemprop=\"name\">")) {
              Element actor = doc.createElement("actor");
              actors.appendChild(actor);
              Text text = doc.createTextNode(Jsoup.parse(str2.toString()).text());
              actor.appendChild(text);
            } else continue;
          }
        }

        if (count <= 11) {
          switch (count) {
            case -1:
              {
                if (!str2.contains("property=\"og:image\"")) continue;
                else {
                  Pattern image =
                      Pattern.compile("http://.*.jpg", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
                  Matcher match = image.matcher(str2);
                  while (match.find()) {

                    Element imageLink = doc.createElement("imageLink");
                    movie.appendChild(imageLink);
                    Text text = doc.createTextNode(match.group());
                    imageLink.appendChild(text);
                    count++;
                  }
                }
                break;
              }
            case 0:
              {
                if (str2.contains("<title>")) {

                  Element name = doc.createElement("name");
                  movie.appendChild(name);
                  Text text =
                      doc.createTextNode(
                          Jsoup.parse(str2.toString().replace(" - Rotten Tomatoes", "")).text());
                  name.appendChild(text);
                  count++;
                }
                break;
              }
            case 1:
              {
                if (!str2.contains("itemprop=\"ratingValue\"")) break;
                else {
                  Element score = doc.createElement("score");
                  movie.appendChild(score);
                  Text text = doc.createTextNode(Jsoup.parse(str2.toString()).text());
                  score.appendChild(text);
                  count++;
                }
                break;
              }
            case 2:
              {
                if (!str2.contains("itemprop=\"description\">")) continue;
                else count++;
                break;
              }
            case 3:
              {
                if (!str2.contains("itemprop=\"duration\"")) info = info.concat(str2);
                else {
                  Element MovieInfo = doc.createElement("MovieInfo");
                  movie.appendChild(MovieInfo);
                  Text text = doc.createTextNode(Jsoup.parse(info.toString()).text());
                  MovieInfo.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }
            case 4:
              {
                if (!str2.contains("itemprop=\"genre\"")) info = info.concat(str2);
                else {
                  Element duration = doc.createElement("duration");
                  movie.appendChild(duration);
                  Text text = doc.createTextNode(Jsoup.parse(info.toString()).text());
                  duration.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }
            case 5:
              {
                if (info.contains("itemprop=\"genre\"")) {
                  Element genre = doc.createElement("genre");
                  genres.appendChild(genre);
                  Text text = doc.createTextNode(Jsoup.parse(info.toString()).text());
                  genre.appendChild(text);
                  info = "";
                }
                if (str2.contains(">Directed By:<")) {
                  count++;
                  movie.appendChild(genres);
                  continue;
                } else {

                  if (str2.contains("itemprop=\"genre\"")) {
                    Element genre = doc.createElement("genre");
                    genres.appendChild(genre);
                    Text text = doc.createTextNode(Jsoup.parse(str2.toString()).text());
                    genre.appendChild(text);
                  } else continue;
                }
                break;
              }
            case 6:
              {
                if (!str2.contains(">Written By:<")) {
                  if (str2.contains(">In Theaters:<")) {
                    Element director = doc.createElement("director");
                    movie.appendChild(director);
                    Text text =
                        doc.createTextNode(
                            Jsoup.parse(info.toString().replace("Directed By: ", "")).text());
                    director.appendChild(text);
                    info = str2;
                    count += 2;
                    break;
                  }
                  info = info.concat(str2);
                } else {
                  Element director = doc.createElement("director");
                  movie.appendChild(director);
                  Text text =
                      doc.createTextNode(
                          Jsoup.parse(info.toString().replace("Directed By: ", "")).text());
                  director.appendChild(text);
                  info = "";
                  count++;
                }
                break;
              }
            case 7:
              {
                if (!str2.contains(">In Theaters:<")) {
                  if (str2.contains(">On DVD:<")) {
                    Element writer = doc.createElement("writer");
                    movie.appendChild(writer);
                    Text text = doc.createTextNode(Jsoup.parse(info.toString()).text());
                    writer.appendChild(text);
                    info = str2;
                    count += 2;
                    break;
                  }
                  info = info.concat(str2);
                } else {
                  Element writer = doc.createElement("writer");
                  movie.appendChild(writer);
                  Text text = doc.createTextNode(Jsoup.parse(info.toString()).text());
                  writer.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }
            case 8:
              {
                if (!str2.contains(">On DVD:<")) info = info.concat(str2);
                else {
                  Element TheatreRelease = doc.createElement("TheatreRelease");
                  movie.appendChild(TheatreRelease);
                  Text text =
                      doc.createTextNode(
                          Jsoup.parse(info.toString().replace("In Theaters:", "")).text());
                  TheatreRelease.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }
            case 9:
              {
                if (!str2.contains(">US Box Office:<")) {
                  if (str2.contains("itemprop=\"productionCompany\"")) {
                    Element DvdRelease = doc.createElement("DvdRelease");
                    movie.appendChild(DvdRelease);
                    Text text =
                        doc.createTextNode(
                            Jsoup.parse(info.toString().replace("On DVD:", "")).text());
                    DvdRelease.appendChild(text);
                    info = str2;
                    count += 2;
                    break;
                  }
                  info = info.concat(str2);
                } else {
                  Element DvdRelease = doc.createElement("DvdRelease");
                  movie.appendChild(DvdRelease);
                  Text text =
                      doc.createTextNode(
                          Jsoup.parse(info.toString().replace("On DVD:", "")).text());
                  DvdRelease.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }
            case 10:
              {
                if (!str2.contains("itemprop=\"productionCompany\"")) info = info.concat(str2);
                else {
                  Element BOCollection = doc.createElement("BOCollection");
                  movie.appendChild(BOCollection);
                  Text text =
                      doc.createTextNode(
                          Jsoup.parse(info.toString().replace("US Box Office:", "")).text());
                  BOCollection.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }
            case 11:
              {
                if (!str2.contains(">Official Site")) info = info.concat(str2);
                else {
                  Element Production = doc.createElement("Production");
                  movie.appendChild(Production);
                  Text text = doc.createTextNode(Jsoup.parse(info.toString()).text());
                  Production.appendChild(text);
                  info = str2;
                  count++;
                }
                break;
              }

            default:
              break;
          }
        }
      }
      BufferedReader br4 = new BufferedReader(new InputStreamReader(reviewsURL.openStream()));
      String str3 = "";
      String info2 = "";
      int count2 = 0;
      while (null != (str3 = br4.readLine())) {
        if (count2 == 0) {

          if (!str3.contains("<div class=\"reviewsnippet\">")) continue;
          else count2++;
        }
        if (count2 == 1) {
          if (!str3.contains("<p class=\"small subtle\">")) info2 = info2.concat(str3);
          else {
            Element review = doc.createElement("review");
            reviews.appendChild(review);
            Text text = doc.createTextNode(Jsoup.parse(info2.toString()).text());
            review.appendChild(text);
            info2 = "";
            count2 = 0;
          }
        }
      }
      movie.appendChild(reviews);
      TransformerFactory transfac = TransformerFactory.newInstance();
      Transformer trans = transfac.newTransformer();
      trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
      trans.setOutputProperty(OutputKeys.INDENT, "yes");

      // create string from xml tree
      StringWriter sw = new StringWriter();
      StreamResult result = new StreamResult(sw);
      DOMSource source = new DOMSource(doc);
      trans.transform(source, result);
      String xmlString = sw.toString();
      bw3.write(xmlString);
      br3.close();
      br4.close();
      bw3.close();
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }