@Test
  public void testHtmlWithTags() throws Exception {
    // Input page: a title plus a single paragraph inside the body.
    final String htmlText =
        "<html><head><title>Title</title></head>" + "<body><p>this is a test</p></body></html>";

    // Wrap the page in a FetchedDatum, using the same URL twice and the
    // content type both as a response header and as the datum's content type.
    final String pageUrl = "http://domain.com/page.html";
    final String mimeType = "text/html; charset=utf-8";
    HttpHeaders responseHeaders = new HttpHeaders();
    responseHeaders.add(HttpHeaderNames.CONTENT_TYPE, mimeType);
    ContentBytes body = new ContentBytes(htmlText.getBytes("utf-8"));
    FetchedDatum fetched =
        new FetchedDatum(
            pageUrl, pageUrl, System.currentTimeMillis(), responseHeaders, body, mimeType, 0);

    // Run the parser under test and grab the text it produced.
    ParsedDatum parsed = new SimpleParser(new ParserPolicy(), true).parse(fetched);

    // Re-read the parser output with dom4j so it can be queried via XPath.
    SAXReader saxReader = new SAXReader(new Parser());
    saxReader.setEncoding("UTF-8");
    String markup = parsed.getParsedText();
    Document parsedDoc = saxReader.read(new StringInputStream(markup));

    // The XPath is written against the XHTML namespace, so the "xhtml" prefix
    // has to be bound to that namespace URI before the expression is evaluated.
    Map<String, String> prefixToUri = new HashMap<String, String>();
    prefixToUri.put("xhtml", "http://www.w3.org/1999/xhtml");
    XPath paragraphPath = DocumentHelper.createXPath("/xhtml:html/xhtml:body/xhtml:p");
    paragraphPath.setNamespaceURIs(prefixToUri);

    // The paragraph must exist and carry the original text.
    Node paragraph = paragraphPath.selectSingleNode(parsedDoc);
    Assert.assertNotNull(paragraph);
    Assert.assertEquals("this is a test", paragraph.getText());
  }
Exemple #2
0
  @Override
  public void prepare(FlowProcess process, OperationCall<NullContext> opCall) {
    super.prepare(process, opCall);

    // Build the reader locally: wrap a Parser, install a DowngradeXmlFilter
    // configured with _removeNamespaces, and set the encoding to UTF-8.
    SAXReader saxReader = new SAXReader(new Parser());
    saxReader.setXMLFilter(new DowngradeXmlFilter(_removeNamespaces));
    saxReader.setEncoding("UTF-8");
    _reader = saxReader;

    // Fresh ParsedDatum instance held in the _input field.
    _input = new ParsedDatum();
  }
 public DefaultVCardProvider() {
   super();
   // Initialize the pool of sax readers
   for (int i = 0; i < POOL_SIZE; i++) {
     SAXReader xmlReader = new SAXReader();
     xmlReader.setEncoding("UTF-8");
     xmlReaders.add(xmlReader);
   }
 }
 @Override
 public void start() throws IllegalStateException {
   super.start();

   // Fill the reader pool: add POOL_SIZE SAX readers, each set to UTF-8.
   int index = 0;
   while (index < POOL_SIZE) {
     SAXReader pooledReader = new SAXReader();
     pooledReader.setEncoding("UTF-8");
     xmlReaders.add(pooledReader);
     index++;
   }

   // Register this module as a user event listener so that all offline
   // messages can be deleted when a user is deleted.
   UserEventDispatcher.addListener(this);
 }
 /**
  * Reads an XML file into a {@link Document}.
  *
  * <p>A {@link DocumentException} raised while reading is not propagated: its stack trace is
  * printed and {@code null} is returned.
  *
  * @param file the file to read; when {@code null}, nothing is read
  * @param charset the encoding to set on the reader, or {@code null} to leave the reader's
  *     encoding untouched
  * @return the document that was read, or {@code null} when {@code file} is {@code null} or
  *     reading failed
  */
 public static Document read(File file, String charset) {
   if (file != null) {
     SAXReader saxReader = new SAXReader();
     if (charset != null) {
       saxReader.setEncoding(charset);
     }
     try {
       return saxReader.read(file);
     } catch (DocumentException ex) {
       ex.printStackTrace();
     }
   }
   return null;
 }
  public PrivacyListProvider() {
    super();

    // Fill the reader pool: add POOL_SIZE SAX readers, each set to UTF-8.
    for (int slot = 0; slot < POOL_SIZE; ++slot) {
      SAXReader pooledReader = new SAXReader();
      pooledReader.setEncoding("UTF-8");
      xmlReaders.add(pooledReader);
    }

    // Start the counter at zero, then load the total number of privacy lists
    // stored in the database. The (very common) special case is that no privacy
    // lists are stored at all, which lets many database calls be optimized
    // away. In the future, a better general-case solution may be to cache all
    // defined privacy lists when there are fewer than, say, 500.
    privacyListCount = new AtomicInteger(0);
    loadPrivacyListCount();
  }
 /**
  * Reads the XML resource at a URL into a {@link Document}.
  *
  * <p>A {@link DocumentException} raised while reading is not propagated: it is logged as an
  * error and {@code null} is returned.
  *
  * @param url the location to read from; when {@code null}, nothing is read
  * @param charset the encoding to set on the reader, or {@code null} to leave the reader's
  *     encoding untouched
  * @return the document that was read, or {@code null} when {@code url} is {@code null} or
  *     reading failed
  */
 public static Document read(URL url, String charset) {
   if (url != null) {
     SAXReader saxReader = new SAXReader();
     if (charset != null) {
       saxReader.setEncoding(charset);
     }
     try {
       return saxReader.read(url);
     } catch (DocumentException ex) {
       log.error("通过指向xml文件的文件获得Document对象时出错 !", ex);
     }
   }
   return null;
 }
Exemple #8
0
  // Parses the XML content of the given model and hands its root element to
  // Parser.parse together with a newly created "ForwardPlace".
  //
  // Does nothing when this.model is null. A DocumentException while reading is
  // logged and printed; in that case the root element stays null and is still
  // passed on to Parser.parse unchanged.
  private void ParseModelToPTNet(Model model) {
    // NOTE(review): the guard tests the field, while the parameter is what gets
    // dereferenced below — confirm both always refer to the same object.
    if (this.model == null) {
      return;
    }

    Element root = null;
    try {
      SAXReader reader = new SAXReader();
      reader.setEncoding("utf-8");
      // Encode explicitly as UTF-8 so the bytes match the encoding the reader is
      // told to expect; the no-argument getBytes() uses the platform default
      // charset, which corrupts non-ASCII content on non-UTF-8 platforms.
      ByteArrayInputStream stream =
          new ByteArrayInputStream(
              model.XMLContent.getBytes(java.nio.charset.StandardCharsets.UTF_8));
      Document document = reader.read(stream);
      root = document.getRootElement();
    } catch (DocumentException e) {
      Log.getLogger(Config.FLOW).error(e.getMessage());
      e.printStackTrace();
    }

    ForwardPlace sp = (ForwardPlace) newPlace("ForwardPlace");
    new Parser().parse(this, sp, root, null, false);
    Log.getLogger(Config.FLOW).debug(getPtnet());
  }
  /**
   * Adds a new {@code estancia} element to the project XML file and saves the file.
   *
   * <p>The file named by {@code NuevoProyecto.archivo} is read as ISO-8859-1. Among the children
   * of the {@code //vivienda} element, the first one that is not named {@code email} and whose
   * {@code alias} attribute equals {@code EstanciaNueva.seleccionado} receives a new {@code
   * estancia} child with {@code nombre} and {@code imagen} attributes. The document is then
   * written back to the same file, pretty-printed and encoded as ISO-8859-1, and {@code
   * Acciones.inicializarEstancia} is called.
   *
   * <p>{@link IOException} and {@link DocumentException} are not propagated; their stack traces
   * are printed.
   */
  public void navegar() {
    try {
      File aFile = new File(NuevoProyecto.archivo);
      SAXReader xmlReader = new SAXReader();
      xmlReader.setEncoding("iso-8859-1");

      Document doc = xmlReader.read(aFile);

      Element vivienda = (Element) doc.selectSingleNode("//vivienda");

      for (Iterator i = vivienda.elementIterator(); i.hasNext(); ) {
        Element child = (Element) i.next();

        if (!child.getName().equals("email")
            && child.valueOf("@alias").equals(EstanciaNueva.seleccionado)) {
          // ESTEFANIA: added so that the floor-plan image is taken relative to the
          // folder where the executable lives. This avoids absolute paths, so the
          // project keeps working if the executables folder is moved — AS LONG AS
          // the folder that received the plan images when the project was created,
          // and the image itself, keep the same names.
          File dir_iniciall = new File("./");
          String a = dir_iniciall.getAbsolutePath();
          System.out.println("raiz=" + a);
          // NOTE(review): when imagen_e does not contain `a`, indexOf returns -1 and
          // the substring offset becomes lon - 1. The arithmetic is kept exactly as
          // it was — confirm this is the intended way to strip the base directory.
          int ind = EstanciaNueva.imagen_e.indexOf(a);
          int lon = a.length();
          String b = EstanciaNueva.imagen_e.substring(ind + lon);
          System.out.println("ruta relativa=" + b);
          System.out.println("ruta absoluta=" + EstanciaNueva.imagen_e);
          child
              .addElement("estancia")
              .addAttribute("nombre", EstanciaNueva.nombre_e)
              .addAttribute("imagen", b);
          break;
        }
      }

      OutputFormat format = OutputFormat.createPrettyPrint();
      format.setEncoding("iso-8859-1");

      // Write through an OutputStream so that XMLWriter encodes the bytes with the
      // encoding declared in the format; a FileWriter would encode with the platform
      // default charset and could contradict the XML declaration.
      java.io.OutputStream out = new java.io.FileOutputStream(aFile);
      try {
        XMLWriter writer = new XMLWriter(out, format);
        writer.write(doc);
        writer.close();
      } finally {
        // Release the file handle even when writing fails; closing twice is harmless.
        out.close();
      }

      Acciones.inicializarEstancia(EstanciaNueva.seleccionado, EstanciaNueva.nombre_e);

    } catch (IOException e) {
      e.printStackTrace();

    } catch (DocumentException e) {
      e.printStackTrace();
    }
  } // end navegar