@Test public void testHtmlWithTags() throws Exception { final String htmlText = "<html><head><title>Title</title></head>" + "<body><p>this is a test</p></body></html>"; // Create FetchedDatum using data String url = "http://domain.com/page.html"; String contentType = "text/html; charset=utf-8"; HttpHeaders headers = new HttpHeaders(); headers.add(HttpHeaderNames.CONTENT_TYPE, contentType); ContentBytes content = new ContentBytes(htmlText.getBytes("utf-8")); FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0); // Call parser.parse SimpleParser parser = new SimpleParser(new ParserPolicy(), true); ParsedDatum parsedDatum = parser.parse(fetchedDatum); // Now take the resulting HTML, process it using Dom4J SAXReader reader = new SAXReader(new Parser()); reader.setEncoding("UTF-8"); String htmlWithMarkup = parsedDatum.getParsedText(); Document doc = reader.read(new StringInputStream(htmlWithMarkup)); // We have to do helicopter stunts since HTML has a global namespace on it, set // at the <html> element level. XPath xpath = DocumentHelper.createXPath("/xhtml:html/xhtml:body/xhtml:p"); Map<String, String> namespaceUris = new HashMap<String, String>(); namespaceUris.put("xhtml", "http://www.w3.org/1999/xhtml"); xpath.setNamespaceURIs(namespaceUris); Node paragraphNode = xpath.selectSingleNode(doc); Assert.assertNotNull(paragraphNode); Assert.assertEquals("this is a test", paragraphNode.getText()); }
/**
 * Sets up the per-operation state: a {@link SAXReader} configured with a
 * {@code DowngradeXmlFilter} and UTF-8 encoding, plus a fresh {@link ParsedDatum}.
 *
 * @param process the flow process, forwarded to the superclass
 * @param opCall  the operation call, forwarded to the superclass
 */
@Override
public void prepare(FlowProcess process, OperationCall<NullContext> opCall) {
    super.prepare(process, opCall);

    this._reader = new SAXReader(new Parser());

    // The filter is built from the _removeNamespaces setting held by this instance.
    final DowngradeXmlFilter downgradeFilter = new DowngradeXmlFilter(this._removeNamespaces);
    this._reader.setXMLFilter(downgradeFilter);
    this._reader.setEncoding("UTF-8");

    this._input = new ParsedDatum();
}
public DefaultVCardProvider() { super(); // Initialize the pool of sax readers for (int i = 0; i < POOL_SIZE; i++) { SAXReader xmlReader = new SAXReader(); xmlReader.setEncoding("UTF-8"); xmlReaders.add(xmlReader); } }
@Override public void start() throws IllegalStateException { super.start(); // Initialize the pool of sax readers for (int i = 0; i < POOL_SIZE; i++) { SAXReader xmlReader = new SAXReader(); xmlReader.setEncoding("UTF-8"); xmlReaders.add(xmlReader); } // Add this module as a user event listener so we can delete // all offline messages when a user is deleted UserEventDispatcher.addListener(this); }
/**
 * Reads an XML file into a dom4j {@link Document}.
 *
 * <p>A {@link DocumentException} raised while reading is not propagated: its stack trace
 * is printed and {@code null} is returned instead.
 *
 * @param file    the XML file to read; may be {@code null}
 * @param charset the encoding to set on the reader, or {@code null} to leave the
 *                reader's encoding untouched
 * @return the parsed document, or {@code null} if {@code file} is {@code null} or the
 *         file could not be read
 */
public static Document read(File file, String charset) {
    if (file == null) {
        return null;
    }

    final SAXReader saxReader = new SAXReader();
    if (charset != null) {
        saxReader.setEncoding(charset);
    }

    try {
        return saxReader.read(file);
    } catch (DocumentException e) {
        e.printStackTrace();
        return null;
    }
}
public PrivacyListProvider() { super(); // Initialize the pool of sax readers for (int i = 0; i < POOL_SIZE; i++) { SAXReader xmlReader = new SAXReader(); xmlReader.setEncoding("UTF-8"); xmlReaders.add(xmlReader); } // Load the total number of privacy lists in the database. We're looking // for the (very common) special case that there are no privacy lists stored. // In that case, we can optimize away many database calls. In the future, a // better general-case solution may be to cache all privacy lists defined // if there are less than, say, 500. privacyListCount = new AtomicInteger(0); loadPrivacyListCount(); }
/** * @param url * @param charset * @return * @throws DocumentException */ public static Document read(URL url, String charset) { if (url == null) { return null; } SAXReader reader = new SAXReader(); if (charset != null) { reader.setEncoding(charset); } Document document = null; try { document = reader.read(url); } catch (DocumentException e) { log.error("通过指向xml文件的文件获得Document对象时出错 !", e); // e.printStackTrace(); } return document; }
// parse the XML model data to PTNet private void ParseModelToPTNet(Model model) { if (this.model == null) return; else { ByteArrayInputStream stream; SAXReader reader = new SAXReader(); Document document; Element root = null; try { stream = new ByteArrayInputStream(model.XMLContent.getBytes()); reader.setEncoding("utf-8"); document = reader.read(stream); root = document.getRootElement(); } catch (DocumentException e) { Log.getLogger(Config.FLOW).error(e.getMessage()); e.printStackTrace(); } ForwardPlace sp = (ForwardPlace) newPlace("ForwardPlace"); new Parser().parse(this, sp, root, null, false); Log.getLogger(Config.FLOW).debug(getPtnet()); } }
/**
 * Adds the new room ("estancia") to the project XML file and initialises it.
 *
 * <p>The file named by {@code NuevoProyecto.archivo} is read as ISO-8859-1. Among the
 * child elements of the first {@code vivienda} element, the first one that is not named
 * "email" and whose {@code alias} attribute equals {@code EstanciaNueva.seleccionado}
 * receives a new {@code estancia} child carrying the room name and the image path. The
 * document is then written back to the same file, pretty-printed in ISO-8859-1, and
 * {@code Acciones.inicializarEstancia} is called.
 *
 * <p>{@link IOException} and {@link DocumentException} are not propagated; their stack
 * traces are printed.
 */
public void navegar() {
    try {
        File aFile = new File(NuevoProyecto.archivo);
        SAXReader xmlReader = new SAXReader();
        xmlReader.setEncoding("iso-8859-1");
        Document doc = xmlReader.read(aFile);

        Element node = (Element) doc.selectSingleNode("//vivienda");
        for (Iterator i = node.elementIterator(); i.hasNext(); ) {
            node = (Element) i.next();
            if (!node.getName().equals("email")) {
                if (node.valueOf("@alias").equals(EstanciaNueva.seleccionado)) {
                    // ESTEFANIA: added so that the floor-plan image is taken from the
                    // folder that holds the executable. This avoids absolute paths, so
                    // things keep working if the executables folder is moved, AS LONG AS
                    // the folder the plan images were taken from when the project was
                    // created, and the image file itself, keep the same names.
                    File dir_iniciall = new File("./");
                    String a = dir_iniciall.getAbsolutePath();
                    System.out.println("raiz=" + a);

                    // Strip everything up to and including the base directory prefix.
                    // NOTE(review): if imagen_e does not contain 'a', indexOf returns -1
                    // and the substring offset is off by one - confirm callers always
                    // pass a path under the base directory.
                    int ind = EstanciaNueva.imagen_e.indexOf(a);
                    int lon = a.length();
                    String b = EstanciaNueva.imagen_e.substring(ind + lon);
                    System.out.println("ruta relativa=" + b);
                    System.out.println("ruta absoluta=" + EstanciaNueva.imagen_e);

                    node.addElement("estancia")
                            .addAttribute("nombre", EstanciaNueva.nombre_e)
                            .addAttribute("imagen", b);
                    break;
                }
            }
        }

        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("iso-8859-1");

        // Write through an OutputStream so XMLWriter encodes the bytes with the encoding
        // set on the format; a FileWriter would use the platform default charset, which
        // may not match the encoding declared in the XML header.
        XMLWriter writer =
                new XMLWriter(new java.io.FileOutputStream(NuevoProyecto.archivo), format);
        try {
            writer.write(doc);
        } finally {
            // Always release the file handle, even if writing fails.
            writer.close();
        }

        Acciones.inicializarEstancia(EstanciaNueva.seleccionado, EstanciaNueva.nombre_e);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (DocumentException e) {
        e.printStackTrace();
    }
} // end navegar