/** * Stores the specified source to the specified file. * * @param in input source * @param file target file * @throws IOException I/O exception */ public static void store(final InputSource in, final IOFile file) throws IOException { // add directory if it does not exist anyway file.dir().md(); final PrintOutput po = new PrintOutput(file.path()); try { final Reader r = in.getCharacterStream(); final InputStream is = in.getByteStream(); final String id = in.getSystemId(); if (r != null) { for (int c; (c = r.read()) != -1; ) po.utf8(c); } else if (is != null) { for (int b; (b = is.read()) != -1; ) po.write(b); } else if (id != null) { final BufferInput bi = new BufferInput(IO.get(id)); try { for (int b; (b = bi.read()) != -1; ) po.write(b); } finally { bi.close(); } } } finally { po.close(); } }
/**
 * Creates a SAX input source that reads the given file as a byte stream.
 *
 * <p>The system id of the source is set to the value returned by
 * {@code FileUtils.createFileURL} for the file.
 *
 * @param pAppFile file to read from
 * @return input source backed by a newly opened {@link FileInputStream}
 * @throws FileNotFoundException if the file cannot be opened for reading
 */
private static InputSource getConfigSource(File pAppFile) throws FileNotFoundException {
  final String fileUrl = FileUtils.createFileURL(pAppFile);
  final InputSource source = new InputSource(new FileInputStream(pAppFile));
  source.setSystemId(fileUrl);
  return source;
}
/**
 * Read a character stream into SAX events.
 *
 * <p>Wraps the reader and system id in an {@link InputSource} and delegates to
 * {@code inputSourceToSAX}.
 *
 * @param reader character stream to read from
 * @param systemId system id to set on the input source
 * @param xmlReceiver receiver to output to
 * @param parserConfiguration parser configuration
 * @param handleLexical passed through unchanged to {@code inputSourceToSAX}
 */
public static void readerToSAX(
    Reader reader,
    String systemId,
    XMLReceiver xmlReceiver,
    XMLUtils.ParserConfiguration parserConfiguration,
    boolean handleLexical) {
  final InputSource source = new InputSource();
  source.setCharacterStream(reader);
  source.setSystemId(systemId);
  inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
}
/**
 * Creates a SAX input source for the configuration resource ({@code CONFIG_FILE_NAME})
 * located relative to the given class.
 *
 * <p>A missing resource is reported as {@code DataNotFoundException}. Previously the
 * resource URL was dereferenced before any null check, so a missing resource surfaced
 * as a {@link NullPointerException} instead.
 *
 * @param pAppClass class whose resource lookup is used to locate the configuration
 * @return input source backed by the resource stream
 * @throws DataNotFoundException if the configuration resource cannot be found
 */
private static InputSource getConfigSource(Class pAppClass) throws DataNotFoundException {
  URL resource = pAppClass.getResource(CONFIG_FILE_NAME);
  // only open the stream when the resource URL could be resolved
  InputStream configResourceStream =
      (resource == null) ? null : pAppClass.getResourceAsStream(CONFIG_FILE_NAME);
  if (null == resource || null == configResourceStream) {
    throw new DataNotFoundException(
        "unable to find XML configuration file resource: "
            + CONFIG_FILE_NAME
            + " for class: "
            + pAppClass.getName());
  }

  String resourceFileName = resource.toExternalForm();
  // NOTE(review): the external URL form is used as a file path here; confirm that this
  // is the intended existence check before changing it.
  File resourceFile = new File(resourceFileName);

  InputSource inputSource = new InputSource(configResourceStream);
  if (!resourceFile.exists()) {
    inputSource.setSystemId(resourceFileName);
  }
  return inputSource;
}
public static void parse(String fileNameOrURL, RDFParser parser, Model model) throws IOException, SAXException, MalformedURLException, ModelException { URL url = new URL(normalizeURI(fileNameOrURL)); // maybe this model is loaded as schema... // Model model = factory.registry().get(url.toString()); // if(model != null) // return model; // Prepare input source model.setSourceURI(url.toString()); InputStream in = url.openStream(); InputSource source = new InputSource(in); source.setSystemId(url.toString()); parser.parse(source, new ModelConsumer(model)); in.close(); }
/**
 * Read a URL into SAX events.
 *
 * <p>The stream opened on the URL is closed once processing has finished, whether or not
 * it succeeded. Any {@link IOException} is rethrown wrapped in an {@code OXFException}.
 *
 * @param systemId system id of the document
 * @param xmlReceiver receiver to output to
 * @param parserConfiguration parser configuration
 * @param handleLexical whether the XML parser must output SAX LexicalHandler events,
 *     including comments
 */
public static void urlToSAX(
    String systemId,
    XMLReceiver xmlReceiver,
    XMLUtils.ParserConfiguration parserConfiguration,
    boolean handleLexical) {
  try {
    final InputStream urlStream = URLFactory.createURL(systemId).openStream();
    final InputSource source = new InputSource();
    source.setByteStream(urlStream);
    source.setSystemId(systemId);
    try {
      inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
    } finally {
      urlStream.close();
    }
  } catch (IOException e) {
    throw new OXFException(e);
  }
}
/**
 * Fetches the given URL and parses the response as an XML document.
 *
 * <p>The response is decoded with {@code ENCODING_UTF8}. Failures are logged and reported
 * by returning {@code null}. The underlying input stream is always closed; previously it
 * was never closed.
 *
 * @param url location passed to {@code HTMLTools.inputStream_GET}
 * @return the parsed document, or {@code null} if fetching or parsing failed
 */
private Document GetXMLDocument(String url) {
  InputStream input = null;
  try {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();
    input = HTMLTools.inputStream_GET(url, 5000);
    InputStreamReader reader = new InputStreamReader(input, ENCODING_UTF8);
    InputSource inSrc = new InputSource(reader);
    inSrc.setEncoding(ENCODING_UTF8);
    return db.parse(inSrc);
  } catch (ParserConfigurationException pce) {
    Print.logError("Parse error: " + pce);
    return null;
  } catch (SAXException se) {
    Print.logError("Parse error: " + se);
    return null;
  } catch (IOException ioe) {
    Print.logError("IO error: " + ioe);
    return null;
  } finally {
    if (input != null) {
      try {
        input.close();
      } catch (IOException ignored) {
        // best effort: a failure to close must not mask the parse result
      }
    }
  }
}
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { final InputSource is = new InputSource(); is.setSystemId(systemId); is.setPublicId(publicId); final URL url = URLFactory.createURL(systemId); // Would be nice to support XML Catalogs or similar here. See: // http://xerces.apache.org/xerces2-j/faq-xcatalogs.html if (url.getProtocol().equals("http")) { logger.warn( "XML entity resolver for public id: " + publicId + " is accessing external entity via HTTP: " + url.toExternalForm()); } is.setByteStream(url.openConnection().getInputStream()); return is; }
/** SAX entity resolver */ public InputSource resolveEntity(String name, String uri) throws IOException, SAXException { InputSource retval; String mappedURI = name2uri(name); InputStream stream = mapResource(name); // prefer explicit URI mappings, then bundled resources... if (mappedURI != null) { retval = new InputSource(mappedURI); retval.setPublicId(name); return retval; } else if (stream != null) { uri = "java:resource:" + (String) id2resource.get(name); // NOI18N retval = new InputSource(stream); retval.setPublicId(name); return retval; } else { return null; } }
/**
 * Extracts the full text from an HTML page.
 *
 * <p>The page is parsed with a {@code Parser} configured with an {@code HTMLSchema} and
 * namespace processing disabled. The content collected by a
 * {@code RejuvenationResearchParserHandler} is returned.
 *
 * <p>The HTML string is encoded as UTF-8 and the same encoding is declared on the input
 * source. Previously the bytes were produced with the platform default charset while the
 * source was declared as UTF-8, which corrupts non-ASCII text on non-UTF-8 platforms.
 *
 * @param html HTML page content
 * @return extracted content, or {@code null} if {@code judge} rejects the page or
 *     parsing fails
 */
@Override
public String parseFulltext(String html) {
  if (!judge(html)) return null;

  final String encoding = "UTF-8";
  XMLReader r = new Parser();
  HTMLSchema theSchema = new HTMLSchema();
  try {
    r.setProperty(Parser.schemaProperty, theSchema);
    r.setFeature(Parser.namespacesFeature, false);
  } catch (SAXNotRecognizedException e) {
    logger.fatal(e.getMessage());
    return null;
  } catch (SAXNotSupportedException e) {
    logger.fatal(e.getMessage());
    return null;
  }

  RejuvenationResearchParserHandler h = new RejuvenationResearchParserHandler();
  r.setContentHandler(h);
  try {
    // bytes and declared encoding must agree; an unsupported charset name is reported
    // as an IOException subclass and handled below
    InputSource s = new InputSource(new ByteArrayInputStream(html.getBytes(encoding)));
    s.setEncoding(encoding);
    r.parse(s);
    return h.getContent();
  } catch (IOException e) {
    logger.fatal(e.getMessage());
  } catch (SAXException e) {
    logger.fatal(e.getMessage());
  }
  return null;
}
/**
 * Converts an HTML document to XML.
 *
 * <p>If no reader is available ({@code READER == null}), or if parsing raises a
 * {@link SAXException}, the original input reference is returned unchanged.
 *
 * @param io io reference
 * @param opts html options
 * @return converted content carrying the name of the input, or {@code io} itself if the
 *     conversion was not possible
 * @throws IOException I/O exception
 */
private static IO toXML(final IO io, final HtmlOptions opts) throws IOException {
  // reader could not be initialized; fall back to XML
  if (READER == null) return io;
  try {
    // tries to extract the encoding from the input
    final TextInput ti = new TextInput(io);
    String enc = ti.encoding();
    final byte[] content = ti.content();
    // looks for a charset definition
    final byte[] encoding = token("charset=");
    int cs = indexOf(content, encoding);
    // NOTE(review): a match at offset 0 is treated like "not found"; confirm against
    // the return value of indexOf for a missing token
    if (cs > 0) {
      // extracts the encoding string: it starts right after "charset=" and ends before
      // the first following byte that is not greater than 0x28 (bytes are signed, so
      // negative values end the scan as well); the first byte itself is not tested
      cs += encoding.length;
      int ce = cs;
      final int cl = content.length;
      while (++ce < cl && content[ce] > 0x28) ;
      enc = string(substring(content, cs, ce));
    }
    // define input; unsupported encodings fall back to UTF8
    final InputSource is = new InputSource(new ArrayInput(content));
    is.setEncoding(supported(enc) ? normEncoding(enc) : UTF8);
    // define output: the writer object is created reflectively around a string writer
    final StringWriter sw = new StringWriter();
    final XMLReader reader = (XMLReader) Reflect.get(READER);
    final Object writer = Reflect.get(WRITER, sw);
    // set TagSoup options; the order matters, as later options may override
    // settings made by earlier ones
    if (opts.get(HtmlOptions.HTML)) {
      reader.setFeature("http://xml.org/sax/features/namespaces", false);
      opt("method", "html");
      opt("omit-xml-declaration", "yes");
    }
    if (opts.get(HtmlOptions.NONS))
      reader.setFeature("http://xml.org/sax/features/namespaces", false);
    if (opts.get(HtmlOptions.OMITXML)) opt("omit-xml-declaration", "yes");
    if (opts.get(HtmlOptions.NOBOGONS)) reader.setFeature(FEATURES + "ignore-bogons", true);
    if (opts.get(HtmlOptions.NODEFAULTS))
      reader.setFeature(FEATURES + "default-attributes", false);
    if (opts.get(HtmlOptions.NOCOLONS)) reader.setFeature(FEATURES + "translate-colons", true);
    if (opts.get(HtmlOptions.NORESTART))
      reader.setFeature(FEATURES + "restart-elements", false);
    if (opts.get(HtmlOptions.IGNORABLE))
      reader.setFeature(FEATURES + "ignorable-whitespace", true);
    // EMPTYBOGONS and ANY set the same feature; if both are given, ANY wins
    if (opts.get(HtmlOptions.EMPTYBOGONS)) reader.setFeature(FEATURES + "bogons-empty", true);
    if (opts.get(HtmlOptions.ANY)) reader.setFeature(FEATURES + "bogons-empty", false);
    if (opts.get(HtmlOptions.NOROOTBOGONS)) reader.setFeature(FEATURES + "root-bogons", false);
    if (opts.get(HtmlOptions.NOCDATA)) reader.setFeature(FEATURES + "cdata-elements", false);
    if (opts.get(HtmlOptions.LEXICAL))
      reader.setProperty("http://xml.org/sax/properties/lexical-handler", writer);
    // an explicit method overrides the "html" method set by the HTML option above
    if (opts.contains(HtmlOptions.METHOD)) opt("method", opts.get(HtmlOptions.METHOD));
    if (opts.contains(HtmlOptions.DOCTYPESYS))
      opt("doctype-system", opts.get(HtmlOptions.DOCTYPESYS));
    if (opts.contains(HtmlOptions.DOCTYPEPUB))
      opt("doctype-public", opts.get(HtmlOptions.DOCTYPEPUB));
    // an explicit encoding overrides the one detected from the input
    if (opts.contains(HtmlOptions.ENCODING)) is.setEncoding(opts.get(HtmlOptions.ENCODING));
    // end TagSoup options
    reader.setContentHandler((ContentHandler) writer);
    reader.parse(is);
    // the serialized result replaces the original content, keeping the input name
    return new IOContent(token(sw.toString()), io.name());
  } catch (final SAXException ex) {
    // conversion failed: report the error and return the unconverted input
    Util.errln(ex);
    return io;
  }
}
public static MiningResult importFile(InputStream input) throws IOException { try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); Document doc; // NodeList netNodes; dbf.setValidating(false); dbf.setIgnoringComments(true); dbf.setIgnoringElementContentWhitespace(true); // dbf.setExpandEntityReferences(false); // dbf.setNamespaceAware(false); DocumentBuilder db = dbf.newDocumentBuilder(); db.setEntityResolver( new EntityResolver() { public InputSource resolveEntity(String publicId, String systemId) { if (systemId.indexOf("ARIS-Export") != -1) { return new InputSource("file:" + About.EXTLIBLOCATION() + "ARIS-Export101.dtd"); } else { return null; } } }); InputSource inpStream = new InputSource(input); inpStream.setSystemId("file:" + System.getProperty("user.dir", "")); doc = db.parse(inpStream); // check if root element is a aml tag Message.add("parsing done" + doc, Message.DEBUG); if (!(doc.getDocumentElement().getNodeName().equals("AML"))) { Message.add("aml tag not found", Message.ERROR); throw new Exception("aml tag not found"); } else { Message.add("aml root element found"); } EPCResult result = new EPCResult(null, (EPC) null); HashMap ObjDef_LinkId = new HashMap(); HashMap modelid_net = new HashMap(); HashMap ObjDef_Name = new HashMap(); HashMap function_LinkId = new HashMap(); HashMap ModelId_ModelType = new HashMap(); traverseAMLforObjectNames( ObjDef_Name, doc.getDocumentElement(), ObjDef_LinkId, ModelId_ModelType); Iterator findLinkToEpc = ObjDef_LinkId.keySet().iterator(); while (findLinkToEpc.hasNext()) { String currentObjDef = (String) findLinkToEpc.next(); String Links = (String) ObjDef_LinkId.get(currentObjDef); StringTokenizer linkSet = new StringTokenizer(Links); String realEpcLink = ""; while (linkSet.hasMoreTokens()) { String currentLink = linkSet.nextToken(); if (ModelId_ModelType.get(currentLink).equals("MT_EEPC")) { realEpcLink = currentLink; break; } } if (realEpcLink.equals(" ")) { ObjDef_LinkId.remove(currentObjDef); } 
else { ObjDef_LinkId.put(currentObjDef, realEpcLink); } } result = traverseAML( result, doc.getDocumentElement(), null, ObjDef_Name, ObjDef_LinkId, modelid_net, function_LinkId); Iterator hierarchicalFunctions = function_LinkId.keySet().iterator(); while (hierarchicalFunctions.hasNext()) { EPCSubstFunction f = (EPCSubstFunction) hierarchicalFunctions.next(); f.setSubstitutedEPC((EPC) modelid_net.get(function_LinkId.get(f))); // Message.add(f.getSubstitutedEPC().getName()); } return result; } catch (Throwable x) { Message.add(x.toString()); throw new IOException(x.getMessage()); } }