public KodiScraper parseScraper(KodiScraper scraper, List<File> common) throws Exception {
  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  DocumentBuilder parser = factory.newDocumentBuilder();
  File scraperFile = new File(scraper.getFolder(), scraper.getScraperXml());
  String xmlFile = FileUtils.readFileToString(scraperFile, "UTF-8");
  xmlFile = KodiUtil.fixXmlHeader(xmlFile);

  Document xml;
  try {
    InputStream stream = new ByteArrayInputStream(xmlFile.getBytes(StandardCharsets.UTF_8));
    xml = parser.parse(stream);
  }
  catch (SAXException e) {
    LOGGER.warn("Error parsing " + scraperFile + " - trying fallback");
    // eg FilmAffinity.com scraper
    // replace all known entities with their unicode notation
    // this fixes the "entity 'Iacute' was referenced, but not declared" parsing problems, since
    // we do not have to add doctype entity declarations
    // might replace too much; so this is only a fallback
    for (String[] ent : EntityArrays.ISO8859_1_UNESCAPE()) {
      xmlFile = xmlFile.replace(ent[0], ent[1]);
    }
    InputStream stream = new ByteArrayInputStream(xmlFile.getBytes(StandardCharsets.UTF_8));
    xml = parser.parse(stream);
  }

  Element docEl = xml.getDocumentElement();
  NodeList nl = docEl.getChildNodes();
  for (int i = 0; i < nl.getLength(); i++) {
    Node n = nl.item(i);
    if (n.getNodeType() == Node.ELEMENT_NODE) {
      Element el = (Element) n;
      ScraperFunction func = new ScraperFunction();
      func.setName(el.getNodeName());
      func.setClearBuffers(parseBoolean(el.getAttribute("clearbuffers"), true));
      func.setAppendBuffer(parseAppendBuffer(el.getAttribute("dest")));
      func.setDest(parseInt(el.getAttribute("dest")));
      scraper.addFunction(func);
      // functions contain regexp expressions, so let's get those.
      processRegexps(func, el);
    }
  }

  // get all common scraper functions
  readScraperFunctions(scraper, common);
  return scraper;
}
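
/*
 * Illustrative sketch only (not taken from this project): the functions parsed above typically come
 * from a Kodi scraper XML along these lines, where each child element of the document root becomes a
 * ScraperFunction, "dest" names the target buffer and "clearbuffers" controls buffer clearing. The
 * concrete element names and regex content below are assumptions for illustration.
 *
 * <scraper>
 *   <GetSearchResults dest="8" clearbuffers="no">
 *     <RegExp input="$$1" output="&lt;results&gt;\1&lt;/results&gt;" dest="8">
 *       <expression clear="yes">(.*)</expression>
 *     </RegExp>
 *   </GetSearchResults>
 * </scraper>
 */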
private void readScraperFunctions(KodiScraper scraper, List<File> common) {
  for (File file : common) {
    // System.out.println("parsing common file: " + file);
    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      DocumentBuilder parser = factory.newDocumentBuilder();
      Document xml = parser.parse(file);
      Element docEl = xml.getDocumentElement();

      // only process xml files with scraperfunctions
      // (use equals() here - comparing Strings with == checks identity, not content)
      if ("scraperfunctions".equals(docEl.getNodeName())) {
        NodeList nl = docEl.getChildNodes();

        // extract all scraperfunctions
        for (int i = 0; i < nl.getLength(); i++) {
          Node n = nl.item(i);
          if (n.getNodeType() == Node.ELEMENT_NODE) {
            Element el = (Element) n;
            ScraperFunction func = new ScraperFunction();
            func.setName(el.getNodeName());
            func.setClearBuffers(parseBoolean(el.getAttribute("clearbuffers"), true));
            func.setAppendBuffer(parseAppendBuffer(el.getAttribute("dest")));
            func.setDest(parseInt(el.getAttribute("dest")));
            scraper.addFunction(func);
            // functions contain regexp expressions, so let's get those.
            processRegexps(func, el);
          }
        }
      }
    }
    catch (Exception e) {
      LOGGER.error("problem parsing scraper function", e);
    }
  }
}