/** * Reads an XML document from an input source and copies its values into the specified object * * @param ob The object to receive the values * @param source The location of the XML document * @throws IOException If there is an error reading the document */ public void readObject(Object ob, InputSource source) throws IOException { try { // Create a document builder to read the document DocumentBuilder builder = factory.newDocumentBuilder(); // Read the document Document doc = builder.parse(source); // Get the root element Element element = doc.getDocumentElement(); // Copy the root element into the bean readObject(ob, element); } catch (SAXException exc) { throw new IOException("Error parsing XML document: " + exc.toString()); } catch (ParserConfigurationException exc) { throw new IOException("Error parsing XML document: " + exc.toString()); } }
public static void main(String[] args) { try { DocumentBuilderFactory db = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = db.newDocumentBuilder(); Document dom = builder.parse("data/geographic-area-data.html"); XPathFactory xpath = XPathFactory.newInstance(); XPath path = xpath.newXPath(); XPathExpression table = path.compile("//div[@id='mw-content-text']/table[contains(@class,'wikitable')]/tr"); NodeList wikiData = (NodeList) table.evaluate(dom, XPathConstants.NODESET); NodeList children; String currentData, cleanData; /* Open output stream */ FileWriter fstream = new FileWriter("data/parsed.yaml"); BufferedWriter out = new BufferedWriter(fstream); for (int i = 0; i < wikiData.getLength(); i++) { if (i == 0) { continue; } out.write(new Integer(i).toString() + ":\n"); children = wikiData.item(i).getChildNodes(); for (int j = 0; j < children.getLength(); j++) { currentData = (String) children.item(j).getTextContent(); switch (j) { case 0: /* Current Data is empty */ break; case 1: cleanData = decompose(currentData).trim().replaceAll("[^a-zA-Z\\s]+", ""); out.write("\t\"Geographic entity\": \"" + cleanData + "\"\n"); break; case 2: /* Current Data is empty */ break; case 3: cleanData = decompose(currentData).trim().replaceAll(",", ""); out.write("\t\"Area\": \"" + cleanData + "\"\n"); break; case 4: /* Current Data is empty */ break; case 5: cleanData = decompose(currentData).trim(); out.write("\t\"Notes\": \"" + cleanData + "\"\n"); break; case 6: /* Current Data is empty */ break; default: /* System.out.println("[" + j + "] Hit default case statement. Current Data is: " + currentData); */ break; } } } /* Close output stream */ out.close(); } catch (Exception e) { System.out.println(e); } }