/**
 * Extracts rows of the wiki table found in {@code data/geographic-area-data.html}
 * and writes them to {@code data/parsed.yaml}.
 *
 * <p>Every row matched by the XPath expression except the first one is emitted
 * as a numbered entry. Within a row, child nodes 1, 3 and 5 are written as
 * "Geographic entity", "Area" and "Notes"; all other child nodes are ignored.
 *
 * <p>Any exception is caught and printed to standard output, as before. The
 * output writer is now closed on failure as well as on success.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    try {
        DocumentBuilderFactory db = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = db.newDocumentBuilder();
        Document dom = builder.parse("data/geographic-area-data.html");

        XPath path = XPathFactory.newInstance().newXPath();
        XPathExpression table = path.compile(
                "//div[@id='mw-content-text']/table[contains(@class,'wikitable')]/tr");
        NodeList wikiData = (NodeList) table.evaluate(dom, XPathConstants.NODESET);

        // The output file is opened only after parsing succeeded, so a parse
        // failure does not create or truncate it.
        BufferedWriter out = new BufferedWriter(new FileWriter("data/parsed.yaml"));
        try {
            // Row 0 is skipped (presumably the table header -- confirm against the input).
            for (int i = 1; i < wikiData.getLength(); i++) {
                out.write(i + ":\n");
                NodeList children = wikiData.item(i).getChildNodes();
                for (int j = 0; j < children.getLength(); j++) {
                    String cleanData;
                    // NOTE(review): values are written unescaped and indented with
                    // tabs; strict YAML parsers may reject this. Left unchanged so
                    // the produced file stays byte-identical.
                    switch (j) {
                        case 1:
                            cleanData = decompose(children.item(j).getTextContent())
                                    .trim().replaceAll("[^a-zA-Z\\s]+", "");
                            out.write("\t\"Geographic entity\": \"" + cleanData + "\"\n");
                            break;
                        case 3:
                            cleanData = decompose(children.item(j).getTextContent())
                                    .trim().replaceAll(",", "");
                            out.write("\t\"Area\": \"" + cleanData + "\"\n");
                            break;
                        case 5:
                            cleanData = decompose(children.item(j).getTextContent()).trim();
                            out.write("\t\"Notes\": \"" + cleanData + "\"\n");
                            break;
                        default:
                            // Even-indexed children carry no data; anything past 5 is ignored.
                            break;
                    }
                }
            }
        } finally {
            // Always release the file handle, even when writing fails part-way.
            out.close();
        }
    } catch (Exception e) {
        System.out.println(e);
    }
}
/**
 * Applies the {@code opt} entries of an operation node to every {@code target}
 * text file listed under it.
 *
 * <p>For each target, lines of the form {@code name<separator>value} whose
 * trimmed name matches an option are rewritten as
 * {@code name <separator> value} using the option's value. Lines that do not
 * split into at least two parts, or whose name matches no option, are copied
 * unchanged. Options not found in the file are appended at the end. The result
 * is built in a temporary file and then copied over the target.
 *
 * <p>The separator is a single character: the target's {@code separator}
 * attribute if present and one character long, otherwise the operation's
 * {@code separator} child, otherwise {@code "="}.
 *
 * <p>An {@link IOException} while processing a target is reported through
 * {@code Utils.onError} with an {@code Error.WriteTxtConfig} for that target.
 *
 * @param operation node holding {@code target}, {@code opt} and optional
 *                  {@code separator} children; nothing is done when it has no
 *                  targets or no options
 */
private void processTxt(Node operation) {
    List<Node> targets = getChildNodes(operation, "target");
    List<Node> optionNodes = getChildNodes(operation, "opt");
    List<Node> separatorNode = getChildNodes(operation, "separator");
    if (targets.isEmpty() || optionNodes.isEmpty()) {
        return;
    }

    String defaultSeparator = "=";
    String globalSeparator = defaultSeparator;
    if (!separatorNode.isEmpty()) {
        globalSeparator = separatorNode.get(0).getTextContent();
        if (globalSeparator.length() != 1) {
            globalSeparator = defaultSeparator;
        }
    }

    // Later <opt> entries with the same name override earlier ones.
    Map<String, String> options = new HashMap<String, String>();
    for (Node option : optionNodes) {
        String name = option.getAttributes().getNamedItem("name").getNodeValue();
        options.put(name, option.getTextContent());
    }

    for (Node targetNode : targets) {
        File target = new File(absolutePath(targetNode.getTextContent()));
        File tmpFile = new File(Utils.timestamp());
        BufferedWriter bw = null;
        BufferedReader br = null;
        try {
            Node separatorAttr = targetNode.getAttributes().getNamedItem("separator");
            String separator = (separatorAttr == null)
                    ? globalSeparator
                    : separatorAttr.getNodeValue();
            if (separator.length() != 1) {
                separator = globalSeparator;
            }
            // String.split takes a regex; quote the separator so characters
            // such as '|', '.', '*' or '\' are matched literally.
            String splitRegex = java.util.regex.Pattern.quote(separator);

            // Options still to be appended to THIS target. A fresh copy per
            // target keeps one file's contents from affecting what another gets.
            Map<String, String> pending = new HashMap<String, String>(options);

            bw = new BufferedWriter(new FileWriter(tmpFile));
            if (target.exists()) {
                br = new BufferedReader(new FileReader(target));
                for (String line; (line = br.readLine()) != null; ) {
                    String[] parts = line.split(splitRegex);
                    String optName = (parts.length < 2) ? null : parts[0].trim();
                    if (optName != null && options.containsKey(optName)) {
                        // Every occurrence of a known option is rewritten,
                        // including duplicates within the same file.
                        bw.write(optName + " " + separator + " " + options.get(optName));
                        pending.remove(optName);
                    } else {
                        bw.write(line);
                    }
                    bw.newLine();
                }
                br.close();
            }

            for (Map.Entry<String, String> entry : pending.entrySet()) {
                bw.write(entry.getKey() + " " + separator + " " + entry.getValue());
                bw.newLine();
            }
            // Must be flushed and closed before the copy; a failure here is
            // a real write error and is reported below.
            bw.close();

            FileUtils.copyFile(tmpFile, target);
            FileUtils.forceDelete(tmpFile);
        } catch (IOException ex) {
            Utils.onError(new Error.WriteTxtConfig(target.getPath()));
        } finally {
            // Release handles on the failure path; closing an already-closed
            // stream is harmless, and a secondary close error adds nothing to
            // the failure that has already been reported.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // best-effort cleanup
                }
            }
            if (bw != null) {
                try {
                    bw.close();
                } catch (IOException ignored) {
                    // best-effort cleanup
                }
            }
            // Do not leave the temporary file behind when processing failed.
            if (tmpFile.exists() && !tmpFile.delete()) {
                tmpFile.deleteOnExit();
            }
        }
    }
}