public static void main(String[] args) {
    try {
      DocumentBuilderFactory db = DocumentBuilderFactory.newInstance();
      DocumentBuilder builder = db.newDocumentBuilder();
      Document dom = builder.parse("data/geographic-area-data.html");
      XPathFactory xpath = XPathFactory.newInstance();
      XPath path = xpath.newXPath();
      XPathExpression table =
          path.compile("//div[@id='mw-content-text']/table[contains(@class,'wikitable')]/tr");
      NodeList wikiData = (NodeList) table.evaluate(dom, XPathConstants.NODESET);

      NodeList children;
      String currentData, cleanData;

      /* Open output stream */
      FileWriter fstream = new FileWriter("data/parsed.yaml");
      BufferedWriter out = new BufferedWriter(fstream);

      for (int i = 0; i < wikiData.getLength(); i++) {
        if (i == 0) {
          continue;
        }
        out.write(new Integer(i).toString() + ":\n");
        children = wikiData.item(i).getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
          currentData = (String) children.item(j).getTextContent();
          switch (j) {
            case 0:
              /* Current Data is empty */
              break;
            case 1:
              cleanData = decompose(currentData).trim().replaceAll("[^a-zA-Z\\s]+", "");
              out.write("\t\"Geographic entity\": \"" + cleanData + "\"\n");
              break;
            case 2:
              /* Current Data is empty */
              break;
            case 3:
              cleanData = decompose(currentData).trim().replaceAll(",", "");
              out.write("\t\"Area\": \"" + cleanData + "\"\n");
              break;
            case 4:
              /* Current Data is empty */
              break;
            case 5:
              cleanData = decompose(currentData).trim();
              out.write("\t\"Notes\": \"" + cleanData + "\"\n");
              break;
            case 6:
              /* Current Data is empty */
              break;
            default:
              /* System.out.println("[" + j + "] Hit default case statement. Current Data is: " + currentData); */
              break;
          }
        }
      }
      /* Close output stream */
      out.close();
    } catch (Exception e) {
      System.out.println(e);
    }
  }
Example #2
0
  private void processTxt(Node operation) {
    List<Node> targets = getChildNodes(operation, "target");
    List<Node> optionNodes = getChildNodes(operation, "opt");
    List<Node> separatorNode = getChildNodes(operation, "separator");
    if (targets.isEmpty() || optionNodes.isEmpty()) {
      return;
    }
    String defaultSeparator = "=";
    String globalSeparator = defaultSeparator;
    if (!separatorNode.isEmpty()) {
      globalSeparator = separatorNode.get(0).getTextContent();
      if (globalSeparator.length() != 1) {
        globalSeparator = defaultSeparator;
      }
    }
    Map<String, String> options = new HashMap<String, String>();
    Map<String, String> processedOptions = new HashMap<String, String>();
    for (int i = 0; i < optionNodes.size(); i++) {
      Node option = optionNodes.get(i);
      String name = option.getAttributes().getNamedItem("name").getNodeValue();
      String value = option.getTextContent();
      if (options.containsKey(name)) {
        options.remove(name);
      }
      options.put(name, value);
    }
    for (int t = 0; t < targets.size(); t++) {
      File target = new File(absolutePath(targets.get(t).getTextContent()));
      File tmpFile = new File(Utils.timestamp());
      BufferedWriter bw = null;
      BufferedReader br = null;
      try {
        Node separatorAttr = targets.get(t).getAttributes().getNamedItem("separator");
        String separator = (separatorAttr == null) ? globalSeparator : separatorAttr.getNodeValue();
        if (separator.length() != 1) {
          separator = globalSeparator;
        }
        bw = new BufferedWriter(new FileWriter(tmpFile));
        if (target.exists()) {
          br = new BufferedReader(new FileReader(target));
          for (String line; (line = br.readLine()) != null; ) {
            String[] parts = line.split(separator);
            if (parts.length < 2) {
              bw.write(line);
              bw.newLine();
              continue;
            }

            String optName = parts[0].trim();
            if (options.containsKey(optName)) {
              String optValue = options.get(optName);
              bw.write(optName + " " + separator + " " + optValue);
              bw.newLine();
              processedOptions.put(optName, optValue);
              options.remove(optName);
            } else if (processedOptions.containsKey(optName)) {
              bw.write(optName + " " + separator + " " + processedOptions.get(optName));
              bw.newLine();
            } else {
              bw.write(line);
              bw.newLine();
            }
          }
          br.close();
        }
        for (Map.Entry<String, String> entry : options.entrySet()) {
          bw.write(entry.getKey() + " " + separator + " " + entry.getValue());
          bw.newLine();
        }
        bw.close();
        FileUtils.copyFile(tmpFile, target);
        FileUtils.forceDelete(tmpFile);
      } catch (IOException ex) {
        Utils.onError(new Error.WriteTxtConfig(target.getPath()));
      }
    }
  }