Document parseDocument(String filename) throws Exception {
   FileReader reader = new FileReader(filename);
   String firstLine = new BufferedReader(reader).readLine();
   reader.close();
   Document document = null;
   if (firstLine.startsWith("<?xml")) {
     System.err.println("XML detected; using default XML parser.");
   } else {
     try {
       Class nekoParserClass = Class.forName("org.cyberneko.html.parsers.DOMParser");
       Object parser = nekoParserClass.newInstance();
       Method parse = nekoParserClass.getMethod("parse", new Class[] {String.class});
       Method getDocument = nekoParserClass.getMethod("getDocument", new Class[0]);
       parse.invoke(parser, filename);
       document = (Document) getDocument.invoke(parser);
     } catch (Exception e) {
       System.err.println("NekoHTML HTML parser not found; HTML4 support disabled.");
     }
   }
   if (document == null) {
     DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
     try { // http://www.w3.org/blog/systeam/2008/02/08/w3c_s_excessive_dtd_traffic
       factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
     } catch (ParserConfigurationException e) {
       System.err.println("Warning: Could not disable external DTD loading");
     }
     DocumentBuilder builder = factory.newDocumentBuilder();
     document = builder.parse(filename);
   }
   return document;
 }
コード例 #2
0
ファイル: DocHiveTemplate.java プロジェクト: QuLogic/dochive
  // -----------------------------------------------------
  // Description: Determine if a template exists for the
  // given file and return true on success.
  // -----------------------------------------------------
  boolean templateExistFor(String fileName, String destinationDirectory) {

    templateIdentified = false;

    // from template
    int intOriginX = 0;
    int intOriginY = 0;

    // from template
    int intX = 0;
    int intY = 0;
    int intWidth = 0;
    int intHeight = 0;

    // calculated
    int viaTemplateX = 0;
    int viaTemplateY = 0;
    int viaTemplatePlusWidth = 0;
    int viaTemplatePlusHeight = 0;

    String fileName_woext = fileName.substring(0, fileName.lastIndexOf('.'));
    String pathPlusFileName_woext =
        destinationDirectory + File.separator + fileName_woext + File.separator + fileName_woext;

    try {

      File folder = new File(templateLocation);
      File[] listOfFiles = folder.listFiles();

      long startTime = System.currentTimeMillis();

      for (File file : listOfFiles) {

        if (file.isFile()) {

          templateName = "";

          DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
          DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
          Document doc = dBuilder.parse(file);
          doc.getDocumentElement().normalize();

          System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
          NodeList nList = doc.getElementsByTagName("condition");

          for (int temp = 0; temp < nList.getLength(); temp++) {

            Node nNode = nList.item(temp);
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {

              Element eElement = (Element) nNode;

              String myX = getTagValue("x", eElement);
              String myY = getTagValue("y", eElement);
              String myWidth = getTagValue("width", eElement);
              String myHeight = getTagValue("height", eElement);
              String mySuffix = getTagValue("suffix", eElement);
              String myRequirements = getTagValue("required", eElement);

              intX = Integer.parseInt(myX);
              intY = Integer.parseInt(myY);
              intWidth = Integer.parseInt(myWidth);
              intHeight = Integer.parseInt(myHeight);

              viaTemplateX = intX - (intOriginX - pageX);
              viaTemplateY = intY - (intOriginY - pageY);
              viaTemplatePlusWidth = viaTemplateX + intWidth;
              viaTemplatePlusHeight = viaTemplateY + intHeight;

              Spawn.execute(
                  Settings.Programs.CONVERT.path(),
                  pathPlusFileName_woext + "_trim.png",
                  "-crop",
                  String.format("%dx%d+%d+%d", intWidth, intHeight, viaTemplateX, viaTemplateY),
                  "+repage",
                  pathPlusFileName_woext + "_" + mySuffix + ".png");
              Spawn.execute(
                  Settings.Programs.CONVERT.path(),
                  pathPlusFileName_woext + "_trim.png",
                  "-fill",
                  "none",
                  "-stroke",
                  "red",
                  "-strokewidth",
                  "3",
                  "-draw",
                  String.format(
                      "rectangle %d,%d %d,%d",
                      viaTemplateX, viaTemplateY, viaTemplatePlusWidth, viaTemplatePlusHeight),
                  "+repage",
                  pathPlusFileName_woext + "_draw.png");
              Spawn.execute(
                  Settings.Programs.TESSERACT.path(),
                  pathPlusFileName_woext + "_" + mySuffix + ".png",
                  pathPlusFileName_woext + "_" + mySuffix);

              String line = ""; // String that holds current file line
              String accumulate = "";

              try {
                FileReader input = new FileReader(pathPlusFileName_woext + "_" + mySuffix + ".txt");
                BufferedReader bufRead = new BufferedReader(input);

                line = bufRead.readLine();

                while (line != null) {
                  accumulate += line;
                  line = bufRead.readLine();
                }

                bufRead.close();
                input.close();

              } catch (IOException e) {
                e.printStackTrace();
              }

              String[] requirements = myRequirements.split("#");
              for (String requirement : requirements) {
                if (requirement.equals(accumulate.trim())) {
                  templateName = file.getName();
                  templateIdentified = true;
                  return templateIdentified;
                }
              }
            }
          }
        }
      }

      // record end time
      long endTime = System.currentTimeMillis();
      System.out.println(
          "Template Search [" + fileName + "] " + (endTime - startTime) / 1000 + " seconds");

    } catch (Exception e) {
      e.printStackTrace();
    }

    return templateIdentified;
  }
コード例 #3
0
ファイル: Generate.java プロジェクト: EmanLeli/jhmm
  public static void main(String args[]) {

    if (args.length < 3) {
      System.err.println("Usage: Generate model-file ( -s length | -c count length )");
      System.exit(1);
    }

    try {
      SAXParserFactory saxFactory = SAXParserFactory.newInstance();
      saxFactory.setNamespaceAware(true);
      SAXParser saxParser = saxFactory.newSAXParser();
      XMLReader xmlReader = saxParser.getXMLReader();

      XMLModelReader modelReader = new XMLModelReader(xmlReader);
      FileReader modelInput = new FileReader(args[0]);
      InputSource source = new InputSource(modelInput);

      Model model = modelReader.load(source);

      modelInput.close();

      model.check();

      if (args[1].equals("-s")) {
        int[][] result = model.generateSequence(Integer.parseInt(args[2]));

        List lexicon = model.getLexicon();
        for (int i = 0; i < result[0].length; i++) {
          System.out.print((Character) lexicon.get(result[0][i]));
        }
        System.out.println();
        int numberOfStates = model.getNumberOfStates();
        if (numberOfStates > 9) {
          for (int i = 0; i < result[1].length; i++) {
            System.out.print(result[1][i]);
            System.out.print(',');
          }
        } else {
          for (int i = 0; i < result[1].length; i++) {
            System.out.print(result[1][i]);
          }
        }
        System.out.println();
        for (int i = 1; i < numberOfStates; i++) {
          System.out.println(i + " = " + model.getStateName(i));
        }
      } else if (args[1].equals("-c")) {
        List lexicon = model.getLexicon();
        int count = Integer.parseInt(args[2]);
        int length = Integer.parseInt(args[3]);
        ListOfSequences uniqueSeqs = new ListOfSequences();
        for (int i = 0; i < count; i++) {
          int[][] result = model.generateSequence(length);
          uniqueSeqs.addSequence(result[0]);
        }
        Iterator seqs = uniqueSeqs.iterator();
        while (seqs.hasNext()) {
          SequenceCount seq = (SequenceCount) seqs.next();
          int[] key = seq.getSequence();
          for (int i = 0; i < key.length; i++) {
            System.out.print((Character) lexicon.get(key[i]));
          }
          System.out.print(',');
          System.out.println(seq.getCount());
        }
      }

    } catch (java.io.IOException ex) {
      ex.printStackTrace();
    } catch (org.xml.sax.SAXException ex) {
      System.err.println(ex.getMessage());
    } catch (javax.xml.parsers.ParserConfigurationException ex) {
      ex.printStackTrace();
    }
  }