Document parseDocument(String filename) throws Exception { FileReader reader = new FileReader(filename); String firstLine = new BufferedReader(reader).readLine(); reader.close(); Document document = null; if (firstLine.startsWith("<?xml")) { System.err.println("XML detected; using default XML parser."); } else { try { Class nekoParserClass = Class.forName("org.cyberneko.html.parsers.DOMParser"); Object parser = nekoParserClass.newInstance(); Method parse = nekoParserClass.getMethod("parse", new Class[] {String.class}); Method getDocument = nekoParserClass.getMethod("getDocument", new Class[0]); parse.invoke(parser, filename); document = (Document) getDocument.invoke(parser); } catch (Exception e) { System.err.println("NekoHTML HTML parser not found; HTML4 support disabled."); } } if (document == null) { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); try { // http://www.w3.org/blog/systeam/2008/02/08/w3c_s_excessive_dtd_traffic factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } catch (ParserConfigurationException e) { System.err.println("Warning: Could not disable external DTD loading"); } DocumentBuilder builder = factory.newDocumentBuilder(); document = builder.parse(filename); } return document; }
// ----------------------------------------------------- // Description: Determine if a template exists for the // given file and return true on success. // ----------------------------------------------------- boolean templateExistFor(String fileName, String destinationDirectory) { templateIdentified = false; // from template int intOriginX = 0; int intOriginY = 0; // from template int intX = 0; int intY = 0; int intWidth = 0; int intHeight = 0; // calculated int viaTemplateX = 0; int viaTemplateY = 0; int viaTemplatePlusWidth = 0; int viaTemplatePlusHeight = 0; String fileName_woext = fileName.substring(0, fileName.lastIndexOf('.')); String pathPlusFileName_woext = destinationDirectory + File.separator + fileName_woext + File.separator + fileName_woext; try { File folder = new File(templateLocation); File[] listOfFiles = folder.listFiles(); long startTime = System.currentTimeMillis(); for (File file : listOfFiles) { if (file.isFile()) { templateName = ""; DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(file); doc.getDocumentElement().normalize(); System.out.println("Root element :" + doc.getDocumentElement().getNodeName()); NodeList nList = doc.getElementsByTagName("condition"); for (int temp = 0; temp < nList.getLength(); temp++) { Node nNode = nList.item(temp); if (nNode.getNodeType() == Node.ELEMENT_NODE) { Element eElement = (Element) nNode; String myX = getTagValue("x", eElement); String myY = getTagValue("y", eElement); String myWidth = getTagValue("width", eElement); String myHeight = getTagValue("height", eElement); String mySuffix = getTagValue("suffix", eElement); String myRequirements = getTagValue("required", eElement); intX = Integer.parseInt(myX); intY = Integer.parseInt(myY); intWidth = Integer.parseInt(myWidth); intHeight = Integer.parseInt(myHeight); viaTemplateX = intX - (intOriginX - pageX); viaTemplateY = intY - (intOriginY - 
pageY); viaTemplatePlusWidth = viaTemplateX + intWidth; viaTemplatePlusHeight = viaTemplateY + intHeight; Spawn.execute( Settings.Programs.CONVERT.path(), pathPlusFileName_woext + "_trim.png", "-crop", String.format("%dx%d+%d+%d", intWidth, intHeight, viaTemplateX, viaTemplateY), "+repage", pathPlusFileName_woext + "_" + mySuffix + ".png"); Spawn.execute( Settings.Programs.CONVERT.path(), pathPlusFileName_woext + "_trim.png", "-fill", "none", "-stroke", "red", "-strokewidth", "3", "-draw", String.format( "rectangle %d,%d %d,%d", viaTemplateX, viaTemplateY, viaTemplatePlusWidth, viaTemplatePlusHeight), "+repage", pathPlusFileName_woext + "_draw.png"); Spawn.execute( Settings.Programs.TESSERACT.path(), pathPlusFileName_woext + "_" + mySuffix + ".png", pathPlusFileName_woext + "_" + mySuffix); String line = ""; // String that holds current file line String accumulate = ""; try { FileReader input = new FileReader(pathPlusFileName_woext + "_" + mySuffix + ".txt"); BufferedReader bufRead = new BufferedReader(input); line = bufRead.readLine(); while (line != null) { accumulate += line; line = bufRead.readLine(); } bufRead.close(); input.close(); } catch (IOException e) { e.printStackTrace(); } String[] requirements = myRequirements.split("#"); for (String requirement : requirements) { if (requirement.equals(accumulate.trim())) { templateName = file.getName(); templateIdentified = true; return templateIdentified; } } } } } } // record end time long endTime = System.currentTimeMillis(); System.out.println( "Template Search [" + fileName + "] " + (endTime - startTime) / 1000 + " seconds"); } catch (Exception e) { e.printStackTrace(); } return templateIdentified; }
/**
 * Command-line entry point: loads a model from an XML file and prints generated sequences.
 *
 * <p>Usage: {@code Generate model-file ( -s length | -c count length )}
 * <ul>
 *   <li>{@code -s length}: generates one sequence of the given length. Prints the symbols
 *       (looked up in the model's lexicon via {@code result[0]}), then the values of
 *       {@code result[1]} (presumably the state path; comma-terminated when the model has
 *       more than 9 states so multi-digit values stay distinguishable), then a
 *       {@code index = name} legend for states 1 and up.</li>
 *   <li>{@code -c count length}: generates {@code count} sequences of the given length,
 *       adds them to a {@code ListOfSequences}, and prints each entry of that list as
 *       {@code symbols,count}.</li>
 * </ul>
 *
 * <p>Invalid command lines (too few arguments, unknown option, non-numeric values) print the
 * usage message and exit with status 1. I/O, SAX and parser-configuration errors are reported
 * on standard error.
 *
 * @param args command-line arguments as described above
 */
public static void main(String args[]) {
    final String usage = "Usage: Generate model-file ( -s length | -c count length )";
    if (args.length < 3) {
        System.err.println(usage);
        System.exit(1);
    }

    boolean sequenceMode = args[1].equals("-s");
    boolean countMode = args[1].equals("-c");
    // "-c" takes one more argument than "-s"; reject it here rather than failing with an
    // ArrayIndexOutOfBoundsException after the model has been loaded.
    if ((!sequenceMode && !countMode) || (countMode && args.length < 4)) {
        System.err.println(usage);
        System.exit(1);
    }

    int count = 0;
    int length;
    try {
        if (countMode) {
            count = Integer.parseInt(args[2]);
            length = Integer.parseInt(args[3]);
        } else {
            length = Integer.parseInt(args[2]);
        }
    } catch (NumberFormatException ex) {
        System.err.println("Invalid number: " + ex.getMessage());
        System.err.println(usage);
        System.exit(1);
        return; // unreachable; lets the compiler see that length is always assigned
    }

    try {
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        saxFactory.setNamespaceAware(true);
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        XMLModelReader modelReader = new XMLModelReader(xmlReader);

        // NOTE(review): a FileReader decodes with the platform charset, so any encoding named
        // in the XML declaration is not honoured - confirm this is acceptable.
        Model model;
        FileReader modelInput = new FileReader(args[0]);
        try {
            model = modelReader.load(new InputSource(modelInput));
        } finally {
            // Close the file even when loading fails.
            modelInput.close();
        }
        model.check();

        if (sequenceMode) {
            int[][] result = model.generateSequence(length);
            List lexicon = model.getLexicon();
            for (int i = 0; i < result[0].length; i++) {
                System.out.print((Character) lexicon.get(result[0][i]));
            }
            System.out.println();

            int numberOfStates = model.getNumberOfStates();
            // With more than 9 states the values can have several digits, so separate them.
            boolean separate = numberOfStates > 9;
            for (int i = 0; i < result[1].length; i++) {
                System.out.print(result[1][i]);
                if (separate) {
                    System.out.print(',');
                }
            }
            System.out.println();

            for (int i = 1; i < numberOfStates; i++) {
                System.out.println(i + " = " + model.getStateName(i));
            }
        } else {
            List lexicon = model.getLexicon();
            ListOfSequences uniqueSeqs = new ListOfSequences();
            for (int i = 0; i < count; i++) {
                int[][] result = model.generateSequence(length);
                uniqueSeqs.addSequence(result[0]);
            }
            Iterator seqs = uniqueSeqs.iterator();
            while (seqs.hasNext()) {
                SequenceCount seq = (SequenceCount) seqs.next();
                int[] key = seq.getSequence();
                for (int i = 0; i < key.length; i++) {
                    System.out.print((Character) lexicon.get(key[i]));
                }
                System.out.print(',');
                System.out.println(seq.getCount());
            }
        }
    } catch (java.io.IOException ex) {
        ex.printStackTrace();
    } catch (org.xml.sax.SAXException ex) {
        System.err.println(ex.getMessage());
    } catch (javax.xml.parsers.ParserConfigurationException ex) {
        ex.printStackTrace();
    }
}