public static void main(String args[]) { try { SeleniumHtmlClient client = new SeleniumHtmlClient(); String testFile = null; String testSuite = null; String resultsFilename = null; for (int i = 0; i < args.length; i++) { if (args[i].equals("--host")) { i++; if (i < args.length) { client.setHost(args[i]); } else { throw new BadUsageException("--host must be followed by a hostname"); } } else if (args[i].equals("--port")) { i++; if (i < args.length) { client.setPort(Integer.parseInt(args[i])); } else { throw new BadUsageException("--port must be followed by a port number"); } } else if (args[i].equals("--browser")) { i++; if (i < args.length) { client.setBrowser(args[i]); } else { throw new BadUsageException("--browser must be followed by a browser spec"); } } else if (args[i].equals("--out")) { i++; if (i < args.length) { resultsFilename = args[i]; } else { throw new BadUsageException("--out must be followed by a filename"); } /* } else if (args[i].equals("--outdir")) { i++; if (i < args.length) { client.setResultsDir(new File(args[i])); } else { throw new BadUsageException("--outdir must be followed by a path"); } */ } else if (args[i].equals("--baseurl")) { i++; if (i < args.length) { client.setBaseUrl(args[i]); } else { throw new BadUsageException("--baseurl must be followed by a URL"); } } else if (args[i].equals("--test")) { i++; if (i < args.length) { if (testFile == null) { testFile = args[i]; } else { throw new BadUsageException("only one test file permitted"); } } else { throw new BadUsageException("--test must be followed by a test filepath"); } } else if (args[i].equals("--testsuite")) { i++; if (i < args.length) { testSuite = args[i]; } else { throw new BadUsageException("--testsuite must be followed by a testsuite filepath"); } } else if (args[i].equals("--verbose") || args[i].equals("-v")) { client.setVerbose(true); } else if (args[i].equals("--help") || args[i].equals("-h")) { printUsage(); System.exit(0); } else { throw new BadUsageException("Unknown parameter " + args[i]); } } if (testFile == null && testSuite == null) { throw new BadUsageException("No test or testsuite file specified"); } else if (testFile != null && testSuite != null) { throw new BadUsageException("A test and testsuite file cannot both be specified"); } Writer resultsWriter = null; if (resultsFilename != null) { resultsWriter = new FileWriter(resultsFilename); } else /* if (client.resultsDir == null) */ { resultsWriter = new OutputStreamWriter(System.out); } client.setResultsWriter(resultsWriter); if (testFile != null) { client.runTest(testFile); } else { client.runSuite(testSuite); } if (resultsWriter != null) resultsWriter.close(); } catch (BadUsageException e) { System.err.println("Error: " + e.getMessage()); System.err.println(); printUsage(); System.exit(1); } catch (Exception e) { e.printStackTrace(); System.exit(1); } }
/** * This is the new write method that uses XML methods to generate an XML file. * * @param location The location where the new file should be stored. * @param file The file that was being used to create this table. * @param tableID The ID of the table. * @param semanticFramework The semantic framework object. This contains get methods that are * required for the output File. * @throws IOException If the given location doesn't exist. */ private void write2(String location, File file, int tableID, SemanticFramework semanticFramework) throws IOException { try { DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); Document doc = docBuilder.newDocument(); org.w3c.dom.Element rootElement = doc.createElement("TEAFile"); doc.appendChild(rootElement); // provenance: org.w3c.dom.Element provenance = doc.createElement("provenance"); rootElement.appendChild(provenance); org.w3c.dom.Element detectionID = doc.createElement("DetectionID"); detectionID.appendChild(doc.createTextNode(tableID + "")); provenance.appendChild(detectionID); org.w3c.dom.Element fromFile = doc.createElement("fromFile"); fromFile.appendChild(doc.createTextNode(file.getName())); provenance.appendChild(fromFile); org.w3c.dom.Element fromPath = doc.createElement("fromPath"); fromPath.appendChild(doc.createTextNode(file.getAbsolutePath())); provenance.appendChild(fromPath); org.w3c.dom.Element fromPage = doc.createElement("fromPage"); fromPage.appendChild(doc.createTextNode(Integer.toString(pageNumber))); provenance.appendChild(fromPage); org.w3c.dom.Element horizontalThresholdModifier = doc.createElement("horizontalThresholdModifier"); horizontalThresholdModifier.appendChild( doc.createTextNode(this.horizontalThresholdModifier + "")); provenance.appendChild(horizontalThresholdModifier); org.w3c.dom.Element verticalThresholdModifier = doc.createElement("horizontalThresholdModifier"); verticalThresholdModifier.appendChild( doc.createTextNode(this.verticalThresholdModifier + "")); provenance.appendChild(verticalThresholdModifier); // Results: org.w3c.dom.Element results = doc.createElement("results"); rootElement.appendChild(results); org.w3c.dom.Element minX1 = doc.createElement("TableBoundaryX1"); minX1.appendChild(doc.createTextNode(this.minX1 + "")); results.appendChild(minX1); org.w3c.dom.Element maxX2 = doc.createElement("TableBoundaryX2"); maxX2.appendChild(doc.createTextNode(this.maxX2 + "")); results.appendChild(maxX2); org.w3c.dom.Element minY1 = doc.createElement("TableBoundaryY1"); minY1.appendChild(doc.createTextNode(this.minY1 + "")); results.appendChild(minY1); org.w3c.dom.Element maxY2 = doc.createElement("TableBoundaryY2"); maxY2.appendChild(doc.createTextNode(this.maxY2 + "")); results.appendChild(maxY2); org.w3c.dom.Element title1 = doc.createElement("title1"); title1.appendChild(doc.createTextNode(name)); results.appendChild(title1); org.w3c.dom.Element title2 = doc.createElement("title2"); title2.appendChild(doc.createTextNode(titleAndHeaders.toString())); results.appendChild(title2); org.w3c.dom.Element columns = doc.createElement("columns"); results.appendChild(columns); for (Column2 columnContent : dataInColumns) { org.w3c.dom.Element column = doc.createElement("column"); column.appendChild(doc.createTextNode(columnContent.toString().replace("�", ""))); columns.appendChild(column); } org.w3c.dom.Element lines = doc.createElement("lines"); results.appendChild(lines); ArrayList<ArrayList<Cell>> table = recreateTableLines(semanticFramework); for (ArrayList<Cell> line : table) { org.w3c.dom.Element XMLLine = doc.createElement("line"); XMLLine.appendChild(doc.createTextNode(line.toString().replace("�", ""))); lines.appendChild(XMLLine); } /*System.out.println("table:" + table); for(Line line : data){ if(line.getHighestY2()<=this.maxY2&&line.getLowestY1()>=this.minY1&&line.getClusterSize()>1){ org.w3c.dom.Element XMLLine = doc.createElement("line"); XMLLine.appendChild(doc.createTextNode(line.toString().replace("�", ""))); lines.appendChild(XMLLine); } else{ System.out.println("NO GOOD: " + line); } } */ if (rowSpanners.size() > 0) { org.w3c.dom.Element rowSpanners = doc.createElement("rowSpanners"); results.appendChild(rowSpanners); for (Line line : this.rowSpanners) { org.w3c.dom.Element rowSpanner = doc.createElement("rowSpanner"); rowSpanner.appendChild(doc.createTextNode(line.toString())); rowSpanners.appendChild(rowSpanner); } } // Semantics: org.w3c.dom.Element semantics = doc.createElement("tableSemantics"); rootElement.appendChild(semantics); org.w3c.dom.Element title = doc.createElement("title"); title.appendChild(doc.createTextNode(semanticFramework.getTitle().toString())); semantics.appendChild(title); org.w3c.dom.Element titleConfidence = doc.createElement("titleConfidence"); Double semanticFrameworkDouble = semanticFramework.getTitleConfidence(); titleConfidence.appendChild(doc.createTextNode(semanticFrameworkDouble.toString())); semantics.appendChild(titleConfidence); if (!semanticFramework.getRowSpanners().isEmpty()) { org.w3c.dom.Element rowSpanners = doc.createElement("subHeaders"); rowSpanners.appendChild(doc.createTextNode(semanticFramework.getRowSpanners().toString())); semantics.appendChild(rowSpanners); org.w3c.dom.Element IdentifiersConfidenceAlignment = doc.createElement("subHeadersConfidenceAlignment"); IdentifiersConfidenceAlignment.appendChild( doc.createTextNode(semanticFramework.getIdentifiersConfidenceAlignment().toString())); semantics.appendChild(IdentifiersConfidenceAlignment); org.w3c.dom.Element getIdentifiersConfidenceColumnsSpanned = doc.createElement("subHeadersConfidenceColumnsSpanned"); getIdentifiersConfidenceColumnsSpanned.appendChild( doc.createTextNode( semanticFramework.getIdentifiersConfidenceColumnsSpanned().toString())); semantics.appendChild(getIdentifiersConfidenceColumnsSpanned); org.w3c.dom.Element IdentifiersConfidenceLineDistance = doc.createElement("subHeadersConfidenceLineDistance"); IdentifiersConfidenceLineDistance.appendChild( doc.createTextNode( semanticFramework.getIdentifiersConfidenceLineDistance().toString())); semantics.appendChild(IdentifiersConfidenceLineDistance); } if (!semanticFramework.getValidatedRowSpanners().isEmpty()) { org.w3c.dom.Element rowSpanners = doc.createElement("rowSpanners"); rowSpanners.appendChild( doc.createTextNode(semanticFramework.getValidatedRowSpanners().toString())); semantics.appendChild(rowSpanners); org.w3c.dom.Element rowSpannersConfidenceAlignment = doc.createElement("rowSpannersConfidenceAlignment"); rowSpannersConfidenceAlignment.appendChild( doc.createTextNode(semanticFramework.getRowSpannersConfidenceAlignment().toString())); semantics.appendChild(rowSpannersConfidenceAlignment); org.w3c.dom.Element rowSpannersConfidenceColumnsSpanned = doc.createElement("rowSpannersConfidenceColumnsSpanned"); rowSpannersConfidenceColumnsSpanned.appendChild( doc.createTextNode( semanticFramework.getRowSpannersConfidenceColumnsSpanned().toString())); semantics.appendChild(rowSpannersConfidenceColumnsSpanned); org.w3c.dom.Element rowSpannersConfidenceLineDistance = doc.createElement("rowSpannersConfidenceLineDistance"); rowSpannersConfidenceLineDistance.appendChild( doc.createTextNode( semanticFramework.getRowSpannersConfidenceLineDistance().toString())); semantics.appendChild(rowSpannersConfidenceLineDistance); } org.w3c.dom.Element headers = doc.createElement("headers"); headers.appendChild(doc.createTextNode(semanticFramework.getHeaders().toString())); semantics.appendChild(headers); org.w3c.dom.Element headersConfidence = doc.createElement("headersConfidence"); headersConfidence.appendChild( doc.createTextNode(semanticFramework.getHeaderConfidence().toString())); semantics.appendChild(headersConfidence); // validation: org.w3c.dom.Element validation = doc.createElement("validation"); rootElement.appendChild(validation); org.w3c.dom.Element clusterCertainty = doc.createElement("columnConfidence"); clusterCertainty.appendChild( doc.createTextNode(this.validation.getClusterCertainty().toString())); validation.appendChild(clusterCertainty); org.w3c.dom.Element mostFrequentNumberOfClusters = doc.createElement("mostFrequentNumberOfClusters"); mostFrequentNumberOfClusters.appendChild( doc.createTextNode(this.validation.getMostFrequentNumberOfClusters() + "")); validation.appendChild(mostFrequentNumberOfClusters); org.w3c.dom.Element highestAmountOfClusters = doc.createElement("highestAmountOfClusters"); highestAmountOfClusters.appendChild( doc.createTextNode(this.validation.getHighestAmountOfClusters() + "")); validation.appendChild(highestAmountOfClusters); org.w3c.dom.Element highestAmountOfClustersOccurrences = doc.createElement("highestAmountOfClustersOccurrences"); highestAmountOfClustersOccurrences.appendChild( doc.createTextNode(this.validation.getHighestAmountOfClustersOccurrences() + "")); validation.appendChild(highestAmountOfClustersOccurrences); org.w3c.dom.Element clusterThreshold = doc.createElement("clusterThreshold"); clusterThreshold.appendChild(doc.createTextNode(this.validation.getLineThreshold() + "")); validation.appendChild(clusterThreshold); org.w3c.dom.Element cellsWithMissingDataAdded = doc.createElement("cellsWithMissingDataAdded"); cellsWithMissingDataAdded.appendChild( doc.createTextNode(this.validation.getCellsWithMissingDataAdded() + "")); validation.appendChild(cellsWithMissingDataAdded); if (this.validation.getCellsWithMissingDataAdded() > 0) { org.w3c.dom.Element cellsWithMissingDataAddedScores = doc.createElement("cellsWithMissingDataAddedScores"); cellsWithMissingDataAddedScores.appendChild( doc.createTextNode(this.validation.getCellsWithMissingDataAddedObjects() + "")); validation.appendChild(cellsWithMissingDataAddedScores); } org.w3c.dom.Element averageDistanceBetweenRows = doc.createElement("averageDistanceBetweenRows"); averageDistanceBetweenRows.appendChild( doc.createTextNode(this.validation.getAverageDistanceBetweenRows() + "")); validation.appendChild(averageDistanceBetweenRows); if (this.validation.getTitleConfidence().size() > 0) { org.w3c.dom.Element TitleConfidence = doc.createElement("TitleConfidence"); TitleConfidence.appendChild(doc.createTextNode(this.validation.getTitleConfidence() + "")); validation.appendChild(TitleConfidence); } org.w3c.dom.Element falsePositive = doc.createElement("falsePositive"); falsePositive.appendChild(doc.createTextNode(this.validation.getFalsePositive() + "")); validation.appendChild(falsePositive); TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); DOMSource source = new DOMSource(doc); LOGGER.info( "Written file: " + location + "\\results\\" + file.getName().substring(0, file.getName().length() - 5) + "-" + tableID + ".xml"); File file2 = new File( location + "\\results\\" + file.getName().substring(0, file.getName().length() - 5) + "-" + tableID + ".xml"); Writer output = new BufferedWriter(new FileWriter(file2)); StreamResult result = new StreamResult(output); // Output to console for testing transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); transformer.transform(source, result); output.close(); System.out.println("File saved."); } catch (ParserConfigurationException pce) { pce.printStackTrace(); } catch (TransformerConfigurationException e) { e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } }