/**
 * Command-line entry point: runs the segmentation evaluation over every
 * {@code .xml} file found (recursively) under the directory given as the
 * first argument and prints per-document progress, the average zone/line/word
 * scores and the aggregated summary to standard output.
 *
 * <p>Zones labelled {@code BODY_TABLE}, {@code BODY_FIGURE} and
 * {@code BODY_EQUATION} are added to the evaluator's ignored labels before
 * any document is processed.
 *
 * @param args {@code args[0]} is the directory to scan for XML documents
 * @throws AnalysisException propagated from the evaluator
 * @throws IOException if a document cannot be opened or read
 * @throws TransformationException propagated from the evaluator
 */
public static void main(String[] args) throws AnalysisException, IOException, TransformationException {
    if (args.length < 1) {
        System.err.println("Expected one argument: the directory containing the XML documents to evaluate");
        System.exit(1);
    }

    SegmentationEvaluator evaluator = new SegmentationEvaluator();
    evaluator.ignoredLabels.add(BxZoneLabel.BODY_TABLE);
    evaluator.ignoredLabels.add(BxZoneLabel.BODY_FIGURE);
    evaluator.ignoredLabels.add(BxZoneLabel.BODY_EQUATION);

    File inputDir = new File(args[0]);
    Collection<File> files = FileUtils.listFiles(inputDir, new String[] {"xml"}, true);
    Results results = evaluator.newResults();

    int documentCount = 0;
    double zoneScores = 0;
    double lineScores = 0;
    double wordScores = 0;

    for (File documentFile : files) {
        System.out.println(new Date(System.currentTimeMillis()));
        System.out.println(documentFile.getName());

        BxDocument origDoc;
        // Close the reader as soon as the document has been read; the original
        // code leaked one open file handle per processed document.
        try (FileReader reader = new FileReader(documentFile)) {
            origDoc = evaluator.prepareExpectedDocument(evaluator.readDocument(reader));
        }
        BxDocument testDoc = evaluator.prepareActualDocument(origDoc);

        Results docRes = evaluator.compareDocuments(origDoc, testDoc);
        results.add(docRes);

        // Accumulate the score of THIS document. The original summed the scores
        // of the running aggregate `results` (after add), which does not yield
        // a per-document average.
        // NOTE(review): assumes Results.add() accumulates docRes into results - confirm.
        zoneScores += docRes.zoneLevel.getScore();
        lineScores += docRes.lineLevel.getScore();
        wordScores += docRes.wordLevel.getScore();

        System.out.println(++documentCount);
    }

    // With no documents these divisions are 0.0 / 0 and the averages print as NaN.
    zoneScores /= documentCount;
    lineScores /= documentCount;
    wordScores /= documentCount;

    System.out.println("Documents: " + documentCount);
    System.out.println("Average zone score: " + zoneScores);
    System.out.println("Average line score: " + lineScores);
    System.out.println("Average word score: " + wordScores);
    results.printSummary();
}
/**
 * Command-line entry point: converts the documents of an input directory into
 * two training-sample files written to the current working directory,
 * {@code initial_<dir-name>.dat} and {@code meta_<dir-name>.dat}.
 *
 * <p>For each document the reading order is resolved, zone labels are
 * normalised (a missing label becomes {@code OTH_UNKNOWN}; a label outside the
 * metadata category is replaced by its general label), and the resulting
 * samples are written with {@code toLibSVM}. Meta samples are first passed
 * through a {@code SampleFilter} built for {@code CAT_METADATA}
 * (presumably keeping only metadata zones - confirm against SampleFilter).
 *
 * @param args two positional arguments: the input directory and the file
 *             extension handed to {@code DocumentsIterator}
 * @throws ParseException if the command line cannot be parsed
 * @throws IOException if an output file cannot be created or written
 * @throws TransformationException propagated from document processing
 * @throws AnalysisException propagated from document processing
 * @throws CloneNotSupportedException propagated from document processing
 */
public static void main(String[] args) throws ParseException, IOException, TransformationException, AnalysisException, CloneNotSupportedException {
    Options options = new Options();
    CommandLineParser parser = new GnuParser();
    CommandLine line = parser.parse(options, args);

    // Validate the parsed positional arguments (the ones actually indexed
    // below) rather than the raw args array, which may differ from them.
    String[] positionalArgs = line.getArgs();
    if (positionalArgs.length != 2) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(" [-options] input-directory extension", options);
        System.exit(1);
    }

    String inputDirPath = positionalArgs[0];
    String outputSuffix = new File(inputDirPath).getName() + ".dat";

    HierarchicalReadingOrderResolver ror = new HierarchicalReadingOrderResolver();
    DocumentsIterator iter = new DocumentsIterator(inputDirPath, positionalArgs[1]);

    FeatureVectorBuilder<BxZone, BxPage> metaVectorBuilder = SVMMetadataZoneClassifier.getFeatureVectorBuilder();
    FeatureVectorBuilder<BxZone, BxPage> initialVectorBuilder = SVMInitialZoneClassifier.getFeatureVectorBuilder();
    SampleFilter metaSamplesFilter = new SampleFilter(BxZoneLabelCategory.CAT_METADATA);

    // try-with-resources guarantees both writers are flushed and closed even
    // when processing a document throws; the original closed them only on
    // the success path.
    try (BufferedWriter svmInitialFile = new BufferedWriter(new FileWriter("initial_" + outputSuffix));
            BufferedWriter svmMetaFile = new BufferedWriter(new FileWriter("meta_" + outputSuffix))) {
        int docIdx = 0;
        for (BxDocument sourceDoc : iter) {
            String filename = sourceDoc.getFilename();
            System.out.println(docIdx + ": " + filename);

            // The filename is re-applied to the document returned by resolve().
            BxDocument doc = ror.resolve(sourceDoc);
            doc.setFilename(filename);

            for (BxZone zone : doc.asZones()) {
                if (zone.getLabel() == null) {
                    zone.setLabel(BxZoneLabel.OTH_UNKNOWN);
                } else if (zone.getLabel().getCategory() != BxZoneLabelCategory.CAT_METADATA) {
                    zone.setLabel(zone.getLabel().getGeneralLabel());
                }
            }

            List<TrainingSample<BxZoneLabel>> newMetaSamples =
                    BxDocsToTrainingSamplesConverter.getZoneTrainingSamples(
                            doc, metaVectorBuilder, BxZoneLabel.getIdentityMap());
            newMetaSamples = metaSamplesFilter.pickElements(newMetaSamples);

            List<TrainingSample<BxZoneLabel>> newInitialSamples =
                    BxDocsToTrainingSamplesConverter.getZoneTrainingSamples(
                            doc, initialVectorBuilder, BxZoneLabel.getLabelToGeneralMap());

            for (TrainingSample<BxZoneLabel> sample : newMetaSamples) {
                toLibSVM(sample, svmMetaFile);
            }
            for (TrainingSample<BxZoneLabel> sample : newInitialSamples) {
                toLibSVM(sample, svmInitialFile);
            }
            ++docIdx;
        }
    }
}