// ************************************************************************* // // OUTPUT FUNCTIONS // ************************************************************************* // // This method is equivalent to calling "outputAlignmentResults (null, null)" , but faster; public Stats calculateOCRaccuracy() { if (alignment.isEmpty()) { System.out.println( "RecursiveAlignmentTool::calculateOCRaccuracy(): There are no words aligned. Aligner must be run prior to calling calculateOCRaccuracy"); return null; } // align each segments characters individually long numError = 0; long total = 0; long ocrLength = 0; for (int kk = 0; kk < alignment.size(); kk++) { AlignedSequence alignedTerm = alignment.get(kk); String refout = alignedTerm.getReference(); String candidateout = alignedTerm.getCandidate(); if (candidateout != null) { ocrLength++; } if (refout != null) { total++; if (candidateout == null || !refout.equals(candidateout)) { numError++; } } } Stats st = new Stats((total - numError), ocrLength, total); return st; // double accuracy = 1.0 - (numError / (double) total); // System.out.println("OCR accuracy: " + wordAccuracy + " (matching words= " + (total - // numError) + " reference length= " + total + ")"); }
// Alignment results are written to outputfile // if both arguments are null, then this method simply calculates OCR accuracy and outputs // nothing. public Stats outputAlignmentResults(String outputfile, String errorsFilename) { long numError = 0; long total = 0; int inc = 1; int lineWidth = 0; StringBuffer candBuffer = new StringBuffer(10000); StringBuffer refBuffer = new StringBuffer(10000); boolean colFormat = OUTPUT_FORMAT.toLowerCase().startsWith("c"); // System.out.println(OUTPUT_FORMAT); String NULL_STRING = "null"; if (WORD_LEVEL_ALIGNMENT) { lineWidth = 20; NULL_STRING = "null"; } else { lineWidth = 100; NULL_STRING = "@"; } long counter = 0; long ocrLen = 0; try { Writer writer = null; Writer errorWriter = null; if (outputfile != null) { // writer = new BufferedWriter(new FileWriter(new File(outputfile))); writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(new File(outputfile)), "UTF8")); } if (errorsFilename != null) { // errorWriter = new BufferedWriter(new FileWriter(new File(errorsFilename))); errorWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(new File(errorsFilename)), "UTF8")); } for (int kk = 0; kk < alignment.size(); kk++) { AlignedSequence alignedTerm = alignment.get(kk); String refout = alignedTerm.getReference(); String candidateout = alignedTerm.getCandidate(); if (candidateout != null) { ocrLen++; } String ref = refout == null ? NULL_STRING : refout; String candidate = candidateout == null ? NULL_STRING : candidateout; if (writer != null) { if (colFormat) { writer.append(candidate + "\t" + ref + "\n"); } else { if (counter == lineWidth) { writer.append("OCR:\t" + candBuffer + "\n"); writer.append("GT :\t" + refBuffer + "\n\n"); candBuffer.delete(0, candBuffer.length()); refBuffer.delete(0, refBuffer.length()); counter = 0; // kk--; } if (!WORD_LEVEL_ALIGNMENT) { candBuffer.append(candidate); refBuffer.append(ref); } else { String tabbedCand = candidate, tabbedRef = ref; if (candidate.length() > ref.length()) { for (int t = 0; t < (candidate.length() - ref.length()); t++) { tabbedRef += " "; } } else if (candidate.length() < ref.length()) { for (int t = 0; t < (ref.length() - candidate.length()); t++) { tabbedCand += " "; } } candBuffer.append(tabbedCand).append("\t"); refBuffer.append(tabbedRef).append("\t"); } counter++; } } if (refout != null) { total++; if (candidateout == null || !refout.equals(candidateout)) { numError++; if (errorWriter != null) { errorWriter.append(candidateout + "\t" + refout + "\t" + (kk + inc) + "\n"); } } } } if (!colFormat) { writer.append("OCR:\t" + candBuffer + "\n"); writer.append("GT :\t" + refBuffer + "\n\n"); } if (writer != null) { writer.close(); } if (errorWriter != null) { errorWriter.close(); } } catch (Exception e) { System.out.println("Error. Can not write the file: " + outputfile); } Stats st = new Stats((total - numError), ocrLen, total); return st; // System.out.println("OCR accuracy: " + wordAccuracy + " (matching words= " + (total - // numError) + " reference length= " + total + ")"); }