Пример #1
0
 /**
  * Loads the reference text and builds the index of unique line trigrams.
  *
  * <p>Every line of {@code ngtName} is stored, untouched, in {@code ngtAl}. Each run of three
  * consecutive entries is then passed through {@code trans}, concatenated into a key and
  * recorded in {@code ngtTm} together with the index of the first entry of the run. A key that
  * occurs more than once is dropped from {@code ngtTm} and never re-inserted, so the map ends
  * up holding only keys that occur exactly once. The method also resets {@code prolog} to the
  * fixed hOCR header fragment.
  *
  * @param ngtName file to read, one entry per line
  * @throws Exception if the file cannot be opened or read, or if the transcoder cannot be
  *     created or fails while parsing
  */
 private void init(File ngtName) throws Exception {
   // try-with-resources closes the reader even when the transcoder or a read fails.
   // NOTE(review): FileReader decodes with the platform default charset - confirm that this
   // matches the encoding of the input file.
   try (BufferedReader reader = new BufferedReader(new FileReader(ngtName))) {
     br = reader;
     trans =
         new Transcoder(
             ClassLoader.getSystemResourceAsStream("eu/himeros/resources/transcoders/low2up.txt"));
     ngtTm = new TreeMap<>();
     ngtAl = new ArrayList<>(300000);
     // NOTE(review): "ocr-nmber-of-pages" below looks like a misspelling of the hOCR name
     // "ocr-number-of-pages"; it is left unchanged because the consumers of this header are
     // not visible from here.
     prolog = new StringBuilder("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n");
     prolog.append(
         "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n");
     prolog.append("<meta content=\"riguadon 0.3\" name=\"ocr-system\" />\n");
     prolog.append("<meta name=\"ocr-nmber-of-pages\" content=\"???\" />\n");
     prolog.append("<meta name=\"ocr-langs\" content=\"grc lat\" />\n");
     prolog.append("<meta content=\"ocr_line ocr_page\" name=\"ocr-capabilities\" />\n");
     prolog.append("<link href=\"hocraggregate.css\" rel=\"stylesheet\" type=\"text/css\"/>\n");
     String line;
     while ((line = reader.readLine()) != null) {
       ngtAl.add(line);
     }
   }
   // Two sentinel entries let every real line start a complete three-entry window.
   ngtAl.add("###");
   ngtAl.add("###");
   // Keys already seen at least twice. Without this memory a key would be put back on its
   // third, fifth, ... occurrence and wrongly be treated as unique.
   java.util.Set<String> repeatedKeys = new java.util.HashSet<>();
   for (int i = 0; i < ngtAl.size() - 2; i++) {
     String it1 = trans.parse(ngtAl.get(i));
     String it2 = trans.parse(ngtAl.get(i + 1));
     String it3 = trans.parse(ngtAl.get(i + 2));
     String key = (new StringBuilder(it1)).append(it2).append(it3).toString();
     if (repeatedKeys.contains(key)) {
       continue;
     }
     if (ngtTm.containsKey(key)) {
       // Second occurrence: the key is ambiguous, so evict it for good.
       ngtTm.remove(key);
       repeatedKeys.add(key);
     } else {
       ngtTm.put(key, i);
     }
   }
 }
Пример #2
0
 /**
  * Normalises the text of one OCR word element and stores it in {@code ocrAl}.
  *
  * <p>The element text is passed through {@code trans}; every character other than the range
  * {@code Α-Ω} and the apostrophe {@code ’} is then removed and the result is trimmed. A word
  * that is empty after this filtering is skipped.
  *
  * @param ocrWord element whose text is read
  */
 private void parseOcrWord(Element ocrWord) {
   String transcoded = trans.parse(ocrWord.getText());
   String filtered = transcoded.replaceAll("[^Α-Ω’]*", "").trim();
   if (!filtered.isEmpty()) {
     ocrAl.add(filtered);
   }
 }