@Override public BxDocument segmentDocument(BxDocument document) throws AnalysisException { Map<BxPage, List<Component>> componentMap = new HashMap<BxPage, List<Component>>(); ExecutorService exec = Executors.newFixedThreadPool(PdfNLMContentExtractor.THREADS_NUMBER); ArrayList<Callable<NumBxPage>> tasks = new ArrayList<Callable<NumBxPage>>(); for (BxPage page : document.getPages()) { tasks.add(new ComponentCounter(page)); } List<Future<NumBxPage>> results; try { results = exec.invokeAll(tasks); exec.shutdown(); for (Future<NumBxPage> result : results) { NumBxPage p = result.get(); componentMap.put(p.page, p.components); } } catch (ExecutionException ex) { throw new AnalysisException("Cannot segment pages!", ex); } catch (InterruptedException ex) { throw new AnalysisException("Cannot segment pages!", ex); } this.computeDocumentOrientation(componentMap); BxDocument output = new BxDocument(); BxPage[] pages = new BxPage[document.getPages().size()]; exec = Executors.newFixedThreadPool(PdfNLMContentExtractor.THREADS_NUMBER); tasks = new ArrayList<Callable<NumBxPage>>(); int i = 0; for (BxPage page : document.getPages()) { tasks.add(new SingleSegmenter(page, i++)); } try { results = exec.invokeAll(tasks); exec.shutdown(); for (Future<NumBxPage> result : results) { NumBxPage p = result.get(); pages[p.index] = p.page; } for (BxPage p : pages) { if (p.getBounds() != null) { output.addPage(p); } } return output; } catch (ExecutionException ex) { throw new AnalysisException("Cannot segment pages!", ex); } catch (InterruptedException ex) { throw new AnalysisException("Cannot segment pages!", ex); } }
public BxDocument setPages(Collection<BxPage> pages) { if (pages != null) { this.pages.clear(); curPageNumber = 0; for (BxPage page : pages) { addPage(page); } } return this; }