@Override public void getNext(CAS aCAS) throws IOException, CollectionException { JCas jcas; try { jcas = aCAS.getJCas(); } catch (CASException e) { throw new CollectionException(e); } try { // parse the xml file File xmlFile = GlobalFileStorage.getInstance().poll(); System.out.println("Process file: " + xmlFile.getName()); SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser sp = spf.newSAXParser(); XMLReader xr = sp.getXMLReader(); LinkedList<String[]> textElements = new LinkedList<>(); FragmentContentHandler fch = new FragmentContentHandler(xr, textElements); xr.setContentHandler(fch); xr.parse(new InputSource(new FileInputStream(xmlFile))); StringBuilder docText = new StringBuilder(); for (String[] element : textElements) { int start = docText.length(); int end = start + element[1].length(); docText.append(element[1] + "\n\n"); Section section = new Section(jcas, start, end); section.setValue(element[0]); section.addToIndexes(); } jcas.setDocumentText(docText.toString().trim()); jcas.setDocumentLanguage(language); DocumentMetaData docMetaData = DocumentMetaData.create(aCAS); docMetaData.setDocumentTitle(xmlFile.getName()); docMetaData.setDocumentId(xmlFile.getAbsolutePath()); docMetaData.setDocumentBaseUri("file:" + xmlFile.getParentFile().getAbsolutePath()); docMetaData.setDocumentUri("file:" + xmlFile.getAbsolutePath()); } catch (Exception e) { // e.printStackTrace(); throw new CollectionException(e); } }
/**
 * Reports whether another file is waiting to be processed.
 *
 * @return {@code true} while the {@code GlobalFileStorage} instance is not empty
 * @throws IOException declared in the signature
 * @throws CollectionException declared in the signature
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
    final boolean storageExhausted = GlobalFileStorage.getInstance().isEmpty();
    return !storageExhausted;
}