@Override public synchronized String nextSentence() { if (sentences == null || !sentences.hasNext()) { try { if (getReader().hasNext()) { CAS cas = resource.retrieve(); try { getReader().getNext(cas); } catch (Exception e) { log.warn("Done iterating returning an empty string"); return ""; } resource.getAnalysisEngine().process(cas); List<String> list = new ArrayList<>(); for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) { list.add(sentence.getCoveredText()); } sentences = list.iterator(); // needs to be next cas while (!sentences.hasNext()) { // sentence is empty; go to another cas if (reader.hasNext()) { cas.reset(); getReader().getNext(cas); resource.getAnalysisEngine().process(cas); for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) { list.add(sentence.getCoveredText()); } sentences = list.iterator(); } else return null; } String ret = sentences.next(); if (this.getPreProcessor() != null) ret = this.getPreProcessor().preProcess(ret); return ret; } return null; } catch (Exception e) { throw new RuntimeException(e); } } else { String ret = sentences.next(); if (this.getPreProcessor() != null) ret = this.getPreProcessor().preProcess(ret); return ret; } }
public static void main(String[] args) throws Exception { String sLine; long startTime = System.currentTimeMillis(); URL descUrl = VectorSpaceRetrieval.class.getResource( "/descriptors/retrievalsystem/VectorSpaceRetrieval.xml"); if (descUrl == null) { throw new IllegalArgumentException("Error opening VectorSpaceRetrieval.xml"); } // create AnalysisEngine XMLInputSource input = new XMLInputSource(descUrl); AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(input); AnalysisEngine anAnalysisEngine = UIMAFramework.produceAnalysisEngine(desc); CAS aCas = anAnalysisEngine.newCAS(); URL docUrl = VectorSpaceRetrieval.class.getResource("/data/documents.txt"); if (docUrl == null) { throw new IllegalArgumentException("Error opening data/documents.txt"); } BufferedReader br = new BufferedReader(new InputStreamReader(docUrl.openStream())); while ((sLine = br.readLine()) != null) { aCas.setDocumentText(sLine); anAnalysisEngine.process(aCas); aCas.reset(); } br.close(); br = null; anAnalysisEngine.collectionProcessComplete(); anAnalysisEngine.destroy(); long endTime = System.currentTimeMillis(); double totalTime = (endTime - startTime) / 1000.0; System.out.println("Total time taken: " + totalTime); }