@Test public void test() throws IOException { Storage storage = new MemoryStorage(); Document document; LuceneManager luceneManager = storage.getLuceneManager(); document = new Document(); document.add( new TextField("lexical", "dark and stormy night in document one", Field.Store.YES)); luceneManager.addDocument(document); DocumentTerm documentTerm; FlexibleParameters parameters; parameters = new FlexibleParameters(); parameters.addParameter("string", "It was a dark and stormy night."); parameters.addParameter("string", "It was the best of times it was the worst of times."); parameters.addParameter("tool", "StepEnabledIndexedCorpusCreator"); CorpusCreator creator = new CorpusCreator(storage, parameters); creator.run(); parameters.setParameter("corpus", creator.getStoredId()); parameters.setParameter("tool", "DocumentTermFrequencies"); DocumentTerms documentTermFrequencies; List<DocumentTerm> documentTerms; parameters.setParameter("query", "dar*"); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(1, documentTerms.size()); documentTerm = documentTerms.get(0); assertEquals("dark", documentTerm.getTerm()); assertEquals(1, documentTerm.getRawFrequency()); assertEquals(0, documentTerm.getDocumentIndex()); parameters.setParameter("query", "it was"); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); // we sort by reverse frequency by default documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(2, documentTerms.size()); documentTerm = documentTerms.get(0); assertEquals(1, documentTerm.getDocumentIndex()); assertEquals("it was", documentTerm.getTerm()); assertEquals(2, documentTerm.getRawFrequency()); documentTerm = documentTerms.get(1); assertEquals(0, documentTerm.getDocumentIndex()); assertEquals("it was", documentTerm.getTerm()); assertEquals(1, documentTerm.getRawFrequency()); parameters.removeParameter("query"); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(14, documentTerms.size()); documentTerm = documentTerms.get(0); assertEquals("it", documentTerm.getTerm()); assertEquals(2, documentTerm.getRawFrequency()); parameters.setParameter("limit", 1); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(1, documentTerms.size()); documentTerm = documentTerms.get(0); assertEquals("it", documentTerm.getTerm()); assertEquals(2, documentTerm.getRawFrequency()); parameters.setParameter("start", 1); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(1, documentTerms.size()); documentTerm = documentTerms.get(0); assertEquals("of", documentTerm.getTerm()); assertEquals(2, documentTerm.getRawFrequency()); parameters.setParameter("start", 50); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(0, documentTerms.size()); // with stopwords parameters.setParameter("stopList", "stop.en.taporware.txt"); parameters.removeParameter("start"); parameters.removeParameter("limit"); documentTermFrequencies = new DocumentTerms(storage, parameters); documentTermFrequencies.run(); documentTerms = documentTermFrequencies.getDocumentTerms(); assertEquals(6, documentTerms.size()); documentTerm = documentTerms.get(0); assertEquals("times", documentTerm.getTerm()); documentTerm = documentTerms.get(documentTerms.size() - 1); assertEquals("worst", documentTerm.getTerm()); storage.destroy(); }