/**
 * Returns every story in {@code allStoriesMap} whose topic equals the given
 * category.
 *
 * <p>Stories that have no topic set are skipped rather than causing a
 * {@code NullPointerException}; {@code loadStories} only assigns a topic
 * when it is called with {@code assignTopics == true}.
 *
 * @param t the category to match against each story's topic
 * @return a new, possibly empty, list of the matching stories
 */
public List<NewsStory> getStories(NewsCategory t) {
    List<NewsStory> storiesByTopic = new ArrayList<NewsStory>();
    for (NewsStory s : allStoriesMap.values()) {
        NewsCategory topic = s.getTopic();
        // Guard against stories whose topic was never assigned.
        if (topic != null && topic.equals(t)) {
            storiesByTopic.add(s);
        }
    }
    return storiesByTopic;
}
/**
 * Looks up stories by title, ignoring case.
 *
 * @param title the title to search for; when {@code null} no story is
 *              examined and an empty list is returned
 * @return a new list holding every story in {@code allStoriesMap} whose
 *         title equals {@code title} case-insensitively
 */
public List<NewsStory> findStoriesByTitle(String title) {
    List<NewsStory> matches = new ArrayList<NewsStory>();
    if (title == null) {
        return matches;
    }
    for (NewsStory candidate : allStoriesMap.values()) {
        // Called on the non-null argument, so a story with a null title
        // simply fails to match.
        boolean sameTitle = title.equalsIgnoreCase(candidate.getTitle());
        if (sameTitle) {
            matches.add(candidate);
        }
    }
    return matches;
}
public void loadStories(boolean assignTopics) { if (documentDir == null) { throw new UnsupportedOperationException( "ERROR:" + " You must set the directory before you use the load() method!"); } if (allTopicsMap == null || allTopicsMap.size() == 0) { throw new UnsupportedOperationException( "ERROR:" + " You must load the topics before you use the load() method!"); } if (documentNames == null) { File dirFile = new File(documentDir); documentNames = dirFile.list( // skip directories new FilenameFilter() { public boolean accept(File dir, String name) { File f = new File(dir, name); return f.isFile(); } }); } allStoriesMap = new HashMap<String, NewsStory>(); for (int i = 0, n = documentNames.length; i < n; i++) { String id = String.valueOf(i); String name = documentNames[i]; NewsStory newsStory = createStory(id, name, topTerms); if (assignTopics) { String topicName = getTopicKey(name); NewsCategory newsCategory = allTopicsMap.get(topicName); newsStory.setTopic(newsCategory); } addStory(newsStory); } }
/**
 * Creates a story for one document in {@code documentDir}.
 *
 * <p>The content is obtained from {@code loadContent}, and the story's URL is
 * set to the external form of the document file's URL.
 *
 * @param docId     identifier to give the new story
 * @param docName   file name of the document, relative to {@code documentDir}
 * @param topNTerms passed through to {@code loadContent}
 * @return the newly created story with its URL set
 * @throws RuntimeException if the file location cannot be converted to a URL;
 *         the underlying {@code MalformedURLException} is attached as the cause
 */
private NewsStory createStory(String docId, String docName, int topNTerms) {
    Content content = loadContent(documentDir, docName, topNTerms);
    File file = new File(documentDir, docName);
    NewsStory docItem = new NewsStory(docId, docName, content);
    try {
        docItem.setUrl(file.toURI().toURL().toExternalForm());
    } catch (MalformedURLException e) {
        // Keep the original exception as the cause so the stack trace shows
        // why the URL conversion failed.
        throw new RuntimeException("Failed to create file url for doc:" + docName, e);
    }
    return docItem;
}
/**
 * Adds one story to the index as a single Lucene document.
 *
 * <p>Fields written, in order:
 * <ul>
 *   <li>{@code INDEX_FIELD_CONTENT} - the story text, using a field type
 *       derived from {@code TextField.TYPE_NOT_STORED} with term vectors and
 *       term-vector positions enabled and term-vector offsets disabled;</li>
 *   <li>{@code INDEX_FIELD_URL} - a stored {@code StringField};</li>
 *   <li>{@code INDEX_FIELD_DOC_ID} - a stored {@code StringField};</li>
 *   <li>{@code INDEX_FIELD_TITLE} - a stored {@code TextField}.</li>
 * </ul>
 *
 * @param iw        writer that receives the document
 * @param newsStory story supplying the content text, URL, id and title
 * @throws IOException declared for the call to {@code iw.addDocument}
 */
private void indexDocument(IndexWriter iw, NewsStory newsStory) throws IOException {
    // Field type for the content: based on TYPE_NOT_STORED, plus term vectors
    // with positions but without offsets.
    FieldType termVectorType = new FieldType(TextField.TYPE_NOT_STORED);
    termVectorType.setStoreTermVectors(true);
    termVectorType.setStoreTermVectorPositions(true);
    termVectorType.setStoreTermVectorOffsets(false);

    org.apache.lucene.document.Document luceneDoc =
            new org.apache.lucene.document.Document();

    luceneDoc.add(
            new Field(INDEX_FIELD_CONTENT, newsStory.getContent().getText(), termVectorType));
    luceneDoc.add(
            new StringField(INDEX_FIELD_URL, newsStory.getUrl(), Field.Store.YES));
    luceneDoc.add(
            new StringField(INDEX_FIELD_DOC_ID, newsStory.getId(), Field.Store.YES));
    luceneDoc.add(
            new TextField(INDEX_FIELD_TITLE, newsStory.getTitle(), Field.Store.YES));

    iw.addDocument(luceneDoc);
}
/**
 * Registers a story in {@code allStoriesMap} under its own id.
 *
 * @param newsStory the story to store; its {@code getId()} value is the key
 */
private void addStory(NewsStory newsStory) {
    final String storyId = newsStory.getId();
    allStoriesMap.put(storyId, newsStory);
}