/** * Updates the lucene index for a single page. * * @param page The WikiPage to check * @param text The page text to index. */ protected synchronized void updateLuceneIndex(WikiPage page, String text) { IndexWriter writer = null; log.debug("Updating Lucene index for page '" + page.getName() + "'..."); Directory luceneDir = null; try { pageRemoved(page); // Now add back the new version. luceneDir = new SimpleFSDirectory(new File(m_luceneDirectory), null); writer = getIndexWriter(luceneDir); luceneIndexPage(page, text, writer); } catch (IOException e) { log.error("Unable to update page '" + page.getName() + "' from Lucene index", e); // reindexPage( page ); } catch (Exception e) { log.error("Unexpected Lucene exception - please check configuration!", e); // reindexPage( page ); } finally { close(writer); } log.debug("Done updating Lucene index for page '" + page.getName() + "'."); }
/**
 * {@inheritDoc}
 *
 * <p>Deletes the index documents whose {@code LUCENE_ID} field equals the page name. Any
 * exception raised while doing so is logged and not rethrown.
 */
public void pageRemoved(WikiPage page) {
    IndexWriter indexWriter = null;
    try {
        Directory indexDir = new SimpleFSDirectory(new File(m_luceneDirectory), null);
        indexWriter = getIndexWriter(indexDir);

        // Documents are looked up by the page name stored in the id field.
        Term idTerm = new Term(LUCENE_ID, page.getName());
        Query byId = new TermQuery(idTerm);
        indexWriter.deleteDocuments(byId);
    } catch (Exception ex) {
        log.error("Unable to remove page '" + page.getName() + "' from Lucene index", ex);
    } finally {
        close(indexWriter);
    }
}
/** * Adds a page-text pair to the lucene update queue. Safe to call always * * @param page WikiPage to add to the update queue. */ public void reindexPage(WikiPage page) { if (page != null) { String text; // TODO: Think if this was better done in the thread itself? if (page instanceof Attachment) { text = getAttachmentContent((Attachment) page); } else { text = m_engine.getPureText(page); } if (text != null) { // Add work item to m_updates queue. Object[] pair = new Object[2]; pair[0] = page; pair[1] = text; m_updates.add(pair); log.debug("Scheduling page " + page.getName() + " for index update"); } } }
/** * Indexes page using the given IndexWriter. * * @param page WikiPage * @param text Page text to index * @param writer The Lucene IndexWriter to use for indexing * @return the created index Document * @throws IOException If there's an indexing problem */ protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer) throws IOException { if (log.isDebugEnabled()) log.debug("Indexing " + page.getName() + "..."); // make a new, empty document Document doc = new Document(); if (text == null) return doc; // Raw name is the keyword we'll use to refer to this document for updates. Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED); doc.add(field); // Body text. It is stored in the doc for search contexts. field = new Field( LUCENE_PAGE_CONTENTS, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); // Allow searching by page name. Both beautified and raw String unTokenizedTitle = StringUtils.replaceChars( page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED, c_punctuationSpaces); field = new Field( LUCENE_PAGE_NAME, TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); // Allow searching by authorname if (page.getAuthor() != null) { field = new Field( LUCENE_AUTHOR, page.getAuthor(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); } // Now add the names of the attachments of this page try { Collection attachments = m_engine.getAttachmentManager().listAttachments(page); String attachmentNames = ""; for (Iterator it = attachments.iterator(); it.hasNext(); ) { Attachment att = (Attachment) it.next(); attachmentNames += att.getName() + ";"; } field = new Field( LUCENE_ATTACHMENTS, attachmentNames, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); } catch (ProviderException e) { // Unable to read attachments log.error("Failed to get 
attachments for page", e); } writer.addDocument(doc); return doc; }
/** * Performs a full Lucene reindex, if necessary. * * @throws IOException If there's a problem during indexing */ protected void doFullLuceneReindex() throws IOException { File dir = new File(m_luceneDirectory); String[] filelist = dir.list(); if (filelist == null) { throw new IOException( "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath()); } try { if (filelist.length == 0) { // // No files? Reindex! // Date start = new Date(); IndexWriter writer = null; log.info("Starting Lucene reindexing, this can take a couple minutes..."); Directory luceneDir = new SimpleFSDirectory(dir, null); try { writer = getIndexWriter(luceneDir); Collection allPages = m_engine.getPageManager().getAllPages(); for (Iterator iterator = allPages.iterator(); iterator.hasNext(); ) { WikiPage page = (WikiPage) iterator.next(); try { String text = m_engine .getPageManager() .getPageText(page.getName(), WikiProvider.LATEST_VERSION); luceneIndexPage(page, text, writer); } catch (IOException e) { log.warn("Unable to index page " + page.getName() + ", continuing to next ", e); } } Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments(); for (Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) { Attachment att = (Attachment) iterator.next(); try { String text = getAttachmentContent(att.getName(), WikiProvider.LATEST_VERSION); luceneIndexPage(att, text, writer); } catch (IOException e) { log.warn("Unable to index attachment " + att.getName() + ", continuing to next", e); } } } finally { close(writer); } Date end = new Date(); log.info( "Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds."); } else { log.info("Files found in Lucene directory, not reindexing."); } } catch (NoClassDefFoundError e) { log.info("Lucene libraries do not exist - not using Lucene."); } catch (IOException e) { log.error("Problem while creating Lucene index - not using Lucene.", e); } catch (ProviderException e) { 
log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e); throw new IllegalArgumentException("unable to create Lucene index"); } catch (Exception e) { log.error("Unable to start lucene", e); } }