/** {@inheritDoc} */ @Override public final int doWikiStartTag() throws IOException { WikiEngine engine = m_wikiContext.getEngine(); WikiPage page = m_wikiContext.getPage(); if (page != null) { if (page instanceof Attachment) { pageContext.getOut().print(engine.beautifyTitle(((Attachment) page).getParentName())); } else { String name = page.getName(); int entrystart = name.indexOf("_blogentry_"); if (entrystart != -1) { name = name.substring(0, entrystart); } int commentstart = name.indexOf("_comments_"); if (commentstart != -1) { name = name.substring(0, commentstart); } pageContext.getOut().print(engine.beautifyTitle(name)); } } return SKIP_BODY; }
/** {@inheritDoc} */ @Override public void initialize(WikiEngine engine, Properties properties) throws WikiException { super.initialize(engine, properties); m_storageDir = WikiEngine.getRequiredProperty(properties, PROP_STORAGEDIR); String patternString = engine.getWikiProperties().getProperty(PROP_DISABLECACHE); if (patternString != null) { m_disableCache = Pattern.compile(patternString); } // // Check if the directory exists - if it doesn't, create it. // File f = new File(m_storageDir); if (!f.exists()) { f.mkdirs(); } // // Some sanity checks // if (!f.exists()) throw new WikiException( "Could not find or create attachment storage directory '" + m_storageDir + "'"); if (!f.canWrite()) throw new WikiException( "Cannot write to the attachment storage directory '" + m_storageDir + "'"); if (!f.isDirectory()) throw new WikiException( "Your attachment storage points to a file, not a directory: '" + m_storageDir + "'"); }
// FIXME: Should also use events! public void deleteAttachment(Attachment att) throws ProviderException { if (m_provider == null) return; m_provider.deleteAttachment(att); m_engine.getSearchManager().pageRemoved(att); m_engine.getReferenceManager().clearPageEntries(att.getName()); }
/** * Makes WikiText from a Collection. * * @param links Collection to make into WikiText. * @param separator Separator string to use. * @param numItems How many items to show. * @return The WikiText */ protected String wikitizeCollection(Collection links, String separator, int numItems) { if (links == null || links.isEmpty()) return ""; StringBuffer output = new StringBuffer(); Iterator it = links.iterator(); int count = 0; // // The output will be B Item[1] A S B Item[2] A S B Item[3] A // while (it.hasNext() && ((count < numItems) || (numItems == ALL_ITEMS))) { String value = (String) it.next(); if (count > 0) { output.append(m_after); output.append(m_separator); } output.append(m_before); // Make a Wiki markup link. See TranslatorReader. output.append("[" + m_engine.beautifyTitle(value) + "|" + value + "]"); count++; } // // Output final item - if there have been none, no "after" is printed // if (count > 0) output.append(m_after); return output.toString(); }
public final int doWikiStartTag() throws IOException { WikiEngine engine = m_wikiContext.getEngine(); WikiPage page = m_wikiContext.getPage(); try { if (page != null) { long size = page.getSize(); if (size == -1 && engine.pageExists(page)) // should never happen with attachments { size = engine.getPureText(page.getName(), page.getVersion()).length(); page.setSize(size); } pageContext.getOut().write(Long.toString(size)); } } catch (ProviderException e) { log.warn("Providers did not work: ", e); pageContext.getOut().write("Error determining page size: " + e.getMessage()); } return SKIP_BODY; }
/** * Stores an attachment directly from a stream. If the attachment did not exist previously, this * method will create it. If it did exist, it stores a new version. * * @param att Attachment to store this under. * @param in InputStream from which the attachment contents will be read. * @throws IOException If writing the attachment failed. * @throws ProviderException If something else went wrong. */ public void storeAttachment(Attachment att, InputStream in) throws IOException, ProviderException { if (m_provider == null) { return; } // // Checks if the actual, real page exists without any modifications // or aliases. We cannot store an attachment to a non-existant page. // if (!m_engine.getPageManager().pageExists(att.getParentName())) { // the caller should catch the exception and use the exception text as an i18n key throw new ProviderException("attach.parent.not.exist"); } m_provider.putAttachmentData(att, in); m_engine.getReferenceManager().updateReferences(att.getName(), new java.util.Vector()); WikiPage parent = new WikiPage(m_engine, att.getParentName()); m_engine.updateReferences(parent); m_engine.getSearchManager().reindexPage(att); }
/** * Fetches the attachment content from the repository. Content is flat text that can be used for * indexing/searching or display * * @param attachmentName Name of the attachment. * @param version The version of the attachment. * @return the content of the Attachment as a String. */ protected String getAttachmentContent(String attachmentName, int version) { AttachmentManager mgr = m_engine.getAttachmentManager(); try { Attachment att = mgr.getAttachmentInfo(attachmentName, version); // FIXME: Find out why sometimes att is null if (att != null) { return getAttachmentContent(att); } } catch (ProviderException e) { log.error("Attachment cannot be loaded", e); } // Something was wrong, no result is returned. return null; }
// FIXME: This API should be changed to return a List. @SuppressWarnings("unchecked") public Collection listAttachments(WikiPage wikipage) throws ProviderException { if (m_provider == null) { return new ArrayList(); } Collection atts = m_provider.listAttachments(wikipage); // // This is just a sanity check; all of our providers return a Collection. // if (atts instanceof List) { m_engine.getPageSorter().sortPages((List) atts); } return atts; }
/** {@inheritDoc} */ public void initialize(WikiEngine engine, Properties props) throws NoRequiredPropertyException, IOException { m_engine = engine; m_luceneDirectory = engine.getWorkDir() + File.separator + LUCENE_DIR; int initialDelay = TextUtil.getIntegerProperty(props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY); int indexDelay = TextUtil.getIntegerProperty(props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY); m_analyzerClass = TextUtil.getStringProperty(props, PROP_LUCENE_ANALYZER, m_analyzerClass); // FIXME: Just to be simple for now, we will do full reindex // only if no files are in lucene directory. File dir = new File(m_luceneDirectory); log.info("Lucene enabled, cache will be in: " + dir.getAbsolutePath()); try { if (!dir.exists()) { dir.mkdirs(); } if (!dir.exists() || !dir.canWrite() || !dir.canRead()) { log.error("Cannot write to Lucene directory, disabling Lucene: " + dir.getAbsolutePath()); throw new IOException("Invalid Lucene directory."); } String[] filelist = dir.list(); if (filelist == null) { throw new IOException( "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath()); } } catch (IOException e) { log.error("Problem while creating Lucene index - not using Lucene.", e); } // Start the Lucene update thread, which waits first // for a little while before starting to go through // the Lucene "pages that need updating". LuceneUpdater updater = new LuceneUpdater(m_engine, this, initialDelay, indexDelay); updater.start(); }
/** * Adds a page-text pair to the lucene update queue. Safe to call always * * @param page WikiPage to add to the update queue. */ public void reindexPage(WikiPage page) { if (page != null) { String text; // TODO: Think if this was better done in the thread itself? if (page instanceof Attachment) { text = getAttachmentContent((Attachment) page); } else { text = m_engine.getPureText(page); } if (text != null) { // Add work item to m_updates queue. Object[] pair = new Object[2]; pair[0] = page; pair[1] = text; m_updates.add(pair); log.debug("Scheduling page " + page.getName() + " for index update"); } } }
/** * @param att Attachment to get content for. Filename extension is used to determine the type of * the attachment. * @return String representing the content of the file. FIXME This is a very simple implementation * of some text-based attachment, mainly used for testing. This should be replaced /moved to * Attachment search providers or some other 'pluggable' wat to search attachments */ protected String getAttachmentContent(Attachment att) { AttachmentManager mgr = m_engine.getAttachmentManager(); // FIXME: Add attachment plugin structure String filename = att.getFileName(); boolean searchSuffix = false; for (String suffix : SEARCHABLE_FILE_SUFFIXES) { if (filename.endsWith(suffix)) { searchSuffix = true; } } if (searchSuffix) { InputStream attStream; try { attStream = mgr.getAttachmentStream(att); StringWriter sout = new StringWriter(); FileUtil.copyContents(new InputStreamReader(attStream), sout); attStream.close(); sout.close(); return sout.toString(); } catch (ProviderException e) { log.error("Attachment cannot be loaded", e); return null; } catch (IOException e) { log.error("Attachment cannot be loaded", e); return null; } } return null; }
/** * Filters a collection according to the include and exclude parameters. * * @param c The collection to filter. * @return A filtered collection. */ protected Collection filterCollection(Collection c) { ArrayList<Object> result = new ArrayList<Object>(); PatternMatcher pm = new Perl5Matcher(); for (Iterator i = c.iterator(); i.hasNext(); ) { String pageName = null; Object objectje = i.next(); if (objectje instanceof WikiPage) { pageName = ((WikiPage) objectje).getName(); } else { pageName = (String) objectje; } // // If include parameter exists, then by default we include only // those // pages in it (excluding the ones in the exclude pattern list). // // include='*' means the same as no include. // boolean includeThis = m_include == null; if (m_include != null) { for (int j = 0; j < m_include.length; j++) { if (pm.matches(pageName, m_include[j])) { includeThis = true; break; } } } if (m_exclude != null) { for (int j = 0; j < m_exclude.length; j++) { if (pm.matches(pageName, m_exclude[j])) { includeThis = false; break; // The inner loop, continue on the next item } } } if (includeThis) { if (objectje instanceof WikiPage) { result.add(objectje); } else { result.add(pageName); } // // if we want to show the last modified date of the most // recently change page, we keep a "high watermark" here: WikiPage page = null; if (m_lastModified) { page = m_engine.getPage(pageName); if (page != null) { Date lastModPage = page.getLastModified(); if (log.isDebugEnabled()) { log.debug("lastModified Date of page " + pageName + " : " + m_dateLastModified); } if (lastModPage.after(m_dateLastModified)) { m_dateLastModified = lastModPage; } } } } } return result; }
/** * Figures out the full attachment name from the context and attachment name. * * @param context The current WikiContext * @param attachmentname The file name of the attachment. * @param version A particular version. * @return Attachment, or null, if no such attachment or version exists. * @throws ProviderException If something goes wrong. */ public Attachment getAttachmentInfo(WikiContext context, String attachmentname, int version) throws ProviderException { if (m_provider == null) { return null; } WikiPage currentPage = null; if (context != null) { currentPage = context.getPage(); } // // Figure out the parent page of this attachment. If we can't find it, // we'll assume this refers directly to the attachment. // int cutpt = attachmentname.lastIndexOf('/'); if (cutpt != -1) { String parentPage = attachmentname.substring(0, cutpt); parentPage = MarkupParser.cleanLink(parentPage); attachmentname = attachmentname.substring(cutpt + 1); // If we for some reason have an empty parent page name; // this can't be an attachment if (parentPage.length() == 0) return null; currentPage = m_engine.getPage(parentPage); // // Go check for legacy name // // FIXME: This should be resolved using CommandResolver, // not this adhoc way. This also assumes that the // legacy charset is a subset of the full allowed set. if (currentPage == null) { currentPage = m_engine.getPage(MarkupParser.wikifyLink(parentPage)); } } // // If the page cannot be determined, we cannot possibly find the // attachments. // if (currentPage == null || currentPage.getName().length() == 0) { return null; } // System.out.println("Seeking info on "+currentPage+"::"+attachmentname); // // Finally, figure out whether this is a real attachment or a generated // attachment. // Attachment att; att = getDynamicAttachment(currentPage.getName() + "/" + attachmentname); if (att == null) { att = m_provider.getAttachmentInfo(currentPage, attachmentname, version); } return att; }
/** * Searches pages using a particular combination of flags. * * @param query The query to perform in Lucene query language * @param flags A set of flags * @return A Collection of SearchResult instances * @throws ProviderException if there is a problem with the backend */ public Collection findPages(String query, int flags) throws ProviderException { IndexSearcher searcher = null; ArrayList<SearchResult> list = null; Highlighter highlighter = null; try { String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS }; QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer()); // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() ); Query luceneQuery = qp.parse(query); if ((flags & FLAG_CONTEXTS) != 0) { highlighter = new Highlighter( new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(luceneQuery)); } try { File dir = new File(m_luceneDirectory); Directory luceneDir = new SimpleFSDirectory(dir, null); IndexReader reader = IndexReader.open(luceneDir); searcher = new IndexSearcher(reader); } catch (Exception ex) { log.info("Lucene not yet ready; indexing not started", ex); return null; } ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs; list = new ArrayList<SearchResult>(hits.length); for (int curr = 0; curr < hits.length; curr++) { int docID = hits[curr].doc; Document doc = searcher.doc(docID); String pageName = doc.get(LUCENE_ID); WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION); if (page != null) { if (page instanceof Attachment) { // Currently attachments don't look nice on the search-results page // When the search-results are cleaned up this can be enabled again. } int score = (int) (hits[curr].score * 100); // Get highlighted search contexts String text = doc.get(LUCENE_PAGE_CONTENTS); String[] fragments = new String[0]; if (text != null && highlighter != null) { TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text)); fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS); } SearchResult result = new SearchResultImpl(page, score, fragments); list.add(result); } else { log.error( "Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache"); pageRemoved(new WikiPage(m_engine, pageName)); } } } catch (IOException e) { log.error("Failed during lucene search", e); } catch (ParseException e) { log.info("Broken query; cannot parse query ", e); throw new ProviderException( "You have entered a query Lucene cannot process: " + e.getMessage()); } catch (InvalidTokenOffsetsException e) { log.error("Tokens are incompatible with provided text ", e); } finally { if (searcher != null) { try { searcher.close(); } catch (IOException e) { log.error(e); } } } return list; }
/** * Indexes page using the given IndexWriter. * * @param page WikiPage * @param text Page text to index * @param writer The Lucene IndexWriter to use for indexing * @return the created index Document * @throws IOException If there's an indexing problem */ protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer) throws IOException { if (log.isDebugEnabled()) log.debug("Indexing " + page.getName() + "..."); // make a new, empty document Document doc = new Document(); if (text == null) return doc; // Raw name is the keyword we'll use to refer to this document for updates. Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED); doc.add(field); // Body text. It is stored in the doc for search contexts. field = new Field( LUCENE_PAGE_CONTENTS, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); // Allow searching by page name. Both beautified and raw String unTokenizedTitle = StringUtils.replaceChars( page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED, c_punctuationSpaces); field = new Field( LUCENE_PAGE_NAME, TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); // Allow searching by authorname if (page.getAuthor() != null) { field = new Field( LUCENE_AUTHOR, page.getAuthor(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); } // Now add the names of the attachments of this page try { Collection attachments = m_engine.getAttachmentManager().listAttachments(page); String attachmentNames = ""; for (Iterator it = attachments.iterator(); it.hasNext(); ) { Attachment att = (Attachment) it.next(); attachmentNames += att.getName() + ";"; } field = new Field( LUCENE_ATTACHMENTS, attachmentNames, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); doc.add(field); } catch (ProviderException e) { // Unable to read attachments log.error("Failed to get attachments for page", e); } writer.addDocument(doc); return doc; }
/** * Performs a full Lucene reindex, if necessary. * * @throws IOException If there's a problem during indexing */ protected void doFullLuceneReindex() throws IOException { File dir = new File(m_luceneDirectory); String[] filelist = dir.list(); if (filelist == null) { throw new IOException( "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath()); } try { if (filelist.length == 0) { // // No files? Reindex! // Date start = new Date(); IndexWriter writer = null; log.info("Starting Lucene reindexing, this can take a couple minutes..."); Directory luceneDir = new SimpleFSDirectory(dir, null); try { writer = getIndexWriter(luceneDir); Collection allPages = m_engine.getPageManager().getAllPages(); for (Iterator iterator = allPages.iterator(); iterator.hasNext(); ) { WikiPage page = (WikiPage) iterator.next(); try { String text = m_engine .getPageManager() .getPageText(page.getName(), WikiProvider.LATEST_VERSION); luceneIndexPage(page, text, writer); } catch (IOException e) { log.warn("Unable to index page " + page.getName() + ", continuing to next ", e); } } Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments(); for (Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) { Attachment att = (Attachment) iterator.next(); try { String text = getAttachmentContent(att.getName(), WikiProvider.LATEST_VERSION); luceneIndexPage(att, text, writer); } catch (IOException e) { log.warn("Unable to index attachment " + att.getName() + ", continuing to next", e); } } } finally { close(writer); } Date end = new Date(); log.info( "Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds."); } else { log.info("Files found in Lucene directory, not reindexing."); } } catch (NoClassDefFoundError e) { log.info("Lucene libraries do not exist - not using Lucene."); } catch (IOException e) { log.error("Problem while creating Lucene index - not using Lucene.", e); } catch (ProviderException e) { log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e); throw new IllegalArgumentException("unable to create Lucene index"); } catch (Exception e) { log.error("Unable to start lucene", e); } }