Пример #1
0
  // FIXME: Should also use events!
  /**
   * Deletes an attachment through the provider, then removes it from the search index and clears
   * its entries in the reference manager. Does nothing when no attachment provider is configured.
   *
   * @param att The attachment to delete.
   * @throws ProviderException If the provider fails to delete the attachment.
   */
  public void deleteAttachment(Attachment att) throws ProviderException {
    if (m_provider != null) {
      // Remove the stored data first; the bookkeeping below only runs if that succeeded.
      m_provider.deleteAttachment(att);

      // Drop the attachment from the search index ...
      m_engine.getSearchManager().pageRemoved(att);

      // ... and forget everything the reference manager knows about it.
      m_engine.getReferenceManager().clearPageEntries(att.getName());
    }
  }
  /**
   * Makes WikiText from a Collection.
   *
   * <p>Every item is emitted as {@code m_before + "[" + beautified title + "|" + name + "]" +
   * m_after}; the separator is written between consecutive items only, never after the last one.
   *
   * @param links Collection to make into WikiText. Its elements are cast to String.
   * @param separator Separator string to use between items. When {@code null}, the configured
   *     {@code m_separator} is used instead.
   * @param numItems How many items to show; {@code ALL_ITEMS} shows every item.
   * @return The WikiText, or an empty string if {@code links} is null or empty.
   */
  protected String wikitizeCollection(Collection links, String separator, int numItems) {
    if (links == null || links.isEmpty()) {
      return "";
    }

    // Honour the caller-supplied separator (it used to be silently ignored in favour of
    // m_separator); keep the configured one as the fallback for a null argument.
    final String itemSeparator = (separator != null) ? separator : m_separator;

    StringBuffer output = new StringBuffer();
    int count = 0;

    //
    //  The output will be B Item[1] A S B Item[2] A S B Item[3] A
    //
    Iterator it = links.iterator();
    while (it.hasNext() && ((count < numItems) || (numItems == ALL_ITEMS))) {
      String value = (String) it.next();

      // Close the previous item and separate it from this one.
      if (count > 0) {
        output.append(m_after);
        output.append(itemSeparator);
      }

      output.append(m_before);

      // Make a Wiki markup link. See TranslatorReader.
      output.append("[" + m_engine.beautifyTitle(value) + "|" + value + "]");
      count++;
    }

    //
    //  Output final item - if there have been none, no "after" is printed
    //
    if (count > 0) {
      output.append(m_after);
    }

    return output.toString();
  }
Пример #3
0
  /**
   * Stores attachment content read from a stream. The provider is asked to store the data under
   * the given attachment; afterwards the reference manager and the search index are refreshed.
   * Does nothing when no attachment provider is configured.
   *
   * @param att Attachment to store this under.
   * @param in InputStream from which the attachment contents will be read.
   * @throws IOException If writing the attachment failed.
   * @throws ProviderException If the parent page does not exist (message is the i18n key
   *     {@code attach.parent.not.exist}) or if something else went wrong.
   */
  public void storeAttachment(Attachment att, InputStream in)
      throws IOException, ProviderException {
    if (m_provider == null) return;

    //
    //  The parent must be an actual, existing page (no aliases or other
    //  modifications): an attachment cannot hang off a non-existent page.
    //
    boolean parentExists = m_engine.getPageManager().pageExists(att.getParentName());
    if (!parentExists) {
      // the caller should catch the exception and use the exception text as an i18n key
      throw new ProviderException("attach.parent.not.exist");
    }

    m_provider.putAttachmentData(att, in);

    // Register the attachment with the reference manager, with an empty reference list.
    m_engine.getReferenceManager().updateReferences(att.getName(), new java.util.Vector());

    // Refresh the references of the parent page.
    m_engine.updateReferences(new WikiPage(m_engine, att.getParentName()));

    // Finally queue the attachment for search indexing.
    m_engine.getSearchManager().reindexPage(att);
  }
Пример #4
0
  /**
   * Looks up an attachment by name and version and returns its content as flat text, suitable for
   * indexing/searching or display.
   *
   * @param attachmentName Name of the attachment.
   * @param version The version of the attachment.
   * @return the content of the Attachment as a String, or {@code null} if the attachment could not
   *     be found or loaded.
   */
  protected String getAttachmentContent(String attachmentName, int version) {
    AttachmentManager attachmentManager = m_engine.getAttachmentManager();

    // Stays null when the lookup fails or yields nothing.
    String content = null;

    try {
      Attachment att = attachmentManager.getAttachmentInfo(attachmentName, version);
      // FIXME: Find out why sometimes att is null
      if (att != null) {
        content = getAttachmentContent(att);
      }
    } catch (ProviderException e) {
      log.error("Attachment cannot be loaded", e);
    }

    return content;
  }
Пример #5
0
  // FIXME: This API should be changed to return a List.
  /**
   * Lists the attachments of a page as reported by the provider. When the provider hands back a
   * List, it is sorted in place with the engine's page sorter before being returned.
   *
   * @param wikipage The page whose attachments are listed.
   * @return The provider's collection of attachments, or a new empty list when no attachment
   *     provider is configured.
   * @throws ProviderException If the provider fails to list the attachments.
   */
  @SuppressWarnings("unchecked")
  public Collection listAttachments(WikiPage wikipage) throws ProviderException {
    Collection attachments;

    if (m_provider != null) {
      attachments = m_provider.listAttachments(wikipage);

      //
      //  Sanity check: sorting is only possible on a List, so anything
      //  else is returned in whatever order the provider produced.
      //
      if (attachments instanceof List) {
        m_engine.getPageSorter().sortPages((List) attachments);
      }
    } else {
      attachments = new ArrayList();
    }

    return attachments;
  }
Пример #6
0
  /** {@inheritDoc} */
  public void initialize(WikiEngine engine, Properties props)
      throws NoRequiredPropertyException, IOException {
    m_engine = engine;

    // The index lives in a fixed subdirectory of the engine's work directory.
    m_luceneDirectory = engine.getWorkDir() + File.separator + LUCENE_DIR;

    // Updater timings and the analyzer class are configurable; defaults apply otherwise.
    final int initialDelay =
        TextUtil.getIntegerProperty(props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY);
    final int indexDelay =
        TextUtil.getIntegerProperty(props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY);
    m_analyzerClass = TextUtil.getStringProperty(props, PROP_LUCENE_ANALYZER, m_analyzerClass);

    // FIXME: Just to be simple for now, we will do full reindex
    // only if no files are in lucene directory.

    final File luceneDir = new File(m_luceneDirectory);

    log.info("Lucene enabled, cache will be in: " + luceneDir.getAbsolutePath());

    try {
      // Create the directory on first use.
      if (!luceneDir.exists()) {
        luceneDir.mkdirs();
      }

      // The directory must exist and be both readable and writable.
      boolean usable = luceneDir.exists() && luceneDir.canWrite() && luceneDir.canRead();
      if (!usable) {
        log.error(
            "Cannot write to Lucene directory, disabling Lucene: " + luceneDir.getAbsolutePath());
        throw new IOException("Invalid Lucene directory.");
      }

      // A null listing means the path cannot be listed as a directory.
      if (luceneDir.list() == null) {
        throw new IOException(
            "Invalid Lucene directory: cannot produce listing: " + luceneDir.getAbsolutePath());
      }
    } catch (IOException e) {
      // Directory problems are only logged here; they are not propagated to the caller.
      log.error("Problem while creating Lucene index - not using Lucene.", e);
    }

    // Start the Lucene update thread, which waits first
    // for a little while before starting to go through
    // the Lucene "pages that need updating".
    // NOTE: the updater is started even when the directory check above failed.
    LuceneUpdater updater = new LuceneUpdater(m_engine, this, initialDelay, indexDelay);
    updater.start();
  }
  /**
   * Renders wikitext to HTML using the plugin's common parser settings: links are passed through
   * a {@code CutMutator} built from {@code m_maxwidth}, and image inlining is switched off.
   *
   * @param context The WikiContext
   * @param wikitext The wikitext to render
   * @return The rendered HTML, or an empty string if rendering failed with an IOException.
   * @since 1.6.4
   */
  protected String makeHTML(WikiContext context, String wikitext) {
    RenderingManager renderer = m_engine.getRenderingManager();

    try {
      MarkupParser parser = renderer.getParser(context, wikitext);

      parser.addLinkTransmutator(new CutMutator(m_maxwidth));
      parser.enableImageInlining(false);

      WikiDocument document = parser.parse();

      return renderer.getHTML(context, document);
    } catch (IOException e) {
      log.error("Failed to convert page data to HTML", e);
      return "";
    }
  }
Пример #8
0
  /**
   * Adds a page-text pair to the lucene update queue. Safe to call always: a null page, or a page
   * for which no text can be obtained, is silently skipped.
   *
   * @param page WikiPage to add to the update queue.
   */
  public void reindexPage(WikiPage page) {
    if (page == null) {
      return;
    }

    // TODO: Think if this was better done in the thread itself?

    // Attachments are read through the attachment extractor, pages through the engine.
    final String text =
        (page instanceof Attachment)
            ? getAttachmentContent((Attachment) page)
            : m_engine.getPureText(page);

    if (text == null) {
      return;
    }

    // Add work item to m_updates queue: element 0 is the page, element 1 its text.
    m_updates.add(new Object[] {page, text});
    log.debug("Scheduling page " + page.getName() + " for index update");
  }
Пример #9
0
  /**
   * Returns the textual content of an attachment, provided its file name ends with one of the
   * {@code SEARCHABLE_FILE_SUFFIXES}.
   *
   * <p>FIXME: This is a very simple implementation for some text-based attachments, mainly used
   * for testing. This should be replaced by / moved to attachment search providers or some other
   * 'pluggable' way to search attachments.
   *
   * @param att Attachment to get content for. Filename extension is used to determine the type of
   *     the attachment.
   * @return String representing the content of the file, or {@code null} if the file type is not
   *     searchable or the attachment could not be read.
   */
  protected String getAttachmentContent(Attachment att) {
    AttachmentManager mgr = m_engine.getAttachmentManager();
    // FIXME: Add attachment plugin structure

    String filename = att.getFileName();

    // Only files with a known text-like suffix are read; one match is enough.
    boolean searchSuffix = false;
    for (String suffix : SEARCHABLE_FILE_SUFFIXES) {
      if (filename.endsWith(suffix)) {
        searchSuffix = true;
        break;
      }
    }

    if (!searchSuffix) {
      return null;
    }

    try {
      InputStream attStream = mgr.getAttachmentStream(att);

      try {
        StringWriter sout = new StringWriter();
        // NOTE(review): the reader uses the platform default charset, as before.
        FileUtil.copyContents(new InputStreamReader(attStream), sout);

        return sout.toString();
      } finally {
        // Always release the stream, including when copying fails midway.
        attStream.close();
      }
    } catch (ProviderException e) {
      log.error("Attachment cannot be loaded", e);
      return null;
    } catch (IOException e) {
      log.error("Attachment cannot be loaded", e);
      return null;
    }
  }
Пример #10
0
  /**
   * Builds the HTML for a table of contents of the current page.
   *
   * <p>The page's pure text is run through a {@code JSPWikiMarkupParser} with this plugin
   * registered as heading listener; the collected {@code m_buf} content is wrapped in a list
   * inside {@code div.toc}. While the parse runs, {@code VAR_ALREADY_PROCESSING} is set on the
   * context; if it is already set on entry, only the plain text "Table of Contents" is returned.
   *
   * @param context The current WikiContext.
   * @param params Plugin parameters; {@code PARAM_TITLE}, {@code PARAM_NUMBERED}, {@code
   *     PARAM_START} and {@code PARAM_PREFIX} are read.
   * @return The HTML of the table of contents.
   * @throws PluginException If the page text could not be parsed.
   */
  public String execute(WikiContext context, Map params) throws PluginException {
    WikiEngine engine = context.getEngine();
    WikiPage page = context.getPage();

    if (context.getVariable(VAR_ALREADY_PROCESSING) != null) return "Table of Contents";

    StringBuffer sb = new StringBuffer();

    sb.append("<div class=\"toc\">\n");
    sb.append("<div class=\"collapsebox\">\n");

    String title = (String) params.get(PARAM_TITLE);
    if (title != null) {
      sb.append("<h4>" + TextUtil.replaceEntities(title) + "</h4>\n");
    } else {
      sb.append("<h4>Table of Contents</h4>\n");
    }

    // should we use an ordered list?
    // Constant-first comparison keeps a missing or null parameter value from throwing.
    String numbered = (String) params.get(PARAM_NUMBERED);
    m_usingNumberedList = "true".equalsIgnoreCase(numbered) || "yes".equalsIgnoreCase(numbered);

    // if we are using a numbered list, get the rest of the parameters (if any) ...
    if (m_usingNumberedList) {
      int start = 0;
      String startStr = (String) params.get(PARAM_START);
      if ((startStr != null) && (startStr.matches("^\\d+$"))) {
        try {
          start = Integer.parseInt(startStr);
        } catch (NumberFormatException ignored) {
          // All digits, but too large for an int: fall back to the default start of 0
          // rather than letting an unchecked exception escape the plugin.
          start = 0;
        }
      }
      // The pattern admits digits only, so start cannot be negative here.

      m_starting = start;
      m_level1Index = start - 1;
      if (m_level1Index < 0) m_level1Index = 0;
      m_level2Index = 0;
      m_level3Index = 0;
      m_prefix = (String) params.get(PARAM_PREFIX);
      if (m_prefix == null) m_prefix = "";
      m_lastLevel = Heading.HEADING_LARGE;
    }

    try {
      String wikiText = engine.getPureText(page);

      // Mark the context so that a nested invocation returns early (see the check above).
      context.setVariable(VAR_ALREADY_PROCESSING, "x");
      JSPWikiMarkupParser parser = new JSPWikiMarkupParser(context, new StringReader(wikiText));
      parser.addHeadingListener(this);

      parser.parse();

      sb.append("<ul>\n" + m_buf.toString() + "</ul>\n");
    } catch (IOException e) {
      log.error("Could not construct table of contents", e);
      throw new PluginException("Unable to construct table of contents (see logs)");
    }

    sb.append("</div>\n</div>\n");

    return sb.toString();
  }
Пример #11
0
  /**
   * Figures out the full attachment name from the context and attachment name.
   *
   * <p>A name of the form {@code Parent/file} is resolved against the named parent page (falling
   * back to its wikified legacy name); a bare file name is resolved against the page of the given
   * context. Dynamic attachments take precedence over attachments stored in the provider.
   *
   * @param context The current WikiContext; may be null when the name carries its parent page.
   * @param attachmentname The file name of the attachment.
   * @param version A particular version.
   * @return Attachment, or null, if no such attachment or version exists.
   * @throws ProviderException If something goes wrong.
   */
  public Attachment getAttachmentInfo(WikiContext context, String attachmentname, int version)
      throws ProviderException {
    if (m_provider == null) return null;

    // Start from the context's page, if there is a context at all.
    WikiPage ownerPage = (context != null) ? context.getPage() : null;

    //
    //  Figure out the parent page of this attachment.  If we can't find it,
    //  we'll assume this refers directly to the attachment.
    //
    final int slash = attachmentname.lastIndexOf('/');

    if (slash >= 0) {
      String parentPage = MarkupParser.cleanLink(attachmentname.substring(0, slash));
      attachmentname = attachmentname.substring(slash + 1);

      // If we for some reason have an empty parent page name;
      // this can't be an attachment
      if (parentPage.length() == 0) {
        return null;
      }

      ownerPage = m_engine.getPage(parentPage);

      //
      // Go check for legacy name
      //
      // FIXME: This should be resolved using CommandResolver,
      //        not this adhoc way.  This also assumes that the
      //        legacy charset is a subset of the full allowed set.
      if (ownerPage == null) {
        ownerPage = m_engine.getPage(MarkupParser.wikifyLink(parentPage));
      }
    }

    //
    //  If the page cannot be determined, we cannot possibly find the
    //  attachments.
    //
    if (ownerPage == null || ownerPage.getName().length() == 0) {
      return null;
    }

    //
    //  Finally, figure out whether this is a real attachment or a generated
    //  attachment: dynamic ones are tried first, the provider second.
    //
    Attachment att = getDynamicAttachment(ownerPage.getName() + "/" + attachmentname);

    return (att != null) ? att : m_provider.getAttachmentInfo(ownerPage, attachmentname, version);
  }
  /**
   * Filters a collection according to the include and exclude parameters.
   *
   * <p>Elements may be WikiPage instances or page-name Strings; accepted elements are returned
   * unchanged. When {@code m_lastModified} is set, {@code m_dateLastModified} is advanced to the
   * most recent modification date among the accepted pages.
   *
   * @param c The collection to filter.
   * @return A filtered collection.
   */
  protected Collection filterCollection(Collection c) {
    ArrayList<Object> result = new ArrayList<Object>();

    PatternMatcher pm = new Perl5Matcher();

    for (Iterator i = c.iterator(); i.hasNext(); ) {
      Object item = i.next();
      String pageName =
          (item instanceof WikiPage) ? ((WikiPage) item).getName() : (String) item;

      //
      //  If include parameter exists, then by default we include only those
      //  pages in it (excluding the ones in the exclude pattern list).
      //
      //  include='*' means the same as no include.
      //
      boolean includeThis = m_include == null;

      if (m_include != null) {
        for (int j = 0; j < m_include.length; j++) {
          if (pm.matches(pageName, m_include[j])) {
            includeThis = true;
            break;
          }
        }
      }

      // Exclusion always wins over inclusion.
      if (m_exclude != null) {
        for (int j = 0; j < m_exclude.length; j++) {
          if (pm.matches(pageName, m_exclude[j])) {
            includeThis = false;
            break; // The inner loop, continue on the next item
          }
        }
      }

      if (!includeThis) {
        continue;
      }

      // Either the WikiPage itself or the name String, exactly as it came in.
      result.add(item);

      //
      //  if we want to show the last modified date of the most recently changed page, we keep a
      //  "high watermark" here:
      //
      if (m_lastModified) {
        WikiPage page = m_engine.getPage(pageName);
        if (page != null) {
          Date lastModPage = page.getLastModified();
          if (log.isDebugEnabled()) {
            // Log the date of the page being examined, not the current watermark.
            log.debug("lastModified Date of page " + pageName + " : " + lastModPage);
          }
          // A page without a modification date cannot move the watermark; a missing
          // watermark is replaced by the first date seen.
          if (lastModPage != null
              && (m_dateLastModified == null || lastModPage.after(m_dateLastModified))) {
            m_dateLastModified = lastModPage;
          }
        }
      }
    }

    return result;
  }
Пример #13
0
  /**
   * Searches pages using a particular combination of flags.
   *
   * <p>The query is parsed against the page contents, page name, author and attachment-name
   * fields. When {@code FLAG_CONTEXTS} is set, highlighted text fragments are attached to each
   * result. Index entries whose page can no longer be loaded are removed from the index.
   *
   * @param query The query to perform in Lucene query language
   * @param flags A set of flags
   * @return A Collection of SearchResult instances; {@code null} if the index could not be opened
   *     or the search failed before any result list was created.
   * @throws ProviderException if the query cannot be parsed
   */
  public Collection findPages(String query, int flags) throws ProviderException {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
      String[] queryfields = {
        LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS
      };
      QueryParser qp =
          new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

      Query luceneQuery = qp.parse(query);

      if ((flags & FLAG_CONTEXTS) != 0) {
        highlighter =
            new Highlighter(
                new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                new SimpleHTMLEncoder(),
                new QueryScorer(luceneQuery));
      }

      try {
        File dir = new File(m_luceneDirectory);
        Directory luceneDir = new SimpleFSDirectory(dir, null);
        // Keep a handle on the reader: closing the searcher does not close a reader
        // that was passed to its constructor, so it is released in the finally block.
        reader = IndexReader.open(luceneDir);
        searcher = new IndexSearcher(reader);
      } catch (Exception ex) {
        log.info("Lucene not yet ready; indexing not started", ex);
        return null;
      }

      ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

      list = new ArrayList<SearchResult>(hits.length);
      for (int curr = 0; curr < hits.length; curr++) {
        int docID = hits[curr].doc;
        Document doc = searcher.doc(docID);
        String pageName = doc.get(LUCENE_ID);
        WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

        if (page != null) {
          // Attachments are currently reported like any other page. They don't look nice
          // on the search-results page yet; special handling can be added once the
          // search results are cleaned up.

          // Lucene scores are floats; results carry them scaled to an int.
          int score = (int) (hits[curr].score * 100);

          // Get highlighted search contexts
          String text = doc.get(LUCENE_PAGE_CONTENTS);

          String[] fragments = new String[0];
          if (text != null && highlighter != null) {
            TokenStream tokenStream =
                getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
            fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
          }

          SearchResult result = new SearchResultImpl(page, score, fragments);
          list.add(result);
        } else {
          log.error(
              "Lucene found a result page '"
                  + pageName
                  + "' that could not be loaded, removing from Lucene cache");
          pageRemoved(new WikiPage(m_engine, pageName));
        }
      }
    } catch (IOException e) {
      log.error("Failed during lucene search", e);
    } catch (ParseException e) {
      log.info("Broken query; cannot parse query ", e);

      throw new ProviderException(
          "You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
      log.error("Tokens are incompatible with provided text ", e);
    } finally {
      if (searcher != null) {
        try {
          searcher.close();
        } catch (IOException e) {
          log.error(e);
        }
      }
      // Release the index files held by the reader, whether or not the search succeeded.
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          log.error(e);
        }
      }
    }

    return list;
  }
Пример #14
0
  /**
   * Builds a Lucene document for a page and adds it to the given IndexWriter.
   *
   * <p>The document carries the raw page name as id, the body text, a searchable page-name field
   * (beautified and punctuation-stripped forms), the author when known, and the names of the
   * page's attachments. When {@code text} is null, an empty document is returned and nothing is
   * written.
   *
   * @param page WikiPage
   * @param text Page text to index
   * @param writer The Lucene IndexWriter to use for indexing
   * @return the created index Document
   * @throws IOException If there's an indexing problem
   */
  protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer)
      throws IOException {
    if (log.isDebugEnabled()) {
      log.debug("Indexing " + page.getName() + "...");
    }

    // make a new, empty document
    Document doc = new Document();

    // Nothing to index: hand back the empty document without touching the writer.
    if (text == null) {
      return doc;
    }

    // Raw name is the keyword we'll use to refer to this document for updates.
    doc.add(new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Body text.  It is stored in the doc for search contexts.
    doc.add(
        new Field(
            LUCENE_PAGE_CONTENTS, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));

    // Allow searching by page name. Both beautified and raw
    String unTokenizedTitle =
        StringUtils.replaceChars(
            page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED, c_punctuationSpaces);
    String searchableName = TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle;

    doc.add(
        new Field(
            LUCENE_PAGE_NAME,
            searchableName,
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.NO));

    // Allow searching by authorname
    if (page.getAuthor() != null) {
      doc.add(
          new Field(
              LUCENE_AUTHOR,
              page.getAuthor(),
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.NO));
    }

    // Now add the names of the attachments of this page
    try {
      Collection attachments = m_engine.getAttachmentManager().listAttachments(page);

      // Each name is followed by a ';' (including the last one).
      StringBuilder attachmentNames = new StringBuilder();
      Iterator it = attachments.iterator();
      while (it.hasNext()) {
        Attachment att = (Attachment) it.next();
        attachmentNames.append(att.getName()).append(";");
      }

      doc.add(
          new Field(
              LUCENE_ATTACHMENTS,
              attachmentNames.toString(),
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.NO));
    } catch (ProviderException e) {
      // Unable to read attachments
      log.error("Failed to get attachments for page", e);
    }

    writer.addDocument(doc);

    return doc;
  }
Пример #15
0
  /**
   * Performs a full Lucene reindex, if necessary.
   *
   * <p>A reindex is considered necessary only when the Lucene directory is empty. In that case
   * every page and every attachment is indexed; a failure on a single item is logged and the run
   * continues with the next one.
   *
   * @throws IOException If the Lucene directory cannot be listed. I/O problems during the
   *     indexing itself are logged, not propagated.
   * @throws IllegalArgumentException If the pages or attachments cannot be read from the
   *     provider; the underlying ProviderException is attached as the cause.
   */
  protected void doFullLuceneReindex() throws IOException {
    File dir = new File(m_luceneDirectory);

    String[] filelist = dir.list();

    if (filelist == null) {
      throw new IOException(
          "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath());
    }

    try {
      if (filelist.length == 0) {
        //
        //  No files? Reindex!
        //
        Date start = new Date();
        IndexWriter writer = null;

        log.info("Starting Lucene reindexing, this can take a couple minutes...");

        Directory luceneDir = new SimpleFSDirectory(dir, null);

        try {
          writer = getIndexWriter(luceneDir);

          // First pass: all wiki pages.
          Collection allPages = m_engine.getPageManager().getAllPages();

          for (Iterator iterator = allPages.iterator(); iterator.hasNext(); ) {
            WikiPage page = (WikiPage) iterator.next();

            try {
              String text =
                  m_engine
                      .getPageManager()
                      .getPageText(page.getName(), WikiProvider.LATEST_VERSION);
              luceneIndexPage(page, text, writer);
            } catch (IOException e) {
              log.warn("Unable to index page " + page.getName() + ", continuing to next ", e);
            }
          }

          // Second pass: all attachments.
          Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments();
          for (Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) {
            Attachment att = (Attachment) iterator.next();

            try {
              String text = getAttachmentContent(att.getName(), WikiProvider.LATEST_VERSION);
              luceneIndexPage(att, text, writer);
            } catch (IOException e) {
              log.warn("Unable to index attachment " + att.getName() + ", continuing to next", e);
            }
          }

        } finally {
          // The writer is closed whether or not indexing completed.
          close(writer);
        }

        Date end = new Date();
        log.info(
            "Full Lucene index finished in "
                + (end.getTime() - start.getTime())
                + " milliseconds.");
      } else {
        log.info("Files found in Lucene directory, not reindexing.");
      }
    } catch (NoClassDefFoundError e) {
      log.info("Lucene libraries do not exist - not using Lucene.");
    } catch (IOException e) {
      log.error("Problem while creating Lucene index - not using Lucene.", e);
    } catch (ProviderException e) {
      log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e);
      // Chain the cause so that callers see why the index could not be built.
      throw new IllegalArgumentException("unable to create Lucene index", e);
    } catch (Exception e) {
      log.error("Unable to start lucene", e);
    }
  }