private void writePages( Writer writer, String virtualWiki, List<String> topicNames, boolean excludeHistory) throws DataAccessException, IOException, MigrationException { // note that effort is being made to re-use temporary objects as this // code can generate an OOM "GC overhead limit exceeded" with HUGE (500MB) topics // since the garbage collector ends up being invoked excessively. TopicVersion topicVersion; Topic topic; WikiUser user; // choose 100,000 as an arbitrary max Pagination pagination = new Pagination(100000, 0); List<Integer> topicVersionIds; Map<String, String> textAttributes = new HashMap<String, String>(); textAttributes.put("xml:space", "preserve"); for (String topicName : topicNames) { topicVersionIds = new ArrayList<Integer>(); topic = WikiBase.getDataHandler().lookupTopic(virtualWiki, topicName, false); if (topic == null) { throw new MigrationException( "Failure while exporting: topic " + topicName + " does not exist"); } writer.append("\n<page>"); writer.append('\n'); XMLUtil.buildTag(writer, "title", topic.getName(), true); writer.append('\n'); XMLUtil.buildTag(writer, "id", topic.getTopicId()); if (excludeHistory) { // only include the most recent version topicVersionIds.add(topic.getCurrentVersionId()); } else { // FIXME - changes sorted newest-to-oldest, should be reverse List<RecentChange> changes = WikiBase.getDataHandler().getTopicHistory(topic, pagination, true); for (int i = (changes.size() - 1); i >= 0; i--) { topicVersionIds.add(changes.get(i).getTopicVersionId()); } } for (int topicVersionId : topicVersionIds) { topicVersion = WikiBase.getDataHandler().lookupTopicVersion(topicVersionId); writer.append("\n<revision>"); writer.append('\n'); XMLUtil.buildTag(writer, "id", topicVersion.getTopicVersionId()); writer.append('\n'); XMLUtil.buildTag( writer, "timestamp", this.parseJAMWikiTimestamp(topicVersion.getEditDate()), true); writer.append("\n<contributor>"); user = (topicVersion.getAuthorId() != null) ? WikiBase.getDataHandler().lookupWikiUser(topicVersion.getAuthorId()) : null; if (user != null) { writer.append('\n'); XMLUtil.buildTag(writer, "username", user.getUsername(), true); writer.append('\n'); XMLUtil.buildTag(writer, "id", user.getUserId()); } else if (Utilities.isIpAddress(topicVersion.getAuthorDisplay())) { writer.append('\n'); XMLUtil.buildTag(writer, "ip", topicVersion.getAuthorDisplay(), true); } else { writer.append('\n'); XMLUtil.buildTag(writer, "username", topicVersion.getAuthorDisplay(), true); } writer.append("\n</contributor>"); writer.append('\n'); XMLUtil.buildTag(writer, "comment", topicVersion.getEditComment(), true); writer.append('\n'); XMLUtil.buildTag(writer, "text", topicVersion.getVersionContent(), textAttributes, true); writer.append("\n</revision>"); // explicitly null out temp variables to improve garbage collection and // avoid OOM "GC overhead limit exceeded" errors on HUGE (500MB) topics topicVersion = null; user = null; } writer.append("\n</page>"); } }