/**
 * Tweaks DTD entities on-the-fly, escaping the entities that we need to appear as-they-are inside
 * rendered HTML.
 *
 * <p>Every declaration of the form {@code <!ENTITY name "&#...;">} found in the DTD text is
 * replaced by {@code <!ENTITY name "&name;" >}, so that the entity value becomes the entity's own
 * reference instead of a numeric character reference.
 *
 * @author Laurent Caillette
 */
public class DtdTools {

  private static final Logger LOGGER = LoggerFactory.getLogger(DtdTools.class);

  /**
   * Matches an entity declaration whose quoted value starts with {@code &#}. Group 1 captures the
   * entity name, group 2 captures the value.
   */
  private static final Pattern PATTERN =
      Pattern.compile("\\<\\!ENTITY\\s+(\\w+)\\s+\\\"(&#(?:\\d|\\w|\\;)+)\\\"\\s*?\\>");

  static {
    // The '{}' placeholder is required, otherwise SLF4J silently drops the argument.
    LOGGER.debug("Crafted regex {}", PATTERN.pattern());
  }

  /** Replacement for {@link #PATTERN}: {@code $1} is the captured entity name. */
  private static final String REPLACEMENT = "<!ENTITY $1 \"&$1;\" > ";

  private DtdTools() {}

  /**
   * Reads the whole DTD behind the given source and returns a new source serving the same DTD with
   * its matching entity declarations rewritten.
   *
   * <p>The character stream is used when present; otherwise the byte stream is decoded using the
   * encoding declared by the source. The streams of the given source are consumed but not closed.
   *
   * @param unescapedInputSource a non-null object providing a character stream or a byte stream.
   * @return a new {@code InputSource} carrying the same system id, public id and encoding, and
   *     whose character stream holds the rewritten DTD.
   * @throws IOException if reading the underlying stream fails.
   * @throws IllegalArgumentException if the source provides neither a character stream nor a byte
   *     stream.
   */
  public static InputSource escapeEntities(final InputSource unescapedInputSource)
      throws IOException {
    final String unescapedDtd;
    if (null == unescapedInputSource.getCharacterStream()) {
      if (null == unescapedInputSource.getByteStream()) {
        throw new IllegalArgumentException("unescapedInputSource provides no valid stream");
      } else {
        // Decode with the encoding the source declares rather than always using the platform
        // default charset.
        // NOTE(review): assumes IOUtils is Apache Commons IO, where a null encoding falls back to
        // the platform default (i.e. the previous behavior) -- confirm.
        unescapedDtd =
            IOUtils.toString(
                unescapedInputSource.getByteStream(), unescapedInputSource.getEncoding());
      }
    } else {
      unescapedDtd = IOUtils.toString(unescapedInputSource.getCharacterStream());
    }

    final Matcher matcher = PATTERN.matcher(unescapedDtd);
    final String escapedDtd = matcher.replaceAll(REPLACEMENT);

    final InputSource escapedInputSource = new InputSource(unescapedInputSource.getSystemId());
    escapedInputSource.setEncoding(unescapedInputSource.getEncoding());
    escapedInputSource.setPublicId(unescapedInputSource.getPublicId());
    escapedInputSource.setCharacterStream(new StringReader(escapedDtd));
    return escapedInputSource;
  }
}
/** * Unique implementation encapsulating the request for a rendered document or a resource. * * <p>TODO: use ANTLR for parsing. * * @author Laurent Caillette */ public final class GenericRequest implements DocumentRequest, ResourceRequest { private static final Logger LOGGER = LoggerFactory.getLogger(GenericRequest.class); public static final String ERRORPAGE_SUFFIX = "/error.html"; public static final String TAGSET_PARAMETER_NAME = "tags"; public static final ImmutableSet<String> SUPPORTED_PARAMETER_NAMES = ImmutableSet.of(ALTERNATE_STYLESHEET_PARAMETER_NAME, TAGSET_PARAMETER_NAME); /** <a href="http://www.ietf.org/rfc/rfc2396.txt" >RFC</a> p. 26-27. */ public static final String LIST_SEPARATOR = ";"; // ======= // For all // ======= private final String originalTarget; @Override public String getOriginalTarget() { return originalTarget; } private final String documentSourceName; @Override public String getDocumentSourceName() { return documentSourceName; } private final boolean rendered; @Override public boolean isRendered() { return rendered; } // ======== // Rendered // ======== private final RenditionMimeType renditionMimeType; @Override public RenditionMimeType getRenditionMimeType() { return renditionMimeType; } private final ResourceName alternateStylesheet; @Override public ResourceName getAlternateStylesheet() { return alternateStylesheet; } private final ImmutableSet<Tag> tags; @Override public ImmutableSet<Tag> getTags() { return tags; } private final boolean displayProblems; @Override public boolean getDisplayProblems() { return displayProblems; } private final PageIdentifier pageIdentifier; @Override public PageIdentifier getPageIdentifier() { return pageIdentifier; } // ============ // Non-rendered // ============ private final String resourceExtension; /** * Always null if {@link #isRendered()} is true. Never null nor blank if {@link #isRendered()} is * false. * * @return a {@code String} that may be null, but never blank. 
*/ @Override public String getResourceExtension() { return resourceExtension; } // ============ // Constructors // ============ private GenericRequest( final String originalTarget, final String documentSourceName, final boolean displayProblems, final RenditionMimeType renditionMimeType, final PageIdentifier pageIdentifier, final ResourceName alternateStylesheet, final ImmutableSet<Tag> tags) { this.pageIdentifier = pageIdentifier; checkHasCharacters(originalTarget); checkHasCharacters(documentSourceName); this.documentSourceName = documentSourceName; this.rendered = true; this.renditionMimeType = checkNotNull(renditionMimeType); this.alternateStylesheet = alternateStylesheet; this.tags = checkNotNull(tags); this.displayProblems = displayProblems; this.resourceExtension = null; this.originalTarget = rebuildOriginalTarget(false); } private String rebuildOriginalTarget(final boolean addProblemPage) { final ImmutableList.Builder<String> parametersBuilder = ImmutableList.builder(); if (alternateStylesheet != null) { parametersBuilder.add( ALTERNATE_STYLESHEET_PARAMETER_NAME + "=" + alternateStylesheet.getName()); } if (!getTags().isEmpty()) { final Iterable<String> tagNames = Iterables.transform(getTags(), Tag.EXTRACT_TAG_NAME); parametersBuilder.add(TAGSET_PARAMETER_NAME + "=" + Joiner.on(LIST_SEPARATOR).join(tagNames)); } final ImmutableList<String> parameters = parametersBuilder.build(); return documentSourceName + (pageIdentifier == null ? "" : PAGEIDENTIFIER_PREFIX + pageIdentifier.getName()) + "." + renditionMimeType.getFileExtension() + (addProblemPage ? ERRORPAGE_SUFFIX : "") + (parameters.isEmpty() ? "" : "?" + Joiner.on("&").join(parameters)); } private GenericRequest( final String originalTarget, final String documentSourceName, final String resourceExtension) { checkHasCharacters(originalTarget); this.originalTarget = documentSourceName + "." 
+ resourceExtension; checkHasCharacters(documentSourceName); this.documentSourceName = documentSourceName; this.rendered = false; checkHasCharacters(resourceExtension); this.resourceExtension = resourceExtension; renditionMimeType = null; pageIdentifier = null; alternateStylesheet = null; tags = null; displayProblems = false; } private static void checkHasCharacters(final String string) { checkArgument(!StringUtils.isBlank(string)); } // ======= // Parsing // ======= private static final String TAG_PATTERN = "[a-zA-Z0-9][a-zA-Z0-9\\-_]*"; /** * Allow non-word characters only in the middle of word characters and if they are not * consecutive. */ private static final String PATH_SEGMENT_PATTERN = "[A-Za-z0-9]+(?:(?:-|_|\\.)[A-Za-z0-9]+)*"; private static Pattern createPattern() { final StringBuilder buffer = new StringBuilder(); buffer.append("("); // The path without extension. No double dots for security reasons (forbid '..'). buffer.append("((?:\\/" + PATH_SEGMENT_PATTERN + ")+)"); // Page identifier. buffer.append("(?:--("); buffer.append(PageIdentifier.PATTERN.pattern()); buffer.append("))?"); // The extension defining the MIME type. buffer.append("(?:\\.("); final ImmutableList<String> allExtensions = ImmutableList.<String>builder() .addAll(RenditionMimeType.getFileExtensions()) .addAll(RawResource.getFileExtensions()) .build(); buffer.append(Joiner.on("|").join(allExtensions)); buffer.append("))"); buffer.append(")"); // This duplicates the 'tag' rule in ANTLR grammar. Shame. 
final String parameter = "([a-zA-Z0-9\\-\\=_&\\./" + LIST_SEPARATOR + "]+)"; buffer.append("(?:\\?"); buffer.append(parameter); buffer.append(")?"); return Pattern.compile(buffer.toString()); } private static final Pattern DOCUMENT_PATTERN = createPattern(); static { LOGGER.debug("Crafted regex: ", DOCUMENT_PATTERN.pattern()); } private static String extractExtension(final String path) throws MalformedRequestException { final Matcher matcher = DOCUMENT_PATTERN.matcher(path); if (matcher.find() && matcher.groupCount() >= 4) { return matcher.group(4); } else { throw new MalformedRequestException("Doesn't contain an extension: '" + path + "'"); } } private static ImmutableMap<String, String> getQueryMap(final String query) throws MalformedRequestException { if (StringUtils.isBlank(query)) { return ImmutableMap.of(); } else { final Iterable<String> params = Splitter.on('&').split(query); final ImmutableMap.Builder<String, String> map = ImmutableMap.builder(); for (final String param : params) { final ImmutableList<String> strings = ImmutableList.copyOf(Splitter.on('=').split(param)); final String name = strings.get(0); if (strings.size() > 2) { throw new MalformedRequestException("Multiple '=' for parameter " + name); } final String value; if (strings.size() > 1) { value = strings.get(1); } else { value = null; } if (map.build().keySet().contains(name)) { throw new MalformedRequestException("Duplicate value for parameter " + name); } map.put(name, value); } return map.build(); } } private static ResourceName extractResourceName(final ImmutableMap<String, String> parameterMap) { final String parameterValue = parameterMap.get(ALTERNATE_STYLESHEET_PARAMETER_NAME); if (parameterValue == null) { return null; } else { return new ResourceName(parameterValue); } } private static final Pattern TAGS_PATTERN = Pattern.compile(TAG_PATTERN + "(?:" + LIST_SEPARATOR + TAG_PATTERN + ")*"); private static final Pattern TAGS_SEPARATOR_PATTERN = Pattern.compile(LIST_SEPARATOR); private 
static ImmutableSet<Tag> parseTags(final String value) throws MalformedRequestException { if (TAGS_PATTERN.matcher(value).matches()) { return RenderingTools.toTagSet(ImmutableSet.copyOf(TAGS_SEPARATOR_PATTERN.split(value))); } else { throw new MalformedRequestException("Bad tags: '" + value + "'"); } } private static ImmutableSet<Tag> extractTags(final ImmutableMap<String, String> parameterMap) throws MalformedRequestException { final String parameterValue = parameterMap.get(TAGSET_PARAMETER_NAME); if (parameterValue == null) { return ImmutableSet.of(); } else { return parseTags(parameterValue); } } private static void verifyAllParameterNames(final Set<String> parameterNames) throws MalformedRequestException { for (final String parameterName : parameterNames) { if (!SUPPORTED_PARAMETER_NAMES.contains(parameterName)) { throw new MalformedRequestException("Unsupported query parameter: '" + parameterName + "'"); } } } public static AnyRequest parse(final String originalTarget) throws MalformedRequestException { final Matcher matcher = DOCUMENT_PATTERN.matcher(originalTarget); if (matcher.find() && matcher.groupCount() >= 4) { // Document source name plus extension, minus page identifier. final String fullTarget = matcher.group(2) + "." + matcher.group(4); final boolean showProblems = fullTarget.endsWith(ERRORPAGE_SUFFIX); final String targetMinusError; if (showProblems) { targetMinusError = fullTarget.substring(0, fullTarget.length() - ERRORPAGE_SUFFIX.length()); } else { targetMinusError = fullTarget; } final String rawDocumentMimeType = extractExtension(targetMinusError); final String rawDocumentSourceName = targetMinusError.substring( 0, targetMinusError.length() - rawDocumentMimeType.length() - 1); final String maybePageIdentifier = matcher.group(3); final PageIdentifier pageIdentifier = maybePageIdentifier == null ? 
null : new PageIdentifier(maybePageIdentifier); final RenditionMimeType renditionMimeType = RenditionMimeType.maybeValueOf( rawDocumentMimeType == null ? null : rawDocumentMimeType.toUpperCase()); final ImmutableMap<String, String> parameterMap = matcher.groupCount() >= 5 ? getQueryMap(matcher.group(5)) : ImmutableMap.<String, String>of(); verifyAllParameterNames(parameterMap.keySet()); final ResourceName alternateStylesheet = extractResourceName(parameterMap); final ImmutableSet<Tag> tagset = extractTags(parameterMap); final AnyRequest request; if (renditionMimeType == null) { request = new GenericRequest(originalTarget, rawDocumentSourceName, rawDocumentMimeType); } else { request = new GenericRequest( originalTarget, rawDocumentSourceName, showProblems, renditionMimeType, pageIdentifier, alternateStylesheet, tagset); } return request; } else { throw new MalformedRequestException("Could not parse: '" + originalTarget + "'."); } } // ========= // Utilities // ========= /** * Return the document name, plus the page identifier if any. * * @param documentRequest a non-null object. * @return a non-null, non-empty {@code String}. */ public static String getDocumentNameWithPageIdentifier(final DocumentRequest documentRequest) { return documentRequest.getDocumentSourceName() + (documentRequest.getPageIdentifier() == null ? "" : DocumentRequest.PAGEIDENTIFIER_PREFIX + documentRequest.getPageIdentifier().getName()); } /** * Return the URL path and parameters for an error page. * * @param documentRequest a non-null object. Must be a {@link GenericRequest} instance. * @return a non-null, non-empty {@code String}. 
*/ public static String getRedirectionWithError(final DocumentRequest documentRequest) { return ((GenericRequest) documentRequest).rebuildOriginalTarget(true); } // ================ // java.lang.Object // ================ @Override public String toString() { final StringBuilder stringBuilder = new StringBuilder(getClass().getSimpleName() + "["); if (isRendered() && getDisplayProblems()) { stringBuilder.append("displayProblems=true; "); } stringBuilder.append(getOriginalTarget()); stringBuilder.append("]"); return stringBuilder.toString(); } @Override public boolean equals(final Object other) { if (this == other) { return true; } if (other == null || getClass() != other.getClass()) { return false; } final GenericRequest that = (GenericRequest) other; if (displayProblems != that.displayProblems) { return false; } if (rendered != that.rendered) { return false; } if (alternateStylesheet != null ? !alternateStylesheet.equals(that.alternateStylesheet) : that.alternateStylesheet != null) { return false; } if (!documentSourceName.equals(that.documentSourceName)) { return false; } if (!originalTarget.equals(that.originalTarget)) { return false; } if (renditionMimeType != that.renditionMimeType) { return false; } if (resourceExtension != null ? !resourceExtension.equals(that.resourceExtension) : that.resourceExtension != null) { return false; } if (tags != null ? !tags.equals(that.tags) : that.tags != null) { return false; } return true; } @Override public int hashCode() { int result = originalTarget.hashCode(); result = 31 * result + documentSourceName.hashCode(); result = 31 * result + (pageIdentifier != null ? pageIdentifier.hashCode() : 0); result = 31 * result + (rendered ? 1 : 0); result = 31 * result + (renditionMimeType != null ? renditionMimeType.hashCode() : 0); result = 31 * result + (alternateStylesheet != null ? alternateStylesheet.hashCode() : 0); result = 31 * result + (tags != null ? tags.hashCode() : 0); result = 31 * result + (displayProblems ? 
1 : 0); result = 31 * result + (resourceExtension != null ? resourceExtension.hashCode() : 0); return result; } }
/** * Tests for {@link DesignatorInterpreter#enrich(Treepath, FragmentMapper)} which modifies * identifier stuff in a {@code Treepath}. * * @author Laurent Caillette */ public class DesignatorInterpreterEnrichmentTest { @Test public void enrichNothing() { verifyEnrich(tree(NOVELLA), tree(NOVELLA), new FragmentMapperBuilder().build()); } @Test public void enrichWithSimpleAbsoluteIdentifier() { final SyntacticTree levelTree = tree(_LEVEL, tree(ABSOLUTE_IDENTIFIER, tree("L0"))); final SyntacticTree partTree = tree(NOVELLA, levelTree); final Treepath<SyntacticTree> levelTreepath = Treepath.create(partTree, 0); verifyEnrich( tree(NOVELLA, tree(_LEVEL, tree(_EXPLICIT_IDENTIFIER, "L0"))), partTree, new FragmentMapperBuilder() .addPure( new FragmentIdentifier("L0"), RobustPath.create(levelTreepath, DesignatorTools.IDENTIFIER_TREE_FILTER)) .build()); } /** * The {@link DesignatorInterpreter#enrich(Treepath, FragmentMapper)} method adds and removes * trees so it introduces an index shift. By calling this method two times we check proper * handling of index shift. 
*/ @Test public void enrichTwoTimesToCheckResistanceToIndexShift() { final SyntacticTree levelTree0 = tree(_LEVEL, tree(ABSOLUTE_IDENTIFIER, tree("L0"))); final SyntacticTree levelTree1 = tree(_LEVEL, tree(ABSOLUTE_IDENTIFIER, tree("L1"))); final SyntacticTree partTree = tree(NOVELLA, levelTree0, levelTree1); final RobustPath<SyntacticTree> path0 = RobustPath.create(Treepath.create(partTree, 0), DesignatorTools.IDENTIFIER_TREE_FILTER); final RobustPath<SyntacticTree> path1 = RobustPath.create(Treepath.create(partTree, 1), DesignatorTools.IDENTIFIER_TREE_FILTER); verifyEnrich( tree( NOVELLA, tree(_LEVEL, tree(_EXPLICIT_IDENTIFIER, "L0")), tree(_LEVEL, tree(_EXPLICIT_IDENTIFIER, "L1"))), partTree, new FragmentMapperBuilder() .addPure(new FragmentIdentifier("L0"), path0) .addPure(new FragmentIdentifier("L1"), path1) .build()); } @Test public void enrichWithSimpleImplicitIdentifier() { final SyntacticTree levelTree = tree(_LEVEL); final SyntacticTree partTree = tree(NOVELLA, levelTree); final Treepath<SyntacticTree> levelTreepath = Treepath.create(partTree, 0); final FragmentMapper<RobustPath<SyntacticTree>> mapper = new FragmentMapperBuilder() .addDerived( new FragmentIdentifier("L0"), RobustPath.create(levelTreepath, DesignatorTools.IDENTIFIER_TREE_FILTER)) .build(); verifyEnrich(tree(NOVELLA, tree(_LEVEL, tree(_IMPLICIT_IDENTIFIER, "L0"))), partTree, mapper); } @Test public void beSureOfWhatHappensWithArrayComparison() { final int[] array1 = {0, 1, 2, 3}; final int[] array2 = {0, 1, 2, 3}; Assert.assertTrue(Arrays.equals(array1, array2)); } // ======= // Fixture // ======= private static final Logger LOGGER = LoggerFactory.getLogger(DesignatorInterpreterEnrichmentTest.class); private static class FragmentMapperBuilder { private final ImmutableMap.Builder<FragmentIdentifier, RobustPath<SyntacticTree>> pureIdentifierMapBuilder = new ImmutableMap.Builder<FragmentIdentifier, RobustPath<SyntacticTree>>(); private final ImmutableMap.Builder<FragmentIdentifier, 
RobustPath<SyntacticTree>> derivedIdentifierMapBuilder = new ImmutableMap.Builder<FragmentIdentifier, RobustPath<SyntacticTree>>(); public FragmentMapperBuilder addPure( final FragmentIdentifier key, final RobustPath<SyntacticTree> value) { pureIdentifierMapBuilder.put(key, value); return this; } public FragmentMapperBuilder addDerived( final FragmentIdentifier key, final RobustPath<SyntacticTree> value) { derivedIdentifierMapBuilder.put(key, value); return this; } public FragmentMapper<RobustPath<SyntacticTree>> build() { final Map<FragmentIdentifier, RobustPath<SyntacticTree>> pure = pureIdentifierMapBuilder.build(); final Map<FragmentIdentifier, RobustPath<SyntacticTree>> derived = derivedIdentifierMapBuilder.build(); return new FragmentMapper<RobustPath<SyntacticTree>>() { @Override public Map<FragmentIdentifier, RobustPath<SyntacticTree>> getPureIdentifierMap() { return pure; } @Override public Map<FragmentIdentifier, RobustPath<SyntacticTree>> getDerivedIdentifierMap() { return derived; } }; } } private static void verifyEnrich( final SyntacticTree expectedTree, final SyntacticTree originalTree, final FragmentMapper<RobustPath<SyntacticTree>> fragmentMapper) { LOGGER.info("Flat tree: ", TreeFixture.asString(originalTree)); LOGGER.info("Expected tree: ", TreeFixture.asString(expectedTree)); final Treepath<SyntacticTree> expectedTreepath = Treepath.create(expectedTree); final Treepath<SyntacticTree> originalTreepath = Treepath.create(originalTree); final Treepath<SyntacticTree> rehierarchized = DesignatorInterpreterAccessor.enrich( DesignatorTools.TRAVERSAL.first(originalTreepath), // originalTreepath, fragmentMapper); TreeFixture.assertEqualsNoSeparators( expectedTreepath.getTreeAtEnd(), rehierarchized.getTreeAtEnd()); } private abstract static class DesignatorInterpreterAccessor extends DesignatorInterpreter { /** Just make the compiler happy. 
*/ private DesignatorInterpreterAccessor() { super(null); } public static Treepath<SyntacticTree> enrich( final Treepath<SyntacticTree> treepath, final FragmentMapper<RobustPath<SyntacticTree>> mapper) { return DesignatorInterpreter.enrich(treepath, mapper); } } }