Beispiel #1
0
/**
 * Tweaks DTD entities on-the-fly, escaping the entities that we need to appear as-they-are inside
 * rendered HTML.
 *
 * <p>Every declaration of the form {@code <!ENTITY name "&#...;">} found in the DTD text is
 * rewritten as {@code <!ENTITY name "&amp;name;" >}; everything else is left unchanged.
 *
 * @author Laurent Caillette
 */
public class DtdTools {

  private static final Logger LOGGER = LoggerFactory.getLogger(DtdTools.class);

  /**
   * Matches one entity declaration whose replacement text starts with {@code &#}. Group 1 is the
   * entity name, group 2 the original replacement text (not used by {@link #REPLACEMENT}).
   */
  private static final Pattern PATTERN =
      Pattern.compile("\\<\\!ENTITY\\s+(\\w+)\\s+\\\"(&#(?:\\d|\\w|\\;)+)\\\"\\s*?\\>");

  static {
    // NOTE(review): there is no "{}" placeholder here. This is fine if Logger concatenates its
    // trailing arguments (its import is not visible from this class body); with SLF4J the pattern
    // would be silently dropped from the message -- confirm.
    LOGGER.debug("Crafted regex ", PATTERN.pattern());
  }

  /** Redeclares the entity so that it expands to its own escaped reference. */
  private static final String REPLACEMENT = "<!ENTITY $1 \"&amp;$1;\" > ";

  private DtdTools() {}

  /**
   * Reads the whole DTD behind {@code unescapedInputSource} and returns a new source holding the
   * same text with entity declarations escaped.
   *
   * <p>The character stream is used when present, the byte stream otherwise. The stream that was
   * read is closed before returning, as a parser consuming the source would do.
   *
   * @param unescapedInputSource a non-null object providing a character stream or a byte stream.
   * @return a new {@code InputSource} backed by a character stream, with the same system
   *     identifier, public identifier and encoding as the original.
   * @throws IOException if reading or closing the underlying stream fails.
   * @throws IllegalArgumentException if the source provides neither kind of stream.
   */
  public static InputSource escapeEntities(final InputSource unescapedInputSource)
      throws IOException {
    final String unescapedDtd = readFully(unescapedInputSource);

    final Matcher matcher = PATTERN.matcher(unescapedDtd);

    final String escapedDtd = matcher.replaceAll(REPLACEMENT);
    final InputSource escapedInputSource = new InputSource(unescapedInputSource.getSystemId());
    escapedInputSource.setEncoding(unescapedInputSource.getEncoding());
    escapedInputSource.setPublicId(unescapedInputSource.getPublicId());
    escapedInputSource.setCharacterStream(new StringReader(escapedDtd));
    return escapedInputSource;
  }

  /** Drains and closes the character stream if there is one, the byte stream otherwise. */
  private static String readFully(final InputSource inputSource) throws IOException {
    final java.io.Reader characterStream = inputSource.getCharacterStream();
    if (characterStream != null) {
      try {
        return IOUtils.toString(characterStream);
      } finally {
        characterStream.close();
      }
    }

    final java.io.InputStream byteStream = inputSource.getByteStream();
    if (byteStream == null) {
      throw new IllegalArgumentException("unescapedInputSource provides no valid stream");
    }
    try {
      // Decode with the encoding declared on the source; a null encoding falls back to the
      // platform default, which is what the single-argument overload always used.
      return IOUtils.toString(byteStream, inputSource.getEncoding());
    } finally {
      byteStream.close();
    }
  }
}
Beispiel #2
0
/**
 * Unique implementation encapsulating the request for a rendered document or a resource.
 *
 * <p>Both constructors are private; instances are obtained from {@link #parse(String)}. A
 * request is either <em>rendered</em> (it has a {@link RenditionMimeType} and may have a page
 * identifier, an alternate stylesheet and tags) or <em>non-rendered</em> (it only has a resource
 * extension).
 *
 * <p>TODO: use ANTLR for parsing.
 *
 * @author Laurent Caillette
 */
public final class GenericRequest implements DocumentRequest, ResourceRequest {

  // NOTE(review): the logging call in the static block below has no "{}" placeholder. This is
  // fine if Logger concatenates its trailing arguments (its import is not visible from this class
  // body); with SLF4J the argument would be dropped -- confirm.
  private static final Logger LOGGER = LoggerFactory.getLogger(GenericRequest.class);

  /** Path suffix marking a request for the error page of a document. */
  public static final String ERRORPAGE_SUFFIX = "/error.html";

  /** Name of the query parameter carrying the list of tags. */
  public static final String TAGSET_PARAMETER_NAME = "tags";

  /** The only query parameter names {@link #parse(String)} accepts. */
  public static final ImmutableSet<String> SUPPORTED_PARAMETER_NAMES =
      ImmutableSet.of(ALTERNATE_STYLESHEET_PARAMETER_NAME, TAGSET_PARAMETER_NAME);

  /** <a href="http://www.ietf.org/rfc/rfc2396.txt" >RFC</a> p. 26-27. */
  public static final String LIST_SEPARATOR = ";";

  // =======
  // For all
  // =======

  /** Rebuilt from the parsed parts, not the string originally handed to the parser. */
  private final String originalTarget;

  @Override
  public String getOriginalTarget() {
    return originalTarget;
  }

  private final String documentSourceName;

  @Override
  public String getDocumentSourceName() {
    return documentSourceName;
  }

  private final boolean rendered;

  @Override
  public boolean isRendered() {
    return rendered;
  }

  // ========
  // Rendered
  // ========

  /** Null for a non-rendered request. */
  private final RenditionMimeType renditionMimeType;

  @Override
  public RenditionMimeType getRenditionMimeType() {
    return renditionMimeType;
  }

  /** May be null, including for a rendered request. */
  private final ResourceName alternateStylesheet;

  @Override
  public ResourceName getAlternateStylesheet() {
    return alternateStylesheet;
  }

  /** Non-null for a rendered request, null for a non-rendered one. */
  private final ImmutableSet<Tag> tags;

  @Override
  public ImmutableSet<Tag> getTags() {
    return tags;
  }

  private final boolean displayProblems;

  @Override
  public boolean getDisplayProblems() {
    return displayProblems;
  }

  /** May be null, including for a rendered request. */
  private final PageIdentifier pageIdentifier;

  @Override
  public PageIdentifier getPageIdentifier() {
    return pageIdentifier;
  }
  // ============
  // Non-rendered
  // ============

  private final String resourceExtension;

  /**
   * Always null if {@link #isRendered()} is true. Never null nor blank if {@link #isRendered()} is
   * false.
   *
   * @return a {@code String} that may be null, but never blank.
   */
  @Override
  public String getResourceExtension() {
    return resourceExtension;
  }

  // ============
  // Constructors
  // ============

  /**
   * Creates a rendered request.
   *
   * @param originalTarget only checked for being non-blank; the stored target is rebuilt from the
   *     other arguments by {@link #rebuildOriginalTarget(boolean)}.
   * @param documentSourceName a non-blank {@code String}.
   * @param displayProblems whether the request asks for the error page.
   * @param renditionMimeType a non-null object.
   * @param pageIdentifier a possibly null object.
   * @param alternateStylesheet a possibly null object.
   * @param tags a non-null, possibly empty set.
   */
  private GenericRequest(
      final String originalTarget,
      final String documentSourceName,
      final boolean displayProblems,
      final RenditionMimeType renditionMimeType,
      final PageIdentifier pageIdentifier,
      final ResourceName alternateStylesheet,
      final ImmutableSet<Tag> tags) {
    this.pageIdentifier = pageIdentifier;
    checkHasCharacters(originalTarget);
    checkHasCharacters(documentSourceName);
    this.documentSourceName = documentSourceName;
    this.rendered = true;
    this.renditionMimeType = checkNotNull(renditionMimeType);
    this.alternateStylesheet = alternateStylesheet;
    this.tags = checkNotNull(tags);
    this.displayProblems = displayProblems;

    this.resourceExtension = null;

    // Must come last: rebuilding reads the fields assigned above.
    this.originalTarget = rebuildOriginalTarget(false);
  }

  /**
   * Builds the target string of a rendered request from its parts: document source name, optional
   * page identifier, file extension of the MIME type, optional error page suffix, then the
   * stylesheet and tags query parameters if any.
   *
   * @param addProblemPage true to insert {@link #ERRORPAGE_SUFFIX} before the query.
   * @return a non-null {@code String}.
   */
  private String rebuildOriginalTarget(final boolean addProblemPage) {
    final ImmutableList.Builder<String> parametersBuilder = ImmutableList.builder();
    if (alternateStylesheet != null) {
      parametersBuilder.add(
          ALTERNATE_STYLESHEET_PARAMETER_NAME + "=" + alternateStylesheet.getName());
    }

    if (!getTags().isEmpty()) {
      final Iterable<String> tagNames = Iterables.transform(getTags(), Tag.EXTRACT_TAG_NAME);

      parametersBuilder.add(TAGSET_PARAMETER_NAME + "=" + Joiner.on(LIST_SEPARATOR).join(tagNames));
    }
    final ImmutableList<String> parameters = parametersBuilder.build();
    return documentSourceName
        + (pageIdentifier == null ? "" : PAGEIDENTIFIER_PREFIX + pageIdentifier.getName())
        + "."
        + renditionMimeType.getFileExtension()
        + (addProblemPage ? ERRORPAGE_SUFFIX : "")
        + (parameters.isEmpty() ? "" : "?" + Joiner.on("&").join(parameters));
  }

  /**
   * Creates a non-rendered (raw resource) request.
   *
   * @param originalTarget only checked for being non-blank; the stored target is
   *     {@code documentSourceName + "." + resourceExtension}.
   * @param documentSourceName a non-blank {@code String}.
   * @param resourceExtension a non-blank {@code String}.
   */
  private GenericRequest(
      final String originalTarget,
      final String documentSourceName,
      final String resourceExtension) {
    checkHasCharacters(originalTarget);
    this.originalTarget = documentSourceName + "." + resourceExtension;
    checkHasCharacters(documentSourceName);
    this.documentSourceName = documentSourceName;
    this.rendered = false;
    checkHasCharacters(resourceExtension);
    this.resourceExtension = resourceExtension;

    renditionMimeType = null;
    pageIdentifier = null;
    alternateStylesheet = null;
    tags = null;
    displayProblems = false;
  }

  /** Rejects a null, empty or whitespace-only argument with an {@code IllegalArgumentException}. */
  private static void checkHasCharacters(final String string) {
    checkArgument(!StringUtils.isBlank(string));
  }

  // =======
  // Parsing
  // =======

  private static final String TAG_PATTERN = "[a-zA-Z0-9][a-zA-Z0-9\\-_]*";

  /**
   * Allow non-word characters only in the middle of word characters and if they are not
   * consecutive.
   */
  private static final String PATH_SEGMENT_PATTERN = "[A-Za-z0-9]+(?:(?:-|_|\\.)[A-Za-z0-9]+)*";

  /**
   * Assembles the regex for a whole target. Capturing groups, as consumed by
   * {@link #parse(String)}: 2 is the path without extension, 3 the optional page identifier, 4 the
   * extension, 5 the optional query string. This numbering assumes that
   * {@code PageIdentifier.PATTERN} declares no capturing group of its own -- TODO confirm.
   */
  private static Pattern createPattern() {
    final StringBuilder buffer = new StringBuilder();

    buffer.append("(");

    // The path without extension. No double dots for security reasons (forbid '..').
    buffer.append("((?:\\/" + PATH_SEGMENT_PATTERN + ")+)");

    // Page identifier.
    buffer.append("(?:--(");
    buffer.append(PageIdentifier.PATTERN.pattern());
    buffer.append("))?");

    // The extension defining the MIME type.
    buffer.append("(?:\\.(");

    final ImmutableList<String> allExtensions =
        ImmutableList.<String>builder()
            .addAll(RenditionMimeType.getFileExtensions())
            .addAll(RawResource.getFileExtensions())
            .build();

    buffer.append(Joiner.on("|").join(allExtensions));
    buffer.append("))");
    buffer.append(")");

    // This duplicates the 'tag' rule in ANTLR grammar. Shame.
    final String parameter = "([a-zA-Z0-9\\-\\=_&\\./" + LIST_SEPARATOR + "]+)";

    buffer.append("(?:\\?");
    buffer.append(parameter);
    buffer.append(")?");

    return Pattern.compile(buffer.toString());
  }

  private static final Pattern DOCUMENT_PATTERN = createPattern();

  static {
    LOGGER.debug("Crafted regex: ", DOCUMENT_PATTERN.pattern());
  }

  /**
   * Returns the extension (group 4) of the first match of the document pattern inside
   * {@code path}.
   *
   * @throws MalformedRequestException if the pattern is not found.
   */
  private static String extractExtension(final String path) throws MalformedRequestException {
    final Matcher matcher = DOCUMENT_PATTERN.matcher(path);
    if (matcher.find() && matcher.groupCount() >= 4) {
      return matcher.group(4);
    } else {
      throw new MalformedRequestException("Doesn't contain an extension: '" + path + "'");
    }
  }

  /**
   * Splits a query string into name-value pairs, keeping the order of appearance.
   *
   * @param query a possibly null or blank {@code String}, made of {@code name=value} pairs
   *     separated by {@code &}.
   * @return a non-null, possibly empty map with no null value.
   * @throws MalformedRequestException if a parameter has more than one {@code =}, has no
   *     {@code =} at all, or appears more than once.
   */
  private static ImmutableMap<String, String> getQueryMap(final String query)
      throws MalformedRequestException {
    if (StringUtils.isBlank(query)) {
      return ImmutableMap.of();
    }

    // Insertion-ordered scratch map: duplicates are detected without rebuilding an immutable
    // map on every iteration.
    final java.util.Map<String, String> collected =
        new java.util.LinkedHashMap<String, String>();
    for (final String param : Splitter.on('&').split(query)) {
      final ImmutableList<String> strings = ImmutableList.copyOf(Splitter.on('=').split(param));
      final String name = strings.get(0);
      if (strings.size() > 2) {
        throw new MalformedRequestException("Multiple '=' for parameter " + name);
      }
      if (strings.size() < 2) {
        // ImmutableMap cannot hold a null value, so a parameter without '=' used to end in a
        // NullPointerException; report it as a malformed request instead.
        throw new MalformedRequestException("Missing value for parameter " + name);
      }
      if (collected.containsKey(name)) {
        throw new MalformedRequestException("Duplicate value for parameter " + name);
      }
      collected.put(name, strings.get(1));
    }
    return ImmutableMap.copyOf(collected);
  }

  /** Returns the alternate stylesheet named by the query parameters, or null if there is none. */
  private static ResourceName extractResourceName(final ImmutableMap<String, String> parameterMap) {
    final String parameterValue = parameterMap.get(ALTERNATE_STYLESHEET_PARAMETER_NAME);
    if (parameterValue == null) {
      return null;
    } else {
      return new ResourceName(parameterValue);
    }
  }

  /** One tag, or several tags separated by {@link #LIST_SEPARATOR}. */
  private static final Pattern TAGS_PATTERN =
      Pattern.compile(TAG_PATTERN + "(?:" + LIST_SEPARATOR + TAG_PATTERN + ")*");
  private static final Pattern TAGS_SEPARATOR_PATTERN = Pattern.compile(LIST_SEPARATOR);

  /**
   * Converts a {@link #LIST_SEPARATOR}-separated list of tag names into a set of tags.
   *
   * @throws MalformedRequestException if {@code value} is not a well-formed tag list.
   */
  private static ImmutableSet<Tag> parseTags(final String value) throws MalformedRequestException {
    if (TAGS_PATTERN.matcher(value).matches()) {
      return RenderingTools.toTagSet(ImmutableSet.copyOf(TAGS_SEPARATOR_PATTERN.split(value)));
    } else {
      throw new MalformedRequestException("Bad tags: '" + value + "'");
    }
  }

  /** Returns the tags named by the query parameters, or an empty set if there are none. */
  private static ImmutableSet<Tag> extractTags(final ImmutableMap<String, String> parameterMap)
      throws MalformedRequestException {
    final String parameterValue = parameterMap.get(TAGSET_PARAMETER_NAME);
    if (parameterValue == null) {
      return ImmutableSet.of();
    } else {
      return parseTags(parameterValue);
    }
  }

  /** Fails on the first name that is not one of {@link #SUPPORTED_PARAMETER_NAMES}. */
  private static void verifyAllParameterNames(final Set<String> parameterNames)
      throws MalformedRequestException {
    for (final String parameterName : parameterNames) {
      if (!SUPPORTED_PARAMETER_NAMES.contains(parameterName)) {
        throw new MalformedRequestException("Unsupported query parameter: '" + parameterName + "'");
      }
    }
  }

  /**
   * Parses a request target into a rendered or a non-rendered request.
   *
   * <p>The request is rendered when the extension maps to a {@link RenditionMimeType}; otherwise
   * it is a raw resource request, for which page identifier, stylesheet and tags are not kept.
   *
   * @param originalTarget the path, with optional page identifier, extension and query.
   * @return a non-null object.
   * @throws MalformedRequestException if the target does not contain the expected pattern, or if
   *     its query is malformed or uses an unsupported parameter.
   */
  public static AnyRequest parse(final String originalTarget) throws MalformedRequestException {
    final Matcher matcher = DOCUMENT_PATTERN.matcher(originalTarget);
    if (matcher.find() && matcher.groupCount() >= 4) {

      // Document source name plus extension, minus page identifier.
      final String fullTarget = matcher.group(2) + "." + matcher.group(4);

      final boolean showProblems = fullTarget.endsWith(ERRORPAGE_SUFFIX);

      final String targetMinusError;
      if (showProblems) {
        targetMinusError = fullTarget.substring(0, fullTarget.length() - ERRORPAGE_SUFFIX.length());
      } else {
        targetMinusError = fullTarget;
      }

      final String rawDocumentMimeType = extractExtension(targetMinusError);
      final String rawDocumentSourceName =
          targetMinusError.substring(
              0, targetMinusError.length() - rawDocumentMimeType.length() - 1);

      final String maybePageIdentifier = matcher.group(3);
      final PageIdentifier pageIdentifier =
          maybePageIdentifier == null ? null : new PageIdentifier(maybePageIdentifier);

      // The extension was already dereferenced above, so it cannot be null here. Upper-casing
      // must not depend on the default locale (e.g. the Turkish dotless 'i').
      final RenditionMimeType renditionMimeType =
          RenditionMimeType.maybeValueOf(rawDocumentMimeType.toUpperCase(java.util.Locale.ROOT));

      final ImmutableMap<String, String> parameterMap =
          matcher.groupCount() >= 5
              ? getQueryMap(matcher.group(5))
              : ImmutableMap.<String, String>of();
      verifyAllParameterNames(parameterMap.keySet());

      final ResourceName alternateStylesheet = extractResourceName(parameterMap);

      final ImmutableSet<Tag> tagset = extractTags(parameterMap);

      final AnyRequest request;
      if (renditionMimeType == null) {
        request = new GenericRequest(originalTarget, rawDocumentSourceName, rawDocumentMimeType);
      } else {
        request =
            new GenericRequest(
                originalTarget,
                rawDocumentSourceName,
                showProblems,
                renditionMimeType,
                pageIdentifier,
                alternateStylesheet,
                tagset);
      }
      return request;

    } else {
      throw new MalformedRequestException("Could not parse: '" + originalTarget + "'.");
    }
  }

  // =========
  // Utilities
  // =========

  /**
   * Return the document name, plus the page identifier if any.
   *
   * @param documentRequest a non-null object.
   * @return a non-null, non-empty {@code String}.
   */
  public static String getDocumentNameWithPageIdentifier(final DocumentRequest documentRequest) {
    return documentRequest.getDocumentSourceName()
        + (documentRequest.getPageIdentifier() == null
            ? ""
            : DocumentRequest.PAGEIDENTIFIER_PREFIX
                + documentRequest.getPageIdentifier().getName());
  }

  /**
   * Return the URL path and parameters for an error page.
   *
   * @param documentRequest a non-null object. Must be a {@link GenericRequest} instance.
   * @return a non-null, non-empty {@code String}.
   */
  public static String getRedirectionWithError(final DocumentRequest documentRequest) {
    return ((GenericRequest) documentRequest).rebuildOriginalTarget(true);
  }

  // ================
  // java.lang.Object
  // ================

  @Override
  public String toString() {
    final StringBuilder stringBuilder = new StringBuilder(getClass().getSimpleName() + "[");
    if (isRendered() && getDisplayProblems()) {
      stringBuilder.append("displayProblems=true; ");
    }
    stringBuilder.append(getOriginalTarget());
    stringBuilder.append("]");
    return stringBuilder.toString();
  }

  @Override
  public boolean equals(final Object other) {
    if (this == other) {
      return true;
    }
    if (other == null || getClass() != other.getClass()) {
      return false;
    }

    final GenericRequest that = (GenericRequest) other;

    if (displayProblems != that.displayProblems) {
      return false;
    }
    if (rendered != that.rendered) {
      return false;
    }
    if (alternateStylesheet != null
        ? !alternateStylesheet.equals(that.alternateStylesheet)
        : that.alternateStylesheet != null) {
      return false;
    }
    if (!documentSourceName.equals(that.documentSourceName)) {
      return false;
    }
    if (!originalTarget.equals(that.originalTarget)) {
      return false;
    }
    // Compared here because hashCode() takes it into account as well.
    if (pageIdentifier != null
        ? !pageIdentifier.equals(that.pageIdentifier)
        : that.pageIdentifier != null) {
      return false;
    }
    if (renditionMimeType != that.renditionMimeType) {
      return false;
    }
    if (resourceExtension != null
        ? !resourceExtension.equals(that.resourceExtension)
        : that.resourceExtension != null) {
      return false;
    }
    if (tags != null ? !tags.equals(that.tags) : that.tags != null) {
      return false;
    }

    return true;
  }

  @Override
  public int hashCode() {
    int result = originalTarget.hashCode();
    result = 31 * result + documentSourceName.hashCode();
    result = 31 * result + (pageIdentifier != null ? pageIdentifier.hashCode() : 0);
    result = 31 * result + (rendered ? 1 : 0);
    result = 31 * result + (renditionMimeType != null ? renditionMimeType.hashCode() : 0);
    result = 31 * result + (alternateStylesheet != null ? alternateStylesheet.hashCode() : 0);
    result = 31 * result + (tags != null ? tags.hashCode() : 0);
    result = 31 * result + (displayProblems ? 1 : 0);
    result = 31 * result + (resourceExtension != null ? resourceExtension.hashCode() : 0);
    return result;
  }
}
/**
 * Tests for {@link DesignatorInterpreter#enrich(Treepath, FragmentMapper)}, the method that
 * rewrites identifier-related nodes of a {@code Treepath}.
 *
 * @author Laurent Caillette
 */
public class DesignatorInterpreterEnrichmentTest {

  /** With an empty mapper the tree must come out unchanged. */
  @Test
  public void enrichNothing() {
    final FragmentMapper<RobustPath<SyntacticTree>> emptyMapper =
        new FragmentMapperBuilder().build();
    verifyEnrich(tree(NOVELLA), tree(NOVELLA), emptyMapper);
  }

  /** A pure identifier mapped onto a level carrying an absolute identifier. */
  @Test
  public void enrichWithSimpleAbsoluteIdentifier() {
    final SyntacticTree novella = tree(NOVELLA, tree(_LEVEL, tree(ABSOLUTE_IDENTIFIER, tree("L0"))));
    final RobustPath<SyntacticTree> pathToLevel = identifierPath(novella, 0);

    final FragmentMapper<RobustPath<SyntacticTree>> mapper =
        new FragmentMapperBuilder().addPure(new FragmentIdentifier("L0"), pathToLevel).build();

    final SyntacticTree expected = tree(NOVELLA, tree(_LEVEL, tree(_EXPLICIT_IDENTIFIER, "L0")));
    verifyEnrich(expected, novella, mapper);
  }

  /**
   * {@link DesignatorInterpreter#enrich(Treepath, FragmentMapper)} adds and removes trees, which
   * shifts child indexes. Enriching two sibling levels in one pass checks that the shift is
   * handled properly.
   */
  @Test
  public void enrichTwoTimesToCheckResistanceToIndexShift() {
    final SyntacticTree firstLevel = tree(_LEVEL, tree(ABSOLUTE_IDENTIFIER, tree("L0")));
    final SyntacticTree secondLevel = tree(_LEVEL, tree(ABSOLUTE_IDENTIFIER, tree("L1")));
    final SyntacticTree novella = tree(NOVELLA, firstLevel, secondLevel);

    final RobustPath<SyntacticTree> pathToFirst = identifierPath(novella, 0);
    final RobustPath<SyntacticTree> pathToSecond = identifierPath(novella, 1);

    final SyntacticTree expected =
        tree(
            NOVELLA,
            tree(_LEVEL, tree(_EXPLICIT_IDENTIFIER, "L0")),
            tree(_LEVEL, tree(_EXPLICIT_IDENTIFIER, "L1")));

    final FragmentMapperBuilder mapperBuilder = new FragmentMapperBuilder();
    mapperBuilder.addPure(new FragmentIdentifier("L0"), pathToFirst);
    mapperBuilder.addPure(new FragmentIdentifier("L1"), pathToSecond);

    verifyEnrich(expected, novella, mapperBuilder.build());
  }

  /** A derived identifier mapped onto a level that carries no identifier of its own. */
  @Test
  public void enrichWithSimpleImplicitIdentifier() {
    final SyntacticTree novella = tree(NOVELLA, tree(_LEVEL));
    final RobustPath<SyntacticTree> pathToLevel = identifierPath(novella, 0);

    final FragmentMapper<RobustPath<SyntacticTree>> mapper =
        new FragmentMapperBuilder().addDerived(new FragmentIdentifier("L0"), pathToLevel).build();

    final SyntacticTree expected = tree(NOVELLA, tree(_LEVEL, tree(_IMPLICIT_IDENTIFIER, "L0")));
    verifyEnrich(expected, novella, mapper);
  }

  /** Sanity check: {@code Arrays.equals} compares array contents. */
  @Test
  public void beSureOfWhatHappensWithArrayComparison() {
    final int[] first = new int[] {0, 1, 2, 3};
    final int[] second = new int[] {0, 1, 2, 3};
    Assert.assertTrue(Arrays.equals(first, second));
  }

  // =======
  // Fixture
  // =======

  private static final Logger LOGGER =
      LoggerFactory.getLogger(DesignatorInterpreterEnrichmentTest.class);

  /** Builds the path to the child at {@code childIndex} of {@code root}, as used by the mapper. */
  private static RobustPath<SyntacticTree> identifierPath(
      final SyntacticTree root, final int childIndex) {
    final Treepath<SyntacticTree> childTreepath = Treepath.create(root, childIndex);
    return RobustPath.create(childTreepath, DesignatorTools.IDENTIFIER_TREE_FILTER);
  }

  /** Collects pure and derived identifier mappings, then exposes them as a {@code FragmentMapper}. */
  private static class FragmentMapperBuilder {
    private final ImmutableMap.Builder<FragmentIdentifier, RobustPath<SyntacticTree>> pureEntries =
        ImmutableMap.builder();
    private final ImmutableMap.Builder<FragmentIdentifier, RobustPath<SyntacticTree>>
        derivedEntries = ImmutableMap.builder();

    public FragmentMapperBuilder addPure(
        final FragmentIdentifier key, final RobustPath<SyntacticTree> value) {
      pureEntries.put(key, value);
      return this;
    }

    public FragmentMapperBuilder addDerived(
        final FragmentIdentifier key, final RobustPath<SyntacticTree> value) {
      derivedEntries.put(key, value);
      return this;
    }

    /** Snapshots both maps; the returned mapper always hands out the same two instances. */
    public FragmentMapper<RobustPath<SyntacticTree>> build() {
      final Map<FragmentIdentifier, RobustPath<SyntacticTree>> pureSnapshot = pureEntries.build();
      final Map<FragmentIdentifier, RobustPath<SyntacticTree>> derivedSnapshot =
          derivedEntries.build();
      return new FragmentMapper<RobustPath<SyntacticTree>>() {
        @Override
        public Map<FragmentIdentifier, RobustPath<SyntacticTree>> getPureIdentifierMap() {
          return pureSnapshot;
        }

        @Override
        public Map<FragmentIdentifier, RobustPath<SyntacticTree>> getDerivedIdentifierMap() {
          return derivedSnapshot;
        }
      };
    }
  }

  /**
   * Enriches {@code originalTree} with {@code fragmentMapper} and compares the outcome with
   * {@code expectedTree}, ignoring separators.
   */
  private static void verifyEnrich(
      final SyntacticTree expectedTree,
      final SyntacticTree originalTree,
      final FragmentMapper<RobustPath<SyntacticTree>> fragmentMapper) {
    // NOTE(review): no "{}" placeholder in these messages. This is fine if Logger concatenates
    // its trailing arguments (its import is not visible from this class body) -- confirm.
    LOGGER.info("Flat tree: ", TreeFixture.asString(originalTree));
    LOGGER.info("Expected tree: ", TreeFixture.asString(expectedTree));

    final Treepath<SyntacticTree> expected = Treepath.create(expectedTree);
    final Treepath<SyntacticTree> start =
        DesignatorTools.TRAVERSAL.first(Treepath.create(originalTree));

    final Treepath<SyntacticTree> enriched = DesignatorInterpreterAccessor.enrich(start, fragmentMapper);

    TreeFixture.assertEqualsNoSeparators(expected.getTreeAtEnd(), enriched.getTreeAtEnd());
  }

  /** Subclass whose only purpose is to reach the {@code enrich} method of its parent. */
  private abstract static class DesignatorInterpreterAccessor extends DesignatorInterpreter {

    /** Just make the compiler happy. */
    private DesignatorInterpreterAccessor() {
      super(null);
    }

    public static Treepath<SyntacticTree> enrich(
        final Treepath<SyntacticTree> treepath,
        final FragmentMapper<RobustPath<SyntacticTree>> mapper) {
      return DesignatorInterpreter.enrich(treepath, mapper);
    }
  }
}