public static Expr parse(Query query, String s, boolean checkAllUsed) { try { Reader in = new StringReader(s); ARQParser parser = new ARQParser(in); parser.setQuery(query); Expr expr = parser.Expression(); if (checkAllUsed) { Token t = parser.getNextToken(); if (t.kind != ARQParserTokenManager.EOF) throw new QueryParseException( "Extra tokens beginning \"" + t.image + "\" starting line " + t.beginLine + ", column " + t.beginColumn, t.beginLine, t.beginColumn); } return expr; } catch (ParseException ex) { throw new QueryParseException( ex.getMessage(), ex.currentToken.beginLine, ex.currentToken.beginLine); } catch (TokenMgrError tErr) { throw new QueryParseException(tErr.getMessage(), -1, -1); } catch (Error err) { // The token stream can throw java.lang.Error's String tmp = err.getMessage(); if (tmp == null) throw new QueryParseException(err, -1, -1); throw new QueryParseException(tmp, -1, -1); } }
public ExParValue getExParValue() { ExParValue v = null; try { // ExDesignTreeParser parser = new ExDesignTreeParser(new // StringReader(text.getText()), Base.getEncoding()); ExDesignTreeParser parser = new ExDesignTreeParser(new StringReader(text.getText())); v = parser.assignableParameterValue(); } catch (ParseException pex) { new de.pxlab.pxl.NonFatalError(pex.getMessage()); } catch (TokenMgrError tex) { new de.pxlab.pxl.NonFatalError(tex.getMessage()); } return v; }
/** * Parse the content of a purported HTML stream digital object and store the results in RepInfo. * * @param stream An InputStream, positioned at its beginning, which is generated from the object * to be parsed. If multiple calls to <code>parse</code> are made on the basis of a nonzero * value being returned, a new InputStream must be provided each time. * @param info A fresh (on the first call) RepInfo object which will be modified to reflect the * results of the parsing If multiple calls to <code>parse</code> are made on the basis of a * nonzero value being returned, the same RepInfo object should be passed with each call. * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns * a nonzero value, it must be called again with <code>parseIndex</code> equal to that return * value. */ public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException { if (parseIndex != 0) { // Coming in with parseIndex = 1 indicates that we've determined // this is XHTML; so we invoke the XML module to parse it. // If parseIndex is 100, this is the first invocation of the // XML module, so we call it with 0; otherwise we call it with // the value of parseIndex. if (isXmlAvailable()) { edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule(); if (parseIndex == 100) { parseIndex = 0; } xmlMod.setApp(_app); xmlMod.setBase(_je); xmlMod.setDefaultParams(_defaultParams); try { xmlMod.applyDefaultParams(); } catch (Exception e) { // really shouldn't happen } xmlMod.setXhtmlDoctype(_doctype); return xmlMod.parse(stream, info, parseIndex); } else { // The XML module shouldn't be missing from any installation, // but someone who really wanted to could remove it. In // that case, you deserve what you get. info.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents")); info.setWellFormed(false); // Treat it as completely wrong return 0; } } else { /* parseIndex = 0, first call only */ _doctype = null; } // Test if textMD is to be generated if (_defaultParams != null) { Iterator iter = _defaultParams.iterator(); while (iter.hasNext()) { String param = (String) iter.next(); if (param.toLowerCase().equals("withtextmd=true")) { _withTextMD = true; } } } initParse(); info.setFormat(_format[0]); info.setMimeType(_mimeType[0]); info.setModule(this); if (_textMD == null || parseIndex == 0) { _textMD = new TextMDMetadata(); } /* We may have already done the checksums while converting a temporary file. */ Checksummer ckSummer = null; if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) { ckSummer = new Checksummer(); _cstream = new ChecksumInputStream(stream, ckSummer); _dstream = getBufferedDataStream(_cstream, _je != null ? _je.getBufferSize() : 0); } else { _dstream = getBufferedDataStream(stream, _je != null ? _je.getBufferSize() : 0); } ParseHtml parser = null; HtmlMetadata metadata = null; HtmlCharStream cstream = null; try { cstream = new HtmlCharStream(_dstream, "ISO-8859-1"); parser = new ParseHtml(cstream); } catch (UnsupportedEncodingException e) { info.setMessage(new ErrorMessage("Internal error: " + e.getMessage())); info.setWellFormed(false); return 0; // shouldn't happen! } int type = 0; try { List elements = parser.HtmlDoc(); if (elements.isEmpty()) { // Consider an empty document bad info.setWellFormed(false); info.setMessage(new ErrorMessage("Document is empty")); return 0; } type = checkDoctype(elements); if (type < 0) { info.setWellFormed(false); info.setMessage(new ErrorMessage("DOCTYPE is not HTML")); return 0; } /* Check if there is at least one html, head, body or title tag. * A plain text document * might be interpreted as a single PCDATA, which is in some * ethereal sense well-formed HTML, but it's pointless to consider * it such. It might also use angle brackets as a text delimiter, * and that shouldn't count as HTML either. */ boolean hasElements = false; Iterator iter = elements.iterator(); while (iter.hasNext()) { Object o = iter.next(); if (o instanceof JHOpenTag) { String name = ((JHOpenTag) o).getName(); if ("html".equals(name) || "head".equals(name) || "body".equals(name) || "title".equals(name)) { hasElements = true; } break; } } if (!hasElements) { info.setMessage(new ErrorMessage("Document contains no html, head, body or title tags")); info.setWellFormed(false); return 0; } // CRLF from HtmlCharStream ... String lineEnd = cstream.getKindOfLineEnd(); if (lineEnd == null) { info.setMessage(new InfoMessage("Not able to determine type of end of line")); _textMD.setLinebreak(TextMDMetadata.NILL); } else if (lineEnd.equalsIgnoreCase("CR")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR); } else if (lineEnd.equalsIgnoreCase("LF")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF); } else if (lineEnd.equalsIgnoreCase("CRLF")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF); } if (type == 0) { /* If we can't find a doctype, it still might be XHTML * if the elements start with an XML declaration and * the root element is "html" */ switch (seemsToBeXHTML(elements)) { case 0: // Not XML break; // fall through case 1: // XML but not HTML info.setMessage( new ErrorMessage( "Document has XML declaration but no DOCTYPE; " + "probably XML rather than HTML")); info.setWellFormed(false); return 0; case 2: // probably XHTML return 100; } info.setMessage( new ErrorMessage( "Unrecognized or missing DOCTYPE declaration; " + "validation continuing as HTML 3.2")); info.setValid(false); // But keep going } HtmlDocDesc docDesc = null; switch (type) { case HTML_3_2: default: docDesc = new Html3_2DocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("3.2"); break; case HTML_4_0_FRAMESET: docDesc = new Html4_0FrameDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_0_TRANSITIONAL: docDesc = new Html4_0TransDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_0_STRICT: docDesc = new Html4_0StrictDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_01_FRAMESET: docDesc = new Html4_01FrameDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case HTML_4_01_TRANSITIONAL: docDesc = new Html4_01TransDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case HTML_4_01_STRICT: docDesc = new Html4_01StrictDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case XHTML_1_0_STRICT: case XHTML_1_0_TRANSITIONAL: case XHTML_1_0_FRAMESET: case XHTML_1_1: // Force a second call to parse as XML. 100 is a // magic code for the first XML call. return 100; } _textMD.setMarkup_language(_doctype); if (docDesc == null) { info.setMessage( new InfoMessage( "Code for appropriate HTML version not available yet:" + "substituting HTML 3.2")); docDesc = new Html3_2DocDesc(); } docDesc.validate(elements, info); metadata = docDesc.getMetadata(); // Try to get the charset from the meta Content if (metadata.getCharset() != null) { _textMD.setCharset(metadata.getCharset()); } else { _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1); } String textMDEncoding = _textMD.getCharset(); if (textMDEncoding.indexOf("UTF") != -1) { _textMD.setByte_order( _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE); _textMD.setByte_size("8"); _textMD.setCharacter_size("variable"); } else { _textMD.setByte_order( _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE); _textMD.setByte_size("8"); _textMD.setCharacter_size("1"); } } catch (ParseException e) { Token t = e.currentToken; info.setMessage( new ErrorMessage("Parse error", "Line = " + t.beginLine + ", column = " + t.beginColumn)); info.setWellFormed(false); } catch (TokenMgrError f) { info.setMessage(new ErrorMessage("TokenMgrError: " + f.getLocalizedMessage())); info.setWellFormed(false); } if (info.getWellFormed() == RepInfo.FALSE) { return 0; } if (type != 0) { if (profileNames[type] != null) { info.setProfile(profileNames[type]); } info.setVersion(versionNames[type]); } if (metadata != null) { Property property = metadata.toProperty(_withTextMD ? _textMD : null); if (property != null) { info.setProperty(property); } } if (ckSummer != null) { info.setSize(_cstream.getNBytes()); info.setChecksum(new Checksum(ckSummer.getCRC32(), ChecksumType.CRC32)); String value = ckSummer.getMD5(); if (value != null) { info.setChecksum(new Checksum(value, ChecksumType.MD5)); } if ((value = ckSummer.getSHA1()) != null) { info.setChecksum(new Checksum(value, ChecksumType.SHA1)); } } return 0; }
/** * @param reader Reads the template source code * @param cfg The FreeMarker configuration settings; some of them influences parsing, also the * resulting {@link UnboundTemplate} will be bound to this. * @param assumedEncoding This is the name of the charset that we are supposed to be using. This * is only needed to check if the encoding specified in the {@code #ftl} header (if any) * matches this. If this is non-{@code null} and they don't match, a {@link * WrongEncodingException} will be thrown by the parser. * @param sourceName Shown in error messages as the template "file" location. */ UnboundTemplate(Reader reader, String sourceName, Configuration cfg, String assumedEncoding) throws IOException { NullArgumentException.check(cfg); this.cfg = cfg; this.sourceName = sourceName; this.templateLanguageVersion = normalizeTemplateLanguageVersion(cfg.getIncompatibleImprovements()); LineTableBuilder ltbReader; try { if (!(reader instanceof BufferedReader)) { reader = new BufferedReader(reader, 0x1000); } ltbReader = new LineTableBuilder(reader); reader = ltbReader; try { FMParser parser = new FMParser( this, reader, assumedEncoding, cfg.getStrictSyntaxMode(), cfg.getWhitespaceStripping(), cfg.getTagSyntax(), cfg.getNamingConvention(), cfg.getIncompatibleImprovements().intValue()); TemplateElement rootElement; try { rootElement = parser.Root(); } catch (IndexOutOfBoundsException exc) { // There's a JavaCC bug where the Reader throws a RuntimeExcepton and then JavaCC fails // with // IndexOutOfBoundsException. If that wasn't the case, we just rethrow. Otherwise we // suppress the // IndexOutOfBoundsException and let the real cause to be thrown later. if (!ltbReader.hasFailure()) { throw exc; } rootElement = null; } this.rootElement = rootElement; this.actualTagSyntax = parser._getLastTagSyntax(); this.actualNamingConvention = parser._getLastNamingConvention(); this.templateSpecifiedEncoding = parser._getTemplateSpecifiedEncoding(); } catch (TokenMgrError exc) { // TokenMgrError VS ParseException is not an interesting difference for the user, so we just // convert it // to ParseException throw exc.toParseException(this); } } catch (ParseException e) { e.setTemplateName(getSourceName()); throw e; } finally { reader.close(); } // Throws any exception that JavaCC has silently treated as EOF: ltbReader.throwFailure(); if (prefixToNamespaceURIMapping != null) { prefixToNamespaceURIMapping = Collections.unmodifiableMap(prefixToNamespaceURIMapping); namespaceURIToPrefixMapping = Collections.unmodifiableMap(namespaceURIToPrefixMapping); } }