Java RepInfo Exemples

Langage de programmation: Java

Espace de nommage/Pack: edu.harvard.hul.ois.jhove.module.html

Class/Type: RepInfo

Exemples au hotexamples.com: 2

Java RepInfo - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de edu.harvard.hul.ois.jhove.module.html.RepInfo extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

setFormat(2)

setMimeType(2)

setModule(2)

setWellFormed(2)

getChecksum(1)

getWellFormed(1)

setChecksum(1)

setMessage(1)

setProfile(1)

setProperty(1)

setSigMatch(1)

setSize(1)

setValid(1)

setVersion(1)

Méthodes fréquemment utilisées

setFormat (2)

setMimeType (2)

setModule (2)

setWellFormed (2)

getChecksum (1)

getWellFormed (1)

setChecksum (1)

setMessage (1)

setProfile (1)

setProperty (1)

Méthodes fréquemment utilisées

setSigMatch (1)

setSize (1)

setValid (1)

setVersion (1)

Associées

PointerUtils

UserRoot

DatabaseTemplateService

Person

ScreenServer

OrderStateStringValues.INTERRUPTED

HashMap

Stack

CardsImpl

IndexerDocument

Related in langs

generate_port_link (PHP)

Partecipante (PHP)

LoginPage (C#)

ExtensionData (C#)

cpu_to_le64 (C++)

EmitSound (C++)

DialogUi (Go)

NewStores (Go)

Distribute (Python)

generate_password (Python)

Exemple #1

0

Afficher le fichier

Fichier : HtmlModule.java Projet : Det-Kongelige-Bibliotek/jhove

/** * Check if the digital object conforms to this Module's internal signature information. * * <p>HTML is one of the most ill-defined of any open formats, so checking a "signature" really * means using some heuristics. The only required tag is TITLE, but that could occur well into the * file. So we look for any of three strings -- taking into account case-independence and white * space -- within the first sigBytes bytes, and call that a signature check. * * @param file A File object for the object being parsed * @param stream An InputStream, positioned at its beginning, which is generated from the object * to be parsed * @param info A fresh RepInfo object which will be modified to reflect the results of the test */ public void checkSignatures(File file, InputStream stream, RepInfo info) throws IOException { info.setFormat(_format[0]); info.setMimeType(_mimeType[0]); info.setModule(this); char[][] sigtext = new char[3][]; sigtext[0] = "<!DOCTYPE HTML".toCharArray(); sigtext[1] = "<HTML".toCharArray(); sigtext[2] = "<TITLE".toCharArray(); int[] sigstate = {0, 0, 0}; JhoveBase jb = getBase(); int sigBytes = jb.getSigBytes(); int bytesRead = 0; boolean eof = false; DataInputStream dstream = new DataInputStream(stream); while (!eof && bytesRead < sigBytes) { try { int ch = readUnsignedByte(dstream, this); char chr = Character.toUpperCase((char) ch); ++bytesRead; if (Character.isWhitespace(chr)) { continue; // ignore all whitespace } for (int i = 0; i < 3; i++) { int ss = sigstate[i]; char[] st = sigtext[i]; if (chr == st[ss]) { ++sigstate[i]; if (sigstate[i] == st.length) { // One of the sig texts matches! info.setSigMatch(_name); return; } } else sigstate[i] = 0; } } catch (EOFException e) { eof = true; } } // If we fall through, there was no sig match info.setWellFormed(false); return; }

Exemple #2

0

Afficher le fichier

Fichier : HtmlModule.java Projet : Det-Kongelige-Bibliotek/jhove

/** * Parse the content of a purported HTML stream digital object and store the results in RepInfo. * * @param stream An InputStream, positioned at its beginning, which is generated from the object * to be parsed. If multiple calls to <code>parse</code> are made on the basis of a nonzero * value being returned, a new InputStream must be provided each time. * @param info A fresh (on the first call) RepInfo object which will be modified to reflect the * results of the parsing If multiple calls to <code>parse</code> are made on the basis of a * nonzero value being returned, the same RepInfo object should be passed with each call. * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns * a nonzero value, it must be called again with <code>parseIndex</code> equal to that return * value. */ public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException { if (parseIndex != 0) { // Coming in with parseIndex = 1 indicates that we've determined // this is XHTML; so we invoke the XML module to parse it. // If parseIndex is 100, this is the first invocation of the // XML module, so we call it with 0; otherwise we call it with // the value of parseIndex. if (isXmlAvailable()) { edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule(); if (parseIndex == 100) { parseIndex = 0; } xmlMod.setApp(_app); xmlMod.setBase(_je); xmlMod.setDefaultParams(_defaultParams); try { xmlMod.applyDefaultParams(); } catch (Exception e) { // really shouldn't happen } xmlMod.setXhtmlDoctype(_doctype); return xmlMod.parse(stream, info, parseIndex); } else { // The XML module shouldn't be missing from any installation, // but someone who really wanted to could remove it. In // that case, you deserve what you get. info.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents")); info.setWellFormed(false); // Treat it as completely wrong return 0; } } else { /* parseIndex = 0, first call only */ _doctype = null; } // Test if textMD is to be generated if (_defaultParams != null) { Iterator iter = _defaultParams.iterator(); while (iter.hasNext()) { String param = (String) iter.next(); if (param.toLowerCase().equals("withtextmd=true")) { _withTextMD = true; } } } initParse(); info.setFormat(_format[0]); info.setMimeType(_mimeType[0]); info.setModule(this); if (_textMD == null || parseIndex == 0) { _textMD = new TextMDMetadata(); } /* We may have already done the checksums while converting a temporary file. */ Checksummer ckSummer = null; if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) { ckSummer = new Checksummer(); _cstream = new ChecksumInputStream(stream, ckSummer); _dstream = getBufferedDataStream(_cstream, _je != null ? _je.getBufferSize() : 0); } else { _dstream = getBufferedDataStream(stream, _je != null ? _je.getBufferSize() : 0); } ParseHtml parser = null; HtmlMetadata metadata = null; HtmlCharStream cstream = null; try { cstream = new HtmlCharStream(_dstream, "ISO-8859-1"); parser = new ParseHtml(cstream); } catch (UnsupportedEncodingException e) { info.setMessage(new ErrorMessage("Internal error: " + e.getMessage())); info.setWellFormed(false); return 0; // shouldn't happen! } int type = 0; try { List elements = parser.HtmlDoc(); if (elements.isEmpty()) { // Consider an empty document bad info.setWellFormed(false); info.setMessage(new ErrorMessage("Document is empty")); return 0; } type = checkDoctype(elements); if (type < 0) { info.setWellFormed(false); info.setMessage(new ErrorMessage("DOCTYPE is not HTML")); return 0; } /* Check if there is at least one html, head, body or title tag. * A plain text document * might be interpreted as a single PCDATA, which is in some * ethereal sense well-formed HTML, but it's pointless to consider * it such. It might also use angle brackets as a text delimiter, * and that shouldn't count as HTML either. */ boolean hasElements = false; Iterator iter = elements.iterator(); while (iter.hasNext()) { Object o = iter.next(); if (o instanceof JHOpenTag) { String name = ((JHOpenTag) o).getName(); if ("html".equals(name) || "head".equals(name) || "body".equals(name) || "title".equals(name)) { hasElements = true; } break; } } if (!hasElements) { info.setMessage(new ErrorMessage("Document contains no html, head, body or title tags")); info.setWellFormed(false); return 0; } // CRLF from HtmlCharStream ... String lineEnd = cstream.getKindOfLineEnd(); if (lineEnd == null) { info.setMessage(new InfoMessage("Not able to determine type of end of line")); _textMD.setLinebreak(TextMDMetadata.NILL); } else if (lineEnd.equalsIgnoreCase("CR")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR); } else if (lineEnd.equalsIgnoreCase("LF")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF); } else if (lineEnd.equalsIgnoreCase("CRLF")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF); } if (type == 0) { /* If we can't find a doctype, it still might be XHTML * if the elements start with an XML declaration and * the root element is "html" */ switch (seemsToBeXHTML(elements)) { case 0: // Not XML break; // fall through case 1: // XML but not HTML info.setMessage( new ErrorMessage( "Document has XML declaration but no DOCTYPE; " + "probably XML rather than HTML")); info.setWellFormed(false); return 0; case 2: // probably XHTML return 100; } info.setMessage( new ErrorMessage( "Unrecognized or missing DOCTYPE declaration; " + "validation continuing as HTML 3.2")); info.setValid(false); // But keep going } HtmlDocDesc docDesc = null; switch (type) { case HTML_3_2: default: docDesc = new Html3_2DocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("3.2"); break; case HTML_4_0_FRAMESET: docDesc = new Html4_0FrameDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_0_TRANSITIONAL: docDesc = new Html4_0TransDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_0_STRICT: docDesc = new Html4_0StrictDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_01_FRAMESET: docDesc = new Html4_01FrameDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case HTML_4_01_TRANSITIONAL: docDesc = new Html4_01TransDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case HTML_4_01_STRICT: docDesc = new Html4_01StrictDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case XHTML_1_0_STRICT: case XHTML_1_0_TRANSITIONAL: case XHTML_1_0_FRAMESET: case XHTML_1_1: // Force a second call to parse as XML. 100 is a // magic code for the first XML call. return 100; } _textMD.setMarkup_language(_doctype); if (docDesc == null) { info.setMessage( new InfoMessage( "Code for appropriate HTML version not available yet:" + "substituting HTML 3.2")); docDesc = new Html3_2DocDesc(); } docDesc.validate(elements, info); metadata = docDesc.getMetadata(); // Try to get the charset from the meta Content if (metadata.getCharset() != null) { _textMD.setCharset(metadata.getCharset()); } else { _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1); } String textMDEncoding = _textMD.getCharset(); if (textMDEncoding.indexOf("UTF") != -1) { _textMD.setByte_order( _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE); _textMD.setByte_size("8"); _textMD.setCharacter_size("variable"); } else { _textMD.setByte_order( _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE); _textMD.setByte_size("8"); _textMD.setCharacter_size("1"); } } catch (ParseException e) { Token t = e.currentToken; info.setMessage( new ErrorMessage("Parse error", "Line = " + t.beginLine + ", column = " + t.beginColumn)); info.setWellFormed(false); } catch (TokenMgrError f) { info.setMessage(new ErrorMessage("TokenMgrError: " + f.getLocalizedMessage())); info.setWellFormed(false); } if (info.getWellFormed() == RepInfo.FALSE) { return 0; } if (type != 0) { if (profileNames[type] != null) { info.setProfile(profileNames[type]); } info.setVersion(versionNames[type]); } if (metadata != null) { Property property = metadata.toProperty(_withTextMD ? _textMD : null); if (property != null) { info.setProperty(property); } } if (ckSummer != null) { info.setSize(_cstream.getNBytes()); info.setChecksum(new Checksum(ckSummer.getCRC32(), ChecksumType.CRC32)); String value = ckSummer.getMD5(); if (value != null) { info.setChecksum(new Checksum(value, ChecksumType.MD5)); } if ((value = ckSummer.getSHA1()) != null) { info.setChecksum(new Checksum(value, ChecksumType.SHA1)); } } return 0; }