/**
 * Reads a sequence of NUL-terminated strings from the TIFF file.
 *
 * <p>Seeks to <code>value</code>, reads up to <code>count</code> bytes and splits them at each
 * zero byte. Bytes that follow the last zero byte are discarded.
 *
 * @param count Number of bytes to read (terminators included)
 * @param value Offset from which to read
 * @return the strings found, in file order; an empty array if no terminator was read
 * @throws IOException if seeking or reading fails
 */
protected String[] readASCIIArray(long count, long value) throws IOException {
  _raf.seek(value);
  List list = new LinkedList();
  byte[] buf = new byte[(int) count];
  // Only look at the bytes that were really read; the untouched tail of the
  // buffer is all zeros and must not be mistaken for string terminators.
  int nread = _raf.read(buf);
  StringBuffer strbuf = new StringBuffer();
  for (int i = 0; i < nread; i++) {
    // Mask to avoid sign extension turning bytes >= 0x80 into 0xFFxx chars.
    int b = buf[i] & 0xFF;
    if (b == 0) {
      list.add(strbuf.toString());
      strbuf.setLength(0);
    } else {
      strbuf.append((char) b);
    }
  }
  // Size the result from the list itself. The previous counter was never
  // incremented, so the method always returned an empty array.
  String[] strs = new String[list.size()];
  ListIterator iter = list.listIterator();
  for (int i = 0; i < strs.length; i++) {
    strs[i] = (String) iter.next();
  }
  return strs;
}
/**
 * Reads an Instrument chunk and hands the resulting "Instrument" property to the AIFF module.
 *
 * <p>The fields are consumed from the data stream in a fixed order: base note, detune, low and
 * high note, low and high velocity, gain, then the sustain and release loops.
 *
 * @param info not used by this implementation; the property is added to the module instead
 * @return always <code>true</code> when the chunk was read without an exception
 * @throws IOException if reading from the data stream fails
 */
public boolean readChunk(RepInfo info) throws IOException {
  final AiffModule aiff = (AiffModule) _module;

  // Stream reads; the order of these statements is the order of the bytes consumed.
  final int base = ModuleBase.readUnsignedByte(_dstream, aiff);
  final int detuneAmount = ModuleBase.readSignedByte(_dstream, aiff);
  final int noteLow = ModuleBase.readUnsignedByte(_dstream, aiff);
  final int noteHigh = ModuleBase.readUnsignedByte(_dstream, aiff);
  final int velocityLow = ModuleBase.readUnsignedByte(_dstream, aiff);
  final int velocityHigh = ModuleBase.readUnsignedByte(_dstream, aiff);
  final int gainValue = aiff.readSignedShort(_dstream);
  final Loop sustain = readLoop(aiff);
  final Loop release = readLoop(aiff);

  // The seven scalar fields become INTEGER properties, in reading order.
  final String[] names = {
    "BaseNote", "Detune", "LowNote", "HighNote", "LowVelocity", "HighVelocity", "Gain"
  };
  final int[] values = {
    base, detuneAmount, noteLow, noteHigh, velocityLow, velocityHigh, gainValue
  };

  List instrumentProps = new ArrayList(9);
  for (int k = 0; k < names.length; k++) {
    instrumentProps.add(new Property(names[k], PropertyType.INTEGER, new Integer(values[k])));
  }
  instrumentProps.add(sustain.loopProp("SustainLoop"));
  instrumentProps.add(release.loopProp("ReleaseLoop"));

  Property instrument =
      new Property("Instrument", PropertyType.PROPERTY, PropertyArity.LIST, instrumentProps);
  aiff.addAiffProperty(instrument);
  return true;
}
/* See if this document, even if it lacks a doctype, is most likely * XHTML. The test is that the document starts with an XML declaration * and has "html" for its first tag. * * Returns: * 0 if there's no XML declaration * 1 if there's an XML declaration but no html tag; in this * case it's probably some other kind of XML * 2 if there's an XML declaration and an html tag * */ protected int seemsToBeXHTML(List elements) { JHElement elem; try { elem = (JHElement) elements.get(0); if (!(elem instanceof JHXmlDecl)) { return 0; } Iterator iter = elements.iterator(); while (iter.hasNext()) { elem = (JHElement) iter.next(); if (elem instanceof JHOpenTag) { JHOpenTag tag = (JHOpenTag) elem; return ("html".equals(tag.getName()) ? 2 : 1); } } } catch (Exception e) { return 0; // document must be really empty } return 1; }
/**
 * Returns a Property representing a bitmask.
 *
 * <p>If <code>rawOutput</code> is true, returns a LONG property whose numeric value is <code>
 * value</code>, and <code>labels</code> is unused. Otherwise returns a STRING property of LIST
 * arity whose list holds the elements of <code>labels</code> whose indices correspond to 1 bits
 * in the bitmask, counting the low-order bit as bit 0.
 *
 * <p>If building the list fails (for example because <code>labels</code> is null), an error is
 * recorded in <code>_errors</code> and the list built so far is still returned.
 *
 * @param name The name for the Property
 * @param value The bitmask
 * @param labels Labels for the bits, indexed by bit number
 * @param rawOutput Whether to report the raw number instead of labels
 */
protected Property addBitmaskProperty(
    String name, long value, String[] labels, boolean rawOutput) {
  if (rawOutput) {
    return new Property(name, PropertyType.LONG, new Long(value));
  }
  List list = new LinkedList();
  try {
    // A long has 64 bits; the shift must be done in long arithmetic (1L), since an
    // int shift wraps its distance modulo 32 and mis-tests every bit from 31 upwards.
    for (int i = 0; i < labels.length && i < 64; i++) {
      if ((value & (1L << i)) != 0) {
        list.add(labels[i]);
      }
    }
  } catch (Exception e) {
    _errors.add(name + " value out of range: " + value);
  }
  return new Property(name, PropertyType.STRING, PropertyArity.LIST, list);
}
/* Check if there is a DOCTYPE at the start of the elements * list. If there is, return the appropriate version string. * If the DOCTYPE says it isn't HTML, trust it and call this * document ill-formed by returning -1. * If there is no DOCTYPE, or an unrecognized one, return 0. */ protected int checkDoctype(List elements) { JHElement firstElem = (JHElement) elements.get(0); if (firstElem instanceof JHXmlDecl && elements.size() >= 2) { firstElem = (JHElement) elements.get(1); } if (!(firstElem instanceof JHDoctype)) { return 0; // no DOCTYPE found } List dt = ((JHDoctype) firstElem).getDoctypeElements(); if (dt.size() < 3) { return 0; } try { // Is DOCTYPE case sensitive? Assume not. String str = ((String) dt.get(0)).toUpperCase(); if (!"HTML".equals(str)) { // It's not HTML return -1; } str = ((String) dt.get(1)).toUpperCase(); if (!"PUBLIC".equals(str)) { return 0; } str = stripQuotes(((String) dt.get(2)).toUpperCase()); _doctype = str; if ("-//W3C//DTD HTML 3.2 FINAL//EN".equals(str) || "-//W3C//DTD HTML 3.2//EN".equals(str)) { return HTML_3_2; } else if ("-//W3C//DTD HTML 4.0//EN".equals(str)) { return HTML_4_0_STRICT; } else if ("-//W3C//DTD HTML 4.0 TRANSITIONAL//EN".equals(str)) { return HTML_4_0_TRANSITIONAL; } else if ("-//W3C//DTD HTML 4.0 FRAMESET//EN".equals(str)) { return HTML_4_0_FRAMESET; } else if ("-//W3C//DTD HTML 4.01//EN".equals(str)) { return HTML_4_01_STRICT; } else if ("-//W3C//DTD HTML 4.01 TRANSITIONAL//EN".equals(str)) { return HTML_4_01_TRANSITIONAL; } else if ("-//W3C//DTD HTML 4.01 FRAMESET//EN".equals(str)) { return HTML_4_01_FRAMESET; } } catch (Exception e) { // Really shouldn't happen, but if it does we've got // a bad doctype return 0; } return 0; }
/**
 * Builds a property from a 32-bit bitmask, using one array of names for bits that are set and
 * another for bits that are clear.
 *
 * <p>When the raw flag of <code>_je</code> is on, the result is an INTEGER property holding
 * <code>val</code>. Otherwise the result is a STRING property of LIST arity: for each index of
 * <code>oneValueNames</code>, the name from <code>oneValueNames</code> is taken if that bit is 1
 * and the name from <code>zeroValueNames</code> if it is 0; null and empty names are skipped.
 *
 * @param val The bitmask
 * @param name The name for the Property
 * @param oneValueNames Array of names to use for '1' values
 * @param zeroValueNames Array of names to use for '0' values
 * @return the property, or <code>null</code> if an exception occurs while collecting the names
 */
public Property buildBitmaskProperty(
    int val, String name, String[] oneValueNames, String[] zeroValueNames) {
  final boolean raw = _je != null && _je.getShowRawFlag();
  if (raw) {
    return new Property(name, PropertyType.INTEGER, new Integer(val));
  }

  List chosen = new LinkedList();
  try {
    for (int bit = 0; bit < oneValueNames.length; bit++) {
      final boolean isSet = (val & (1 << bit)) != 0;
      final String label = isSet ? oneValueNames[bit] : zeroValueNames[bit];
      if (label == null || label.length() == 0) {
        continue; // nothing to report for this bit
      }
      chosen.add(label);
    }
  } catch (Exception e) {
    return null;
  }
  return new Property(name, PropertyType.STRING, PropertyArity.LIST, chosen);
}
/**
 * Builds a Property for an integer value.
 *
 * <p>With <code>rawOutput</code> set, the result is an INTEGER property and <code>labels</code>
 * is ignored. Otherwise the result is a STRING property holding <code>labels[value]</code>. If
 * that lookup or the property construction throws, an error is recorded in <code>_errors</code>
 * and an INTEGER property is returned instead.
 */
protected Property addIntegerProperty(
    String name, int value, String[] labels, boolean rawOutput) {
  if (!rawOutput) {
    try {
      return new Property(name, PropertyType.STRING, labels[value]);
    } catch (Exception e) {
      _errors.add(name + " value out of range: " + value);
      // fall back to the numeric form below
    }
  }
  return new Property(name, PropertyType.INTEGER, new Integer(value));
}
/**
 * Builds an ARRAY Property for an integer array.
 *
 * <p>With <code>rawOutput</code> set, the result is an INTEGER array property wrapping <code>
 * value</code> and <code>labels</code> is ignored. Otherwise the result is a STRING array
 * property whose i-th entry is <code>labels[value[i]]</code>. Each failed lookup records an
 * error in <code>_errors</code> and leaves that entry <code>null</code>.
 */
protected Property addIntegerArrayProperty(
    String name, int[] value, String[] labels, boolean rawOutput) {
  if (rawOutput) {
    return new Property(name, PropertyType.INTEGER, PropertyArity.ARRAY, value);
  }

  final int total = value.length;
  String[] text = new String[total];
  int pos = 0;
  while (pos < total) {
    try {
      text[pos] = labels[value[pos]];
    } catch (Exception e) {
      _errors.add(name + " value out of range: " + value[pos]);
    }
    pos++;
  }
  return new Property(name, PropertyType.STRING, PropertyArity.ARRAY, text);
}
/** Initializes the state of the module for parsing. */ protected void initParse() { super.initParse(); _propList = new LinkedList(); _notes = new LinkedList(); _labels = new LinkedList(); _labeledText = new LinkedList(); _samples = new LinkedList(); firstSampleOffsetMarked = false; numSamples = 0; _metadata = new Property("WAVEMetadata", PropertyType.PROPERTY, PropertyArity.LIST, _propList); _aesMetadata = new AESAudioMetadata(); _aesMetadata.setByteOrder(AESAudioMetadata.LITTLE_ENDIAN); _aesMetadata.setAnalogDigitalFlag("FILE_DIGITAL"); _aesMetadata.setFormat("WAVE"); _aesMetadata.setUse("OTHER", "JHOVE_validation"); _aesMetadata.setDirection("NONE"); _propList.add(new Property("AESAudioMetadata", PropertyType.AESAUDIOMETADATA, _aesMetadata)); // Most chunk types are allowed to occur only once, // and a few must occur exactly once. // Clear flags for whether they have been seen. formatChunkSeen = false; dataChunkSeen = false; instrumentChunkSeen = false; cartChunkSeen = false; mpegChunkSeen = false; broadcastExtChunkSeen = false; peakChunkSeen = false; linkChunkSeen = false; cueChunkSeen = false; // Initialize profile flags flagPCMWaveFormat = false; flagWaveFormatEx = false; flagWaveFormatExtensible = false; flagBroadcastWave = false; }
/**
 * Builds a Property for an integer value, using a lookup table to find its label.
 *
 * <p>With <code>rawOutput</code> set, the result is an INTEGER property and <code>labels</code>
 * and <code>index</code> are ignored. Otherwise <code>index</code> is searched for the first
 * entry equal to <code>value</code>; if found at position n, the result is a STRING property
 * holding <code>labels[n]</code>. If <code>value</code> is not in <code>index</code>, an error
 * is recorded in <code>_errors</code> and an INTEGER property is returned.
 */
protected Property addIntegerProperty(
    String name, int value, String[] labels, int[] index, boolean rawOutput) {
  if (!rawOutput) {
    // Linear search for the first matching entry.
    int pos = 0;
    while (pos < index.length && index[pos] != value) {
      pos++;
    }
    if (pos < index.length) {
      return new Property(name, PropertyType.STRING, labels[pos]);
    }
    _errors.add(name + " value out of range: " + value);
  }
  return new Property(name, PropertyType.INTEGER, new Integer(value));
}
/** * Parses the content of a purported WAVE digital object and stores the results in RepInfo. * * @param stream An InputStream, positioned at its beginning, which is generated from the object * to be parsed * @param info A fresh RepInfo object which will be modified to reflect the results of the parsing * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns * a nonzero value, it must be called again with <code>parseIndex</code> equal to that return * value. */ public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException { initParse(); info.setFormat(_format[0]); info.setMimeType(_mimeType[0]); info.setModule(this); _aesMetadata.setPrimaryIdentifier(info.getUri()); if (info.getURLFlag()) { _aesMetadata.setOtherPrimaryIdentifierType("URI"); } else { _aesMetadata.setPrimaryIdentifierType(AESAudioMetadata.FILE_NAME); } /* We may have already done the checksums while converting a temporary file. */ _ckSummer = null; if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) { _ckSummer = new Checksummer(); _cstream = new ChecksumInputStream(stream, _ckSummer); _dstream = getBufferedDataStream(_cstream, _je != null ? _je.getBufferSize() : 0); } else { _dstream = getBufferedDataStream(stream, _je != null ? _je.getBufferSize() : 0); } try { // Check the start of the file for the right opening bytes for (int i = 0; i < 4; i++) { int ch = readUnsignedByte(_dstream, this); if (ch != sigByte[i]) { info.setMessage(new ErrorMessage("Document does not start with RIFF chunk", 0)); info.setWellFormed(false); return 0; } } /* If we got this far, take note that the signature is OK. */ info.setSigMatch(_name); // Get the length of the Form chunk. This includes all // the subsequent chunks in the file, but excludes the // header ("FORM" and the length itself). bytesRemaining = readUnsignedInt(_dstream); // Read the file type. 
String typ = read4Chars(_dstream); bytesRemaining -= 4; if (!"WAVE".equals(typ)) { info.setMessage(new ErrorMessage("File type in RIFF header is not WAVE", _nByte)); info.setWellFormed(false); return 0; } while (bytesRemaining > 0) { if (!readChunk(info)) { break; } } } catch (EOFException e) { info.setWellFormed(false); info.setMessage(new ErrorMessage("Unexpected end of file", _nByte)); return 0; } // Set duration from number of samples and rate. if (numSamples > 0) { // _aesMetadata.setDuration((double) numSamples / sampleRate); _aesMetadata.setDuration(numSamples); } // Add note and label properties, if there's anything // to report. if (!_labels.isEmpty()) { _propList.add(new Property("Labels", PropertyType.PROPERTY, PropertyArity.LIST, _labels)); } if (!_labeledText.isEmpty()) { _propList.add( new Property("LabeledText", PropertyType.PROPERTY, PropertyArity.LIST, _labeledText)); } if (!_notes.isEmpty()) { _propList.add(new Property("Notes", PropertyType.PROPERTY, PropertyArity.LIST, _notes)); } if (!_samples.isEmpty()) { _propList.add(new Property("Samples", PropertyType.PROPERTY, PropertyArity.LIST, _samples)); } if (_exifInfo != null) { _propList.add(_exifInfo.buildProperty()); } if (!formatChunkSeen) { info.setMessage(new ErrorMessage("No Format Chunk")); info.setWellFormed(false); return 0; } /* This file looks OK. */ if (_ckSummer != null) { /* We may not have actually hit the end of file. If we're calculating * checksums on the fly, we have to read and discard whatever is * left, so it will get checksummed. 
*/ for (; ; ) { try { int n = skipBytes(_dstream, 2048, this); if (n == 0) { break; } } catch (Exception e) { break; } } info.setSize(_cstream.getNBytes()); info.setChecksum(new Checksum(_ckSummer.getCRC32(), ChecksumType.CRC32)); String value = _ckSummer.getMD5(); if (value != null) { info.setChecksum(new Checksum(value, ChecksumType.MD5)); } if ((value = _ckSummer.getSHA1()) != null) { info.setChecksum(new Checksum(value, ChecksumType.SHA1)); } } info.setProperty(_metadata); // Indicate satisfied profiles. if (flagPCMWaveFormat) { info.setProfile("PCMWAVEFORMAT"); } if (flagWaveFormatEx) { info.setProfile("WAVEFORMATEX"); } if (flagWaveFormatExtensible) { info.setProfile("WAVEFORMATEXTENSIBLE"); } if (flagBroadcastWave) { // Need to do some additional checks. if (!broadcastExtChunkSeen) { flagBroadcastWave = false; } if (compressionCode == FormatChunk.WAVE_FORMAT_MPEG) { if (!broadcastExtChunkSeen || !factChunkSeen) { flagBroadcastWave = false; } } if (flagBroadcastWave) { String prof = null; switch (broadcastVersion) { case 0: prof = "Broadcast Wave Version 0"; break; case 1: prof = "Broadcast Wave Version 1"; break; // Other versions are unknown at this time } if (prof != null) { info.setProfile(prof); } } } return 0; }
/** * Reads a chunk and puts a BroadcastAudioExtension Property into the RepInfo object. * * @return <code>false</code> if the chunk is structurally invalid, otherwise <code>true</code> */ public boolean readChunk(RepInfo info) throws IOException { WaveModule module = (WaveModule) _module; byte[] buf256 = new byte[256]; ModuleBase.readByteBuf(_dstream, buf256, module); String description = byteBufString(buf256); byte[] buf32 = new byte[32]; ModuleBase.readByteBuf(_dstream, buf32, module); String originator = byteBufString(buf32); ModuleBase.readByteBuf(_dstream, buf32, module); String originatorRef = byteBufString(buf32); byte[] buf10 = new byte[10]; ModuleBase.readByteBuf(_dstream, buf10, module); String originationDate = byteBufString(buf10); byte[] buf8 = new byte[8]; ModuleBase.readByteBuf(_dstream, buf8, module); String originationTime = byteBufString(buf8); // TimeReference is stored as a 64-bit little-endian // number -- I think long timeReference = module.readSignedLong(_dstream); int version = module.readUnsignedShort(_dstream); module.setBroadcastVersion(version); byte[] smtpe_umid = new byte[64]; ModuleBase.readByteBuf(_dstream, smtpe_umid, module); module.skipBytes(_dstream, 190, module); String codingHistory = ""; if (bytesLeft > 602) { byte[] bufCodingHistory = new byte[(int) bytesLeft - 602]; ModuleBase.readByteBuf(_dstream, bufCodingHistory, module); codingHistory = byteBufString(bufCodingHistory); } // Whew -- we've read the whole thing. Now make that into a // list of Properties. 
List plist = new ArrayList(20); if (description.length() > 0) { plist.add(new Property("Description", PropertyType.STRING, description)); } if (originator.length() > 0) { plist.add(new Property("Originator", PropertyType.STRING, originator)); } if (originationDate.length() > 0) { plist.add(new Property("OriginationDate", PropertyType.STRING, originationDate)); } if (originationTime.length() > 0) { plist.add(new Property("OriginationTime", PropertyType.STRING, originationTime)); } plist.add(new Property("TimeReference", PropertyType.LONG, new Long(timeReference))); plist.add(new Property("Version", PropertyType.INTEGER, new Integer(version))); plist.add(new Property("UMID", PropertyType.BYTE, PropertyArity.ARRAY, smtpe_umid)); if (codingHistory.length() > 0) { plist.add(new Property("CodingHistory", PropertyType.STRING, codingHistory)); } module.addWaveProperty( new Property("BroadcastAudioExtension", PropertyType.PROPERTY, PropertyArity.LIST, plist)); // set time reference in AES metadata set @author David Ackerman AESAudioMetadata aes = module.getAESMetadata(); aes.setStartTime(timeReference); return true; }
/**
 * Appends a Label property to the module's list of labels.
 *
 * @param p the Label property to append
 */
public void addLabel(Property p) {
  this._labels.add(p);
}
/** * Parse the content of a purported HTML stream digital object and store the results in RepInfo. * * @param stream An InputStream, positioned at its beginning, which is generated from the object * to be parsed. If multiple calls to <code>parse</code> are made on the basis of a nonzero * value being returned, a new InputStream must be provided each time. * @param info A fresh (on the first call) RepInfo object which will be modified to reflect the * results of the parsing If multiple calls to <code>parse</code> are made on the basis of a * nonzero value being returned, the same RepInfo object should be passed with each call. * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns * a nonzero value, it must be called again with <code>parseIndex</code> equal to that return * value. */ public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException { if (parseIndex != 0) { // Coming in with parseIndex = 1 indicates that we've determined // this is XHTML; so we invoke the XML module to parse it. // If parseIndex is 100, this is the first invocation of the // XML module, so we call it with 0; otherwise we call it with // the value of parseIndex. if (isXmlAvailable()) { edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule(); if (parseIndex == 100) { parseIndex = 0; } xmlMod.setApp(_app); xmlMod.setBase(_je); xmlMod.setDefaultParams(_defaultParams); try { xmlMod.applyDefaultParams(); } catch (Exception e) { // really shouldn't happen } xmlMod.setXhtmlDoctype(_doctype); return xmlMod.parse(stream, info, parseIndex); } else { // The XML module shouldn't be missing from any installation, // but someone who really wanted to could remove it. In // that case, you deserve what you get. 
info.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents")); info.setWellFormed(false); // Treat it as completely wrong return 0; } } else { /* parseIndex = 0, first call only */ _doctype = null; } // Test if textMD is to be generated if (_defaultParams != null) { Iterator iter = _defaultParams.iterator(); while (iter.hasNext()) { String param = (String) iter.next(); if (param.toLowerCase().equals("withtextmd=true")) { _withTextMD = true; } } } initParse(); info.setFormat(_format[0]); info.setMimeType(_mimeType[0]); info.setModule(this); if (_textMD == null || parseIndex == 0) { _textMD = new TextMDMetadata(); } /* We may have already done the checksums while converting a temporary file. */ Checksummer ckSummer = null; if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) { ckSummer = new Checksummer(); _cstream = new ChecksumInputStream(stream, ckSummer); _dstream = getBufferedDataStream(_cstream, _je != null ? _je.getBufferSize() : 0); } else { _dstream = getBufferedDataStream(stream, _je != null ? _je.getBufferSize() : 0); } ParseHtml parser = null; HtmlMetadata metadata = null; HtmlCharStream cstream = null; try { cstream = new HtmlCharStream(_dstream, "ISO-8859-1"); parser = new ParseHtml(cstream); } catch (UnsupportedEncodingException e) { info.setMessage(new ErrorMessage("Internal error: " + e.getMessage())); info.setWellFormed(false); return 0; // shouldn't happen! } int type = 0; try { List elements = parser.HtmlDoc(); if (elements.isEmpty()) { // Consider an empty document bad info.setWellFormed(false); info.setMessage(new ErrorMessage("Document is empty")); return 0; } type = checkDoctype(elements); if (type < 0) { info.setWellFormed(false); info.setMessage(new ErrorMessage("DOCTYPE is not HTML")); return 0; } /* Check if there is at least one html, head, body or title tag. 
* A plain text document * might be interpreted as a single PCDATA, which is in some * ethereal sense well-formed HTML, but it's pointless to consider * it such. It might also use angle brackets as a text delimiter, * and that shouldn't count as HTML either. */ boolean hasElements = false; Iterator iter = elements.iterator(); while (iter.hasNext()) { Object o = iter.next(); if (o instanceof JHOpenTag) { String name = ((JHOpenTag) o).getName(); if ("html".equals(name) || "head".equals(name) || "body".equals(name) || "title".equals(name)) { hasElements = true; } break; } } if (!hasElements) { info.setMessage(new ErrorMessage("Document contains no html, head, body or title tags")); info.setWellFormed(false); return 0; } // CRLF from HtmlCharStream ... String lineEnd = cstream.getKindOfLineEnd(); if (lineEnd == null) { info.setMessage(new InfoMessage("Not able to determine type of end of line")); _textMD.setLinebreak(TextMDMetadata.NILL); } else if (lineEnd.equalsIgnoreCase("CR")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR); } else if (lineEnd.equalsIgnoreCase("LF")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF); } else if (lineEnd.equalsIgnoreCase("CRLF")) { _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF); } if (type == 0) { /* If we can't find a doctype, it still might be XHTML * if the elements start with an XML declaration and * the root element is "html" */ switch (seemsToBeXHTML(elements)) { case 0: // Not XML break; // fall through case 1: // XML but not HTML info.setMessage( new ErrorMessage( "Document has XML declaration but no DOCTYPE; " + "probably XML rather than HTML")); info.setWellFormed(false); return 0; case 2: // probably XHTML return 100; } info.setMessage( new ErrorMessage( "Unrecognized or missing DOCTYPE declaration; " + "validation continuing as HTML 3.2")); info.setValid(false); // But keep going } HtmlDocDesc docDesc = null; switch (type) { case HTML_3_2: default: docDesc = new Html3_2DocDesc(); _textMD.setMarkup_basis("HTML"); 
_textMD.setMarkup_basis_version("3.2"); break; case HTML_4_0_FRAMESET: docDesc = new Html4_0FrameDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_0_TRANSITIONAL: docDesc = new Html4_0TransDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_0_STRICT: docDesc = new Html4_0StrictDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.0"); break; case HTML_4_01_FRAMESET: docDesc = new Html4_01FrameDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case HTML_4_01_TRANSITIONAL: docDesc = new Html4_01TransDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case HTML_4_01_STRICT: docDesc = new Html4_01StrictDocDesc(); _textMD.setMarkup_basis("HTML"); _textMD.setMarkup_basis_version("4.01"); break; case XHTML_1_0_STRICT: case XHTML_1_0_TRANSITIONAL: case XHTML_1_0_FRAMESET: case XHTML_1_1: // Force a second call to parse as XML. 100 is a // magic code for the first XML call. return 100; } _textMD.setMarkup_language(_doctype); if (docDesc == null) { info.setMessage( new InfoMessage( "Code for appropriate HTML version not available yet:" + "substituting HTML 3.2")); docDesc = new Html3_2DocDesc(); } docDesc.validate(elements, info); metadata = docDesc.getMetadata(); // Try to get the charset from the meta Content if (metadata.getCharset() != null) { _textMD.setCharset(metadata.getCharset()); } else { _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1); } String textMDEncoding = _textMD.getCharset(); if (textMDEncoding.indexOf("UTF") != -1) { _textMD.setByte_order( _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE); _textMD.setByte_size("8"); _textMD.setCharacter_size("variable"); } else { _textMD.setByte_order( _bigEndian ? 
TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE); _textMD.setByte_size("8"); _textMD.setCharacter_size("1"); } } catch (ParseException e) { Token t = e.currentToken; info.setMessage( new ErrorMessage("Parse error", "Line = " + t.beginLine + ", column = " + t.beginColumn)); info.setWellFormed(false); } catch (TokenMgrError f) { info.setMessage(new ErrorMessage("TokenMgrError: " + f.getLocalizedMessage())); info.setWellFormed(false); } if (info.getWellFormed() == RepInfo.FALSE) { return 0; } if (type != 0) { if (profileNames[type] != null) { info.setProfile(profileNames[type]); } info.setVersion(versionNames[type]); } if (metadata != null) { Property property = metadata.toProperty(_withTextMD ? _textMD : null); if (property != null) { info.setProperty(property); } } if (ckSummer != null) { info.setSize(_cstream.getNBytes()); info.setChecksum(new Checksum(ckSummer.getCRC32(), ChecksumType.CRC32)); String value = ckSummer.getMD5(); if (value != null) { info.setChecksum(new Checksum(value, ChecksumType.MD5)); } if ((value = ckSummer.getSHA1()) != null) { info.setChecksum(new Checksum(value, ChecksumType.SHA1)); } } return 0; }
/**
 * Appends a LabeledText property to the module's list of labeled text.
 *
 * @param p the LabeledText property to append
 */
public void addLabeledText(Property p) {
  this._labeledText.add(p);
}
/**
 * Appends a Property to the list of WAVE metadata properties.
 *
 * @param prop the property to append
 */
public void addWaveProperty(Property prop) {
  this._propList.add(prop);
}
/**
 * Wraps the given list in a "ListInfo" property and appends it to the WAVE metadata properties.
 *
 * @param l the list to report; the original documentation describes it as a List of String
 *     Properties
 */
public void addListInfo(List l) {
  Property listInfo = new Property("ListInfo", PropertyType.PROPERTY, PropertyArity.LIST, l);
  _propList.add(listInfo);
}
/**
 * Appends a Note property to the module's list of notes.
 *
 * @param p the Note property to append
 */
public void addNote(Property p) {
  this._notes.add(p);
}
/**
 * Appends a Sample property to the module's list of samples.
 *
 * @param p the Sample property to append
 */
public void addSample(Property p) {
  this._samples.add(p);
}