Java RepInfo.setWellFormed Examples

Programming Language: Java

Namespace/Package Name: java.util

Class/Type: RepInfo

Method/Function: setWellFormed

Examples at hotexamples.com: 4

Java RepInfo.setWellFormed - 4 examples found. These are the top rated real world Java examples of java.util.RepInfo.setWellFormed extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

setMessage(6)

setWellFormed(4)

setFormat(3)

setMimeType(3)

setModule(3)

getChecksum(2)

setChecksum(2)

setProfile(2)

setProperty(2)

setSigMatch(2)

setSize(2)

setValid(2)

getURLFlag(1)

getUri(1)

getWellFormed(1)

setVersion(1)

Example #1

Show file

File: HtmlModule.java Project: Det-Kongelige-Bibliotek/jhove

 /**
  * Check if the digital object conforms to this Module's internal signature information.
  *
  * <p>HTML is one of the most ill-defined of any open formats, so checking a "signature" really
  * means using some heuristics. The only required tag is TITLE, but that could occur well into the
  * file. So we look for any of three strings -- taking into account case-independence and white
  * space -- within the first sigBytes bytes, and call that a signature check.
  *
  * @param file A File object for the object being parsed
  * @param stream An InputStream, positioned at its beginning, which is generated from the object
  *     to be parsed
  * @param info A fresh RepInfo object which will be modified to reflect the results of the test
  */
 public void checkSignatures(File file, InputStream stream, RepInfo info) throws IOException {
   info.setFormat(_format[0]);
   info.setMimeType(_mimeType[0]);
   info.setModule(this);
   char[][] sigtext = new char[3][];
   sigtext[0] = "<!DOCTYPE HTML".toCharArray();
   sigtext[1] = "<HTML".toCharArray();
   sigtext[2] = "<TITLE".toCharArray();
   int[] sigstate = {0, 0, 0};
   JhoveBase jb = getBase();
   int sigBytes = jb.getSigBytes();
   int bytesRead = 0;
   boolean eof = false;
   DataInputStream dstream = new DataInputStream(stream);
   while (!eof && bytesRead < sigBytes) {
     try {
       int ch = readUnsignedByte(dstream, this);
       char chr = Character.toUpperCase((char) ch);
       ++bytesRead;
       if (Character.isWhitespace(chr)) {
         continue; // ignore all whitespace
       }
       for (int i = 0; i < 3; i++) {
         int ss = sigstate[i];
         char[] st = sigtext[i];
         if (chr == st[ss]) {
           ++sigstate[i];
           if (sigstate[i] == st.length) {
             // One of the sig texts matches!
             info.setSigMatch(_name);
             return;
           }
         } else sigstate[i] = 0;
       }
     } catch (EOFException e) {
       eof = true;
     }
   }
   // If we fall through, there was no sig match
   info.setWellFormed(false);
   return;
 }

Example #2

Show file

File: IFD.java Project: KOST-CECO/SIP-Val

  /**
   * Parse the IFD. Errors are not suppressed.
   *
   * @param byteOffsetIsValid If true, allow offsets on odd byte boundaries
   * @return The offset of the next IFD
   */
  public long parse(boolean byteOffsetIsValid) throws TiffException {
    /* Start at the IFD offset, read the number of entries, then
     * read the entire IFD.
     */
    long offset = _offset;
    _next = 0L;
    byte[] buffer;
    int nFields = 0;
    try {
      _raf.seek(offset);
      nFields = ModuleBase.readUnsignedShort(_raf, _bigEndian);
      offset += 2;

      int len = 12 * nFields;
      buffer = new byte[len];
      _raf.read(buffer, 0, len);

      /* Read the offset of the next IFD (or 0 if none). */
      offset += len;
      _next = ModuleBase.readUnsignedInt(_raf, _bigEndian);
    } catch (Exception e) {
      throw new TiffException("Premature EOF", offset);
    }

    DataInputStream ifdStream = new DataInputStream(new ByteArrayInputStream(buffer));

    try {
      int prevTag = 0;
      for (int i = 0; i < nFields; i++) {
        int tag = ModuleBase.readUnsignedShort(ifdStream, _bigEndian, null);
        /* Tags must be in ascending numerical order. */
        if (!debug_allowoutofsequence && tag < prevTag) {
          _info.setMessage(
              new ErrorMessage("Tag " + tag + " out of sequence", _offset + 2 + 12 * i));
          _info.setWellFormed(false);
        }
        prevTag = tag;

        int type = ModuleBase.readUnsignedShort(ifdStream, _bigEndian, null);
        /* Skip over tags with unknown type. */
        if (type < BYTE || type > IFD) {
          _info.setMessage(
              new ErrorMessage(
                  "Unknown data type", "Type = " + type + ", Tag = " + tag, _offset + 4 + 12 * i));
        } else {
          /* Type gives indication of the TIFF version. */
          if (SBYTE <= type && type <= IFD) {
            _version = 6;
          }

          long count = ModuleBase.readUnsignedInt(ifdStream, _bigEndian, null);
          long value = ModuleBase.readUnsignedInt(ifdStream, _bigEndian, null);
          if (calcValueSize(type, count) > 4) {
            /* Value is the word-aligned offset of the actual
             * value. */
            if ((value & 1) != 0) {
              if (byteOffsetIsValid) {
                _info.setMessage(
                    new InfoMessage(
                        "Value offset not word-aligned: " + value, _offset + 10 + 12 * i));
              } else {
                throw new TiffException(
                    "Value offset not " + "word-aligned: " + value, _offset + 10 + 12 * i);
              }
            }
          } else {
            /* Value is the actual value; pass the offset of
             * the value. */
            value = _offset + 10 + 12 * i;
          }
          lookupTag(tag, type, count, value);
        }
      }
    } catch (IOException e) {
      throw new TiffException("Read error", _offset + 2);
    }
    postParseInitialization();

    return _next;
  }

Example #3

Show file

File: HtmlModule.java Project: Det-Kongelige-Bibliotek/jhove

  /**
   * Parse the content of a purported HTML stream digital object and store the results in RepInfo.
   *
   * @param stream An InputStream, positioned at its beginning, which is generated from the object
   *     to be parsed. If multiple calls to <code>parse</code> are made on the basis of a nonzero
   *     value being returned, a new InputStream must be provided each time.
   * @param info A fresh (on the first call) RepInfo object which will be modified to reflect the
   *     results of the parsing If multiple calls to <code>parse</code> are made on the basis of a
   *     nonzero value being returned, the same RepInfo object should be passed with each call.
   * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns
   *     a nonzero value, it must be called again with <code>parseIndex</code> equal to that return
   *     value.
   */
  public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException {
    if (parseIndex != 0) {
      // Coming in with parseIndex = 1 indicates that we've determined
      // this is XHTML; so we invoke the XML module to parse it.
      // If parseIndex is 100, this is the first invocation of the
      // XML module, so we call it with 0; otherwise we call it with
      // the value of parseIndex.
      if (isXmlAvailable()) {
        edu.harvard.hul.ois.jhove.module.XmlModule xmlMod =
            new edu.harvard.hul.ois.jhove.module.XmlModule();
        if (parseIndex == 100) {
          parseIndex = 0;
        }
        xmlMod.setApp(_app);
        xmlMod.setBase(_je);
        xmlMod.setDefaultParams(_defaultParams);
        try {
          xmlMod.applyDefaultParams();
        } catch (Exception e) {
          // really shouldn't happen
        }
        xmlMod.setXhtmlDoctype(_doctype);
        return xmlMod.parse(stream, info, parseIndex);
      } else {
        // The XML module shouldn't be missing from any installation,
        // but someone who really wanted to could remove it.  In
        // that case, you deserve what you get.
        info.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents"));
        info.setWellFormed(false); // Treat it as completely wrong
        return 0;
      }
    } else {
      /* parseIndex = 0, first call only */
      _doctype = null;
    }
    // Test if textMD is to be generated
    if (_defaultParams != null) {
      Iterator iter = _defaultParams.iterator();
      while (iter.hasNext()) {
        String param = (String) iter.next();
        if (param.toLowerCase().equals("withtextmd=true")) {
          _withTextMD = true;
        }
      }
    }

    initParse();
    info.setFormat(_format[0]);
    info.setMimeType(_mimeType[0]);
    info.setModule(this);

    if (_textMD == null || parseIndex == 0) {
      _textMD = new TextMDMetadata();
    }
    /* We may have already done the checksums while converting a
    temporary file. */
    Checksummer ckSummer = null;
    if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) {
      ckSummer = new Checksummer();
      _cstream = new ChecksumInputStream(stream, ckSummer);
      _dstream = getBufferedDataStream(_cstream, _je != null ? _je.getBufferSize() : 0);
    } else {
      _dstream = getBufferedDataStream(stream, _je != null ? _je.getBufferSize() : 0);
    }

    ParseHtml parser = null;
    HtmlMetadata metadata = null;
    HtmlCharStream cstream = null;
    try {
      cstream = new HtmlCharStream(_dstream, "ISO-8859-1");
      parser = new ParseHtml(cstream);
    } catch (UnsupportedEncodingException e) {
      info.setMessage(new ErrorMessage("Internal error: " + e.getMessage()));
      info.setWellFormed(false);
      return 0; // shouldn't happen!
    }
    int type = 0;
    try {
      List elements = parser.HtmlDoc();
      if (elements.isEmpty()) {
        // Consider an empty document bad
        info.setWellFormed(false);
        info.setMessage(new ErrorMessage("Document is empty"));
        return 0;
      }
      type = checkDoctype(elements);
      if (type < 0) {
        info.setWellFormed(false);
        info.setMessage(new ErrorMessage("DOCTYPE is not HTML"));
        return 0;
      }
      /* Check if there is at least one html, head, body or title tag.
       * A plain text document
       * might be interpreted as a single PCDATA, which is in some
       * ethereal sense well-formed HTML, but it's pointless to consider
       * it such.  It might also use angle brackets as a text delimiter,
       * and that shouldn't count as HTML either. */
      boolean hasElements = false;
      Iterator iter = elements.iterator();
      while (iter.hasNext()) {
        Object o = iter.next();
        if (o instanceof JHOpenTag) {
          String name = ((JHOpenTag) o).getName();
          if ("html".equals(name)
              || "head".equals(name)
              || "body".equals(name)
              || "title".equals(name)) {
            hasElements = true;
          }
          break;
        }
      }
      if (!hasElements) {
        info.setMessage(new ErrorMessage("Document contains no html, head, body or title tags"));
        info.setWellFormed(false);
        return 0;
      }

      // CRLF from HtmlCharStream ...
      String lineEnd = cstream.getKindOfLineEnd();
      if (lineEnd == null) {
        info.setMessage(new InfoMessage("Not able to determine type of end of line"));
        _textMD.setLinebreak(TextMDMetadata.NILL);
      } else if (lineEnd.equalsIgnoreCase("CR")) {
        _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR);
      } else if (lineEnd.equalsIgnoreCase("LF")) {
        _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF);
      } else if (lineEnd.equalsIgnoreCase("CRLF")) {
        _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF);
      }

      if (type == 0) {
        /* If we can't find a doctype, it still might be XHTML
         * if the elements start with an XML declaration and
         * the root element is "html" */
        switch (seemsToBeXHTML(elements)) {
          case 0: // Not XML
            break; // fall through
          case 1: // XML but not HTML
            info.setMessage(
                new ErrorMessage(
                    "Document has XML declaration but no DOCTYPE; "
                        + "probably XML rather than HTML"));
            info.setWellFormed(false);
            return 0;
          case 2: // probably XHTML
            return 100;
        }
        info.setMessage(
            new ErrorMessage(
                "Unrecognized or missing DOCTYPE declaration; "
                    + "validation continuing as HTML 3.2"));
        info.setValid(false);
        // But keep going
      }

      HtmlDocDesc docDesc = null;
      switch (type) {
        case HTML_3_2:
        default:
          docDesc = new Html3_2DocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("3.2");
          break;

        case HTML_4_0_FRAMESET:
          docDesc = new Html4_0FrameDocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("4.0");
          break;
        case HTML_4_0_TRANSITIONAL:
          docDesc = new Html4_0TransDocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("4.0");
          break;
        case HTML_4_0_STRICT:
          docDesc = new Html4_0StrictDocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("4.0");
          break;
        case HTML_4_01_FRAMESET:
          docDesc = new Html4_01FrameDocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("4.01");
          break;
        case HTML_4_01_TRANSITIONAL:
          docDesc = new Html4_01TransDocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("4.01");
          break;
        case HTML_4_01_STRICT:
          docDesc = new Html4_01StrictDocDesc();
          _textMD.setMarkup_basis("HTML");
          _textMD.setMarkup_basis_version("4.01");
          break;
        case XHTML_1_0_STRICT:
        case XHTML_1_0_TRANSITIONAL:
        case XHTML_1_0_FRAMESET:
        case XHTML_1_1:
          // Force a second call to parse as XML. 100 is a
          // magic code for the first XML call.
          return 100;
      }
      _textMD.setMarkup_language(_doctype);
      if (docDesc == null) {
        info.setMessage(
            new InfoMessage(
                "Code for appropriate HTML version not available yet:" + "substituting HTML 3.2"));
        docDesc = new Html3_2DocDesc();
      }
      docDesc.validate(elements, info);
      metadata = docDesc.getMetadata();

      // Try to get the charset from the meta Content
      if (metadata.getCharset() != null) {
        _textMD.setCharset(metadata.getCharset());
      } else {
        _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1);
      }
      String textMDEncoding = _textMD.getCharset();
      if (textMDEncoding.indexOf("UTF") != -1) {
        _textMD.setByte_order(
            _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE);
        _textMD.setByte_size("8");
        _textMD.setCharacter_size("variable");
      } else {
        _textMD.setByte_order(
            _bigEndian ? TextMDMetadata.BYTE_ORDER_BIG : TextMDMetadata.BYTE_ORDER_LITTLE);
        _textMD.setByte_size("8");
        _textMD.setCharacter_size("1");
      }
    } catch (ParseException e) {
      Token t = e.currentToken;
      info.setMessage(
          new ErrorMessage("Parse error", "Line = " + t.beginLine + ", column = " + t.beginColumn));
      info.setWellFormed(false);
    } catch (TokenMgrError f) {
      info.setMessage(new ErrorMessage("TokenMgrError: " + f.getLocalizedMessage()));
      info.setWellFormed(false);
    }

    if (info.getWellFormed() == RepInfo.FALSE) {
      return 0;
    }

    if (type != 0) {
      if (profileNames[type] != null) {
        info.setProfile(profileNames[type]);
      }
      info.setVersion(versionNames[type]);
    }

    if (metadata != null) {
      Property property = metadata.toProperty(_withTextMD ? _textMD : null);
      if (property != null) {
        info.setProperty(property);
      }
    }

    if (ckSummer != null) {
      info.setSize(_cstream.getNBytes());
      info.setChecksum(new Checksum(ckSummer.getCRC32(), ChecksumType.CRC32));
      String value = ckSummer.getMD5();
      if (value != null) {
        info.setChecksum(new Checksum(value, ChecksumType.MD5));
      }
      if ((value = ckSummer.getSHA1()) != null) {
        info.setChecksum(new Checksum(value, ChecksumType.SHA1));
      }
    }

    return 0;
  }

Example #4

Show file

File: WaveModule.java Project: KOST-secure/SIP-Val

  /**
   * Parses the content of a purported WAVE digital object and stores the results in RepInfo.
   *
   * @param stream An InputStream, positioned at its beginning, which is generated from the object
   *     to be parsed
   * @param info A fresh RepInfo object which will be modified to reflect the results of the parsing
   * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns
   *     a nonzero value, it must be called again with <code>parseIndex</code> equal to that return
   *     value.
   */
  public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException {
    initParse();
    info.setFormat(_format[0]);
    info.setMimeType(_mimeType[0]);
    info.setModule(this);

    _aesMetadata.setPrimaryIdentifier(info.getUri());
    if (info.getURLFlag()) {
      _aesMetadata.setOtherPrimaryIdentifierType("URI");
    } else {
      _aesMetadata.setPrimaryIdentifierType(AESAudioMetadata.FILE_NAME);
    }

    /* We may have already done the checksums while converting a
    temporary file. */
    _ckSummer = null;
    if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) {
      _ckSummer = new Checksummer();
      _cstream = new ChecksumInputStream(stream, _ckSummer);
      _dstream = getBufferedDataStream(_cstream, _je != null ? _je.getBufferSize() : 0);
    } else {
      _dstream = getBufferedDataStream(stream, _je != null ? _je.getBufferSize() : 0);
    }

    try {
      // Check the start of the file for the right opening bytes
      for (int i = 0; i < 4; i++) {
        int ch = readUnsignedByte(_dstream, this);
        if (ch != sigByte[i]) {
          info.setMessage(new ErrorMessage("Document does not start with RIFF chunk", 0));
          info.setWellFormed(false);
          return 0;
        }
      }
      /* If we got this far, take note that the signature is OK. */
      info.setSigMatch(_name);

      // Get the length of the Form chunk.  This includes all
      // the subsequent chunks in the file, but excludes the
      // header ("FORM" and the length itself).
      bytesRemaining = readUnsignedInt(_dstream);

      // Read the file type.
      String typ = read4Chars(_dstream);
      bytesRemaining -= 4;
      if (!"WAVE".equals(typ)) {
        info.setMessage(new ErrorMessage("File type in RIFF header is not WAVE", _nByte));
        info.setWellFormed(false);
        return 0;
      }

      while (bytesRemaining > 0) {
        if (!readChunk(info)) {
          break;
        }
      }
    } catch (EOFException e) {
      info.setWellFormed(false);
      info.setMessage(new ErrorMessage("Unexpected end of file", _nByte));
      return 0;
    }

    // Set duration from number of samples and rate.
    if (numSamples > 0) {
      // _aesMetadata.setDuration((double) numSamples / sampleRate);
      _aesMetadata.setDuration(numSamples);
    }

    // Add note and label properties, if there's anything
    // to report.
    if (!_labels.isEmpty()) {
      _propList.add(new Property("Labels", PropertyType.PROPERTY, PropertyArity.LIST, _labels));
    }
    if (!_labeledText.isEmpty()) {
      _propList.add(
          new Property("LabeledText", PropertyType.PROPERTY, PropertyArity.LIST, _labeledText));
    }
    if (!_notes.isEmpty()) {
      _propList.add(new Property("Notes", PropertyType.PROPERTY, PropertyArity.LIST, _notes));
    }
    if (!_samples.isEmpty()) {
      _propList.add(new Property("Samples", PropertyType.PROPERTY, PropertyArity.LIST, _samples));
    }
    if (_exifInfo != null) {
      _propList.add(_exifInfo.buildProperty());
    }
    if (!formatChunkSeen) {
      info.setMessage(new ErrorMessage("No Format Chunk"));
      info.setWellFormed(false);
      return 0;
    }

    /* This file looks OK. */
    if (_ckSummer != null) {
      /* We may not have actually hit the end of file. If we're calculating
       * checksums on the fly, we have to read and discard whatever is
       * left, so it will get checksummed. */
      for (; ; ) {
        try {
          int n = skipBytes(_dstream, 2048, this);
          if (n == 0) {
            break;
          }
        } catch (Exception e) {
          break;
        }
      }
      info.setSize(_cstream.getNBytes());
      info.setChecksum(new Checksum(_ckSummer.getCRC32(), ChecksumType.CRC32));
      String value = _ckSummer.getMD5();
      if (value != null) {
        info.setChecksum(new Checksum(value, ChecksumType.MD5));
      }
      if ((value = _ckSummer.getSHA1()) != null) {
        info.setChecksum(new Checksum(value, ChecksumType.SHA1));
      }
    }

    info.setProperty(_metadata);

    // Indicate satisfied profiles.
    if (flagPCMWaveFormat) {
      info.setProfile("PCMWAVEFORMAT");
    }
    if (flagWaveFormatEx) {
      info.setProfile("WAVEFORMATEX");
    }
    if (flagWaveFormatExtensible) {
      info.setProfile("WAVEFORMATEXTENSIBLE");
    }
    if (flagBroadcastWave) {
      // Need to do some additional checks.
      if (!broadcastExtChunkSeen) {
        flagBroadcastWave = false;
      }
      if (compressionCode == FormatChunk.WAVE_FORMAT_MPEG) {
        if (!broadcastExtChunkSeen || !factChunkSeen) {
          flagBroadcastWave = false;
        }
      }
      if (flagBroadcastWave) {
        String prof = null;
        switch (broadcastVersion) {
          case 0:
            prof = "Broadcast Wave Version 0";
            break;

          case 1:
            prof = "Broadcast Wave Version 1";
            break;

            // Other versions are unknown at this time
        }
        if (prof != null) {
          info.setProfile(prof);
        }
      }
    }
    return 0;
  }