/**
  * Check if the digital object conforms to this Module's internal signature information.
  *
  * <p>HTML is one of the most ill-defined of any open formats, so checking a "signature" really
  * means using some heuristics. The only required tag is TITLE, but that could occur well into the
  * file. So we look for any of three strings -- {@code <!DOCTYPE HTML}, {@code <HTML} or
  * {@code <TITLE} -- within the first sigBytes bytes, and call that a signature check. Matching is
  * case-independent, and every white space character in the input is skipped before comparison,
  * so the strings may be split by spaces or line breaks in the file.
  *
  * <p>On a match {@code info.setSigMatch} is called with this module's name; if no string is
  * found before sigBytes bytes have been read or the stream ends, {@code info} is marked as not
  * well-formed.
  *
  * @param file A File object for the object being parsed (not consulted by this method)
  * @param stream An InputStream, positioned at its beginning, which is generated from the object
  *     to be parsed
  * @param info A fresh RepInfo object which will be modified to reflect the results of the test
  * @throws IOException if reading from the stream fails; reaching the end of the stream is not an
  *     error and simply ends the search
  */
 public void checkSignatures(File file, InputStream stream, RepInfo info) throws IOException {
   info.setFormat(_format[0]);
   info.setMimeType(_mimeType[0]);
   info.setModule(this);

   // White space in the input is dropped before matching, so the patterns themselves must be
   // free of white space: a literal space inside "<!DOCTYPE HTML" could never be matched.
   final char[][] sigtext = {
     "<!DOCTYPEHTML".toCharArray(), "<HTML".toCharArray(), "<TITLE".toCharArray(),
   };
   // sigstate[i] is the number of leading characters of sigtext[i] matched so far.
   final int[] sigstate = new int[sigtext.length];
   final int sigBytes = getBase().getSigBytes();
   final DataInputStream dstream = new DataInputStream(stream);
   int bytesRead = 0;
   try {
     while (bytesRead < sigBytes) {
       int ch = readUnsignedByte(dstream, this);
       ++bytesRead;
       char chr = Character.toUpperCase((char) ch);
       if (Character.isWhitespace(chr)) {
         continue; // ignore all whitespace
       }
       for (int i = 0; i < sigtext.length; i++) {
         char[] st = sigtext[i];
         if (chr != st[sigstate[i]]) {
           // Mismatch: start over, but still let this character open a new attempt below
           // (otherwise "<<HTML" would miss the match that begins at the second '<').
           // Restarting from zero is sufficient because '<' occurs only at index 0 of
           // every pattern.
           sigstate[i] = 0;
         }
         if (chr == st[sigstate[i]]) {
           ++sigstate[i];
           if (sigstate[i] == st.length) {
             // One of the sig texts matches!
             info.setSigMatch(_name);
             return;
           }
         }
       }
     }
   } catch (EOFException ignored) {
     // The stream ended before any signature was found; treated the same as exhausting
     // sigBytes.
   }
   // If we fall through, there was no sig match
   info.setWellFormed(false);
 }
示例#2
0
  /**
   * Parses a stream that purports to be a WAVE file and records the outcome in {@code info}.
   *
   * <p>The steps are: verify the four leading signature bytes, read the RIFF length and the form
   * type (which must be {@code WAVE}), let {@code readChunk} consume chunks while bytes remain,
   * then publish the collected properties, the checksums (when they are being calculated on the
   * fly) and the profiles whose flags were set. Any failure is reported by marking {@code info}
   * as not well-formed and attaching an error message.
   *
   * @param stream An InputStream, positioned at its beginning, which is generated from the object
   *     to be parsed
   * @param info A fresh RepInfo object which will be modified to reflect the results of the parsing
   * @param parseIndex Must be 0 in first call to <code>parse</code>. If <code>parse</code> returns
   *     a nonzero value, it must be called again with <code>parseIndex</code> equal to that return
   *     value. This implementation does not consult the value.
   * @return always 0, on success as well as on every failure path
   * @throws IOException if a read fails; an EOFException raised while reading the header or the
   *     chunks is handled here and reported through {@code info} instead
   */
  public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException {
    initParse();
    info.setFormat(_format[0]);
    info.setMimeType(_mimeType[0]);
    info.setModule(this);

    _aesMetadata.setPrimaryIdentifier(info.getUri());
    if (!info.getURLFlag()) {
      _aesMetadata.setPrimaryIdentifierType(AESAudioMetadata.FILE_NAME);
    } else {
      _aesMetadata.setOtherPrimaryIdentifierType("URI");
    }

    // Checksums may already be present (computed while converting a temporary file); only
    // wrap the input in a checksumming stream when they are requested and still missing.
    _ckSummer = null;
    InputStream source = stream;
    if (_je != null && _je.getChecksumFlag() && info.getChecksum().size() == 0) {
      _ckSummer = new Checksummer();
      _cstream = new ChecksumInputStream(stream, _ckSummer);
      source = _cstream;
    }
    _dstream = getBufferedDataStream(source, _je == null ? 0 : _je.getBufferSize());

    try {
      // The file must open with the expected four signature bytes.
      for (int pos = 0; pos < 4; pos++) {
        int actual = readUnsignedByte(_dstream, this);
        if (actual != sigByte[pos]) {
          info.setMessage(new ErrorMessage("Document does not start with RIFF chunk", 0));
          info.setWellFormed(false);
          return 0;
        }
      }
      // Signature verified.
      info.setSigMatch(_name);

      // Length of the RIFF chunk: it covers everything that follows in the file, but not the
      // chunk identifier or the length field itself.
      bytesRemaining = readUnsignedInt(_dstream);

      // The four-character form type counts against the remaining length.
      String formType = read4Chars(_dstream);
      bytesRemaining -= 4;
      if (!"WAVE".equals(formType)) {
        info.setMessage(new ErrorMessage("File type in RIFF header is not WAVE", _nByte));
        info.setWellFormed(false);
        return 0;
      }

      while (bytesRemaining > 0 && readChunk(info)) {
        // readChunk does the work; the loop ends when it returns false or no bytes remain.
      }
    } catch (EOFException e) {
      info.setWellFormed(false);
      info.setMessage(new ErrorMessage("Unexpected end of file", _nByte));
      return 0;
    }

    // The sample count is handed to the AES metadata as the duration.
    if (numSamples > 0) {
      _aesMetadata.setDuration(numSamples);
    }

    // Report each optional collection only when it actually holds something.
    if (!_labels.isEmpty()) {
      _propList.add(new Property("Labels", PropertyType.PROPERTY, PropertyArity.LIST, _labels));
    }
    if (!_labeledText.isEmpty()) {
      _propList.add(
          new Property("LabeledText", PropertyType.PROPERTY, PropertyArity.LIST, _labeledText));
    }
    if (!_notes.isEmpty()) {
      _propList.add(new Property("Notes", PropertyType.PROPERTY, PropertyArity.LIST, _notes));
    }
    if (!_samples.isEmpty()) {
      _propList.add(new Property("Samples", PropertyType.PROPERTY, PropertyArity.LIST, _samples));
    }
    if (_exifInfo != null) {
      _propList.add(_exifInfo.buildProperty());
    }

    // A file without a Format chunk is rejected.
    if (!formatChunkSeen) {
      info.setMessage(new ErrorMessage("No Format Chunk"));
      info.setWellFormed(false);
      return 0;
    }

    if (_ckSummer != null) {
      // Parsing may have stopped short of the end of the stream. Read and discard the rest so
      // that every byte passes through the checksummer; any failure simply ends the drain.
      boolean draining = true;
      while (draining) {
        try {
          draining = skipBytes(_dstream, 2048, this) != 0;
        } catch (Exception e) {
          draining = false;
        }
      }
      info.setSize(_cstream.getNBytes());
      info.setChecksum(new Checksum(_ckSummer.getCRC32(), ChecksumType.CRC32));
      String md5 = _ckSummer.getMD5();
      if (md5 != null) {
        info.setChecksum(new Checksum(md5, ChecksumType.MD5));
      }
      String sha1 = _ckSummer.getSHA1();
      if (sha1 != null) {
        info.setChecksum(new Checksum(sha1, ChecksumType.SHA1));
      }
    }

    info.setProperty(_metadata);

    // Report the profiles whose flags are set.
    if (flagPCMWaveFormat) {
      info.setProfile("PCMWAVEFORMAT");
    }
    if (flagWaveFormatEx) {
      info.setProfile("WAVEFORMATEX");
    }
    if (flagWaveFormatExtensible) {
      info.setProfile("WAVEFORMATEXTENSIBLE");
    }

    if (flagBroadcastWave) {
      // The Broadcast Wave flag only stands if the broadcast extension chunk was seen and, for
      // MPEG-compressed audio, the fact chunk was seen as well.
      boolean mpegWithoutFact =
          compressionCode == FormatChunk.WAVE_FORMAT_MPEG && !factChunkSeen;
      if (!broadcastExtChunkSeen || mpegWithoutFact) {
        flagBroadcastWave = false;
      }
    }
    if (flagBroadcastWave) {
      // Only versions 0 and 1 are known; any other version yields no profile.
      if (broadcastVersion == 0) {
        info.setProfile("Broadcast Wave Version 0");
      } else if (broadcastVersion == 1) {
        info.setProfile("Broadcast Wave Version 1");
      }
    }
    return 0;
  }