コード例 #1
0
  /**
   * Reads a surrogate pair from the entity scanner and appends both halves to {@code buf}.
   *
   * <p><strong>Note:</strong> The caller must already have established that the current char is a
   * high surrogate; this method consumes it unconditionally.
   *
   * <p>A fatal error with key {@code InvalidCharInContent} is reported when the following char is
   * not a low surrogate, or when the combined supplemental character is rejected by {@code
   * isInvalid}. In the first case only the high surrogate has been consumed; in the second both
   * chars have been consumed. Nothing is appended to {@code buf} in either case.
   *
   * @param buf The buffer to append the read surrogates to.
   * @return True if the pair was read and appended, false if an error was reported.
   */
  protected boolean scanSurrogates(XMLStringBuffer buf) throws IOException, XNIException {
    final int leading = fEntityScanner.scanChar();
    final int trailing = fEntityScanner.peekChar();

    // Value that ends up in the error message when the pair is rejected.
    final int offending;
    if (!XMLChar.isLowSurrogate(trailing)) {
      // The trailing char is left unread; only the high surrogate is blamed.
      offending = leading;
    } else {
      fEntityScanner.scanChar();

      // Combine the pair into the supplemental character it encodes and validate that.
      final int supplemental = XMLChar.supplemental((char) leading, (char) trailing);
      if (!isInvalid(supplemental)) {
        buf.append((char) leading);
        buf.append((char) trailing);
        return true;
      }
      offending = supplemental;
    }

    reportFatalError("InvalidCharInContent", new Object[] {Integer.toString(offending, 16)});
    return false;
  } // scanSurrogates():boolean
コード例 #2
0
  /**
   * Scans an External ID and hands back its system and public identifiers.
   *
   * <p>Accepts either {@code PUBLIC pubid [system]} or {@code SYSTEM system}. If neither keyword is
   * present both array slots are set to {@code null}.
   *
   * <p><strong>Note:</strong> This method uses fString and fStringBuffer, anything in them at the
   * time of calling is lost.
   *
   * @param identifiers An array of size 2 that receives the system id at index 0 and the public id
   *     at index 1.
   * @param optionalSystemId Specifies whether the system id is optional. When true and a public id
   *     was scanned but no quote follows it, the method returns with a null system id instead of
   *     reporting an error.
   */
  protected void scanExternalID(String[] identifiers, boolean optionalSystemId)
      throws IOException, XNIException {

    String pubId = null;
    if (fEntityScanner.skipString("PUBLIC")) {
      final boolean spaceAfterKeyword = fEntityScanner.skipSpaces();
      if (!spaceAfterKeyword) {
        reportFatalError("SpaceRequiredAfterPUBLIC", null);
      }
      scanPubidLiteral(fString);
      pubId = fString.toString();

      // Spaces are always skipped here; their absence is only an error when a
      // system id is mandatory.
      final boolean spaceAfterLiteral = fEntityScanner.skipSpaces();
      if (!(spaceAfterLiteral || optionalSystemId)) {
        reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
      }
    }

    String sysId = null;
    // After a public id a system literal may follow directly; otherwise the
    // SYSTEM keyword is required to introduce one.
    final boolean expectSystemLiteral = pubId != null || fEntityScanner.skipString("SYSTEM");
    if (expectSystemLiteral) {
      if (pubId == null) {
        if (!fEntityScanner.skipSpaces()) {
          reportFatalError("SpaceRequiredAfterSYSTEM", null);
        }
      }

      final int quote = fEntityScanner.peekChar();
      final boolean quoted = quote == '\'' || quote == '"';
      if (!quoted) {
        if (optionalSystemId && pubId != null) {
          // No system id present: hand back the public id on its own.
          identifiers[0] = null;
          identifiers[1] = pubId;
          return;
        }
        reportFatalError("QuoteRequiredInSystemID", null);
      }
      fEntityScanner.scanChar();

      XMLString literal = fString;
      int stop = fEntityScanner.scanLiteral(quote, literal);
      if (stop != quote) {
        // The literal arrived in pieces; stitch them together in fStringBuffer.
        fStringBuffer.clear();
        while (stop != quote) {
          fStringBuffer.append(literal);
          final int next = fEntityScanner.peekChar();
          if (XMLChar.isMarkup(next) || next == ']') {
            // These chars interrupt scanLiteral but are ordinary content here.
            fStringBuffer.append((char) fEntityScanner.scanChar());
          }
          stop = fEntityScanner.scanLiteral(quote, literal);
        }
        fStringBuffer.append(literal);
        literal = fStringBuffer;
      }
      sysId = literal.toString();

      if (!fEntityScanner.skipChar(quote)) {
        reportFatalError("SystemIDUnterminated", null);
      }
    }

    // Publish the result: system id first, public id second.
    identifiers[0] = sysId;
    identifiers[1] = pubId;
  }
コード例 #3
0
  /**
   * Scans the data part of a processing instruction. This is needed to handle the situation where
   * a document starts with a processing instruction whose target name <em>starts with</em> "xml"
   * (e.g. xmlfoo).
   *
   * <p>A target that is exactly "xml" in any letter case is reported as {@code ReservedPITarget}.
   * If the target is immediately followed by {@code ?>}, {@code data} is cleared and the method
   * returns without touching fStringBuffer.
   *
   * <p><strong>Note:</strong> This method uses fStringBuffer, anything in it at the time of calling
   * is lost.
   *
   * @param target The PI target
   * @param data The string to fill in with the data
   */
  protected void scanPIData(String target, XMLString data) throws IOException, XNIException {

    // Reject the reserved three-letter target, compared case-insensitively.
    final boolean reservedTarget =
        target.length() == 3
            && Character.toLowerCase(target.charAt(0)) == 'x'
            && Character.toLowerCase(target.charAt(1)) == 'm'
            && Character.toLowerCase(target.charAt(2)) == 'l';
    if (reservedTarget) {
      reportFatalError("ReservedPITarget", null);
    }

    // White space must separate the target from any data.
    final boolean sawSpace = fEntityScanner.skipSpaces();
    if (!sawSpace) {
      if (fEntityScanner.skipString("?>")) {
        // End reached straight away: the instruction carries no data.
        data.clear();
        return;
      }
      if (fNamespaces && fEntityScanner.peekChar() == ':') {
        // Rebuild the full "target:rest" name purely for the error message.
        fEntityScanner.scanChar();
        final XMLStringBuffer qualified = new XMLStringBuffer(target);
        qualified.append(":");
        final String rest = fEntityScanner.scanName();
        if (rest != null) {
          qualified.append(rest);
        }
        reportFatalError("ColonNotLegalWithNS", new Object[] {qualified.toString()});
        fEntityScanner.skipSpaces();
      } else {
        // Data without a preceding space.
        reportFatalError("SpaceRequiredInPI", null);
      }
    }

    fStringBuffer.clear();
    // Each time scanData returns true, the next char is inspected for a
    // surrogate pair or an invalid char before scanning resumes.
    while (fEntityScanner.scanData("?>", fStringBuffer)) {
      final int next = fEntityScanner.peekChar();
      if (next == -1) {
        continue;
      }
      if (XMLChar.isHighSurrogate(next)) {
        scanSurrogates(fStringBuffer);
      } else if (isInvalidLiteral(next)) {
        reportFatalError("InvalidCharInPI", new Object[] {Integer.toHexString(next)});
        fEntityScanner.scanChar();
      }
    }
    data.setValues(fStringBuffer);
  } // scanPIData(String,XMLString)
コード例 #4
0
  /**
   * Scans public ID literal.
   *
   * <p>[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" [13] PubidChar::= #x20
   * | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
   *
   * <p>The returned string is normalized according to the following rule, from
   * http://www.w3.org/TR/REC-xml#dt-pubid:
   *
   * <p>Before a match is attempted, all strings of white space in the public identifier must be
   * normalized to single space characters (#x20), and leading and trailing white space must be
   * removed.
   *
   * <p>An empty or whitespace-only literal yields an empty result; the trailing-space trim is
   * only applied when something has actually been appended.
   *
   * <p><strong>Note:</strong> This method uses fStringBuffer, anything in it at the time of calling
   * is lost.
   *
   * @param literal The string to fill in with the public ID literal. It is only filled in when the
   *     closing quote is reached.
   * @return True on success; false if the opening quote is missing, the input ends before the
   *     closing quote, or an invalid character was encountered.
   */
  protected boolean scanPubidLiteral(XMLString literal) throws IOException, XNIException {
    int quote = fEntityScanner.scanChar();
    if (quote != '\'' && quote != '"') {
      reportFatalError("QuoteRequiredInPublicID", null);
      return false;
    }

    fStringBuffer.clear();
    // skipSpace == true means "drop the next white space char": it starts out
    // true so that leading white space is skipped, and is set again after each
    // run's first white space char has been emitted as a single ' '.
    boolean skipSpace = true;
    boolean dataok = true;
    while (true) {
      int c = fEntityScanner.scanChar();
      if (c == ' ' || c == '\n' || c == '\r') {
        if (!skipSpace) {
          // take the first whitespace as a space and skip the others
          fStringBuffer.append(' ');
          skipSpace = true;
        }
      } else if (c == quote) {
        // If we finished on a space, trim it. The length guard matters for an
        // empty or all-whitespace literal: skipSpace is still true from its
        // initial value, nothing was appended, and an unguarded decrement
        // would drive the length negative.
        if (skipSpace && fStringBuffer.length > 0) {
          fStringBuffer.length--;
        }
        literal.setValues(fStringBuffer);
        break;
      } else if (XMLChar.isPubid(c)) {
        fStringBuffer.append((char) c);
        skipSpace = false;
      } else if (c == -1) {
        reportFatalError("PublicIDUnterminated", null);
        return false;
      } else {
        // Keep scanning to the closing quote so the caller resumes at a sane
        // position, but remember that the literal was not well-formed.
        dataok = false;
        reportFatalError("InvalidCharInPublicID", new Object[] {Integer.toHexString(c)});
      }
    }
    return dataok;
  }
コード例 #5
0
  /**
   * Scans a comment.
   *
   * <p>
   *
   * <pre>
   * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
   * </pre>
   *
   * <p><strong>Note:</strong> Called after scanning past '&lt;!--'.
   *
   * <p><strong>Note:</strong> Earlier documentation states that this method uses fString and that
   * anything in it at the time of calling is lost; the body itself only writes to {@code text}.
   *
   * @param text The buffer to fill in with the text. It is cleared first.
   */
  protected void scanComment(XMLStringBuffer text) throws IOException, XNIException {

    // REVISIT: handle invalid character, eof
    text.clear();
    while (fEntityScanner.scanData("--", text)) {
      final int next = fEntityScanner.peekChar();
      if (next == -1) {
        continue;
      }
      if (XMLChar.isHighSurrogate(next)) {
        scanSurrogates(text);
        continue;
      }
      if (isInvalidLiteral(next)) {
        reportFatalError("InvalidCharInComment", new Object[] {Integer.toHexString(next)});
        fEntityScanner.scanChar();
      }
    }

    // The scan stopped at "--"; anything other than '>' next is an error.
    final boolean closed = fEntityScanner.skipChar('>');
    if (!closed) {
      reportFatalError("DashDashInComment", null);
    }
  } // scanComment()
コード例 #6
0
  /**
   * Scans an attribute value and normalizes whitespace converting all whitespace characters to
   * space characters.
   *
   * <p>[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
   *
   * <p>Two views of the value are built side by side: the normalized value (accumulated in
   * fStringBuffer and handed back through {@code value}) and the non-normalized text (accumulated
   * in fStringBuffer2 and handed back through {@code nonNormalizedValue}). When the first {@code
   * scanLiteral} call reaches the closing quote and {@code isUnchangedByNormalization} returns -1,
   * neither buffer is used and both outputs share the scanned value.
   *
   * <p><strong>Note:</strong> This method uses fStringBuffer, fStringBuffer2 and fStringBuffer3,
   * anything in them at the time of calling is lost. It also sets fScanningAttribute to true while
   * the slow path runs and back to false afterwards.
   *
   * @param value The XMLString to fill in with the value.
   * @param nonNormalizedValue The XMLString to fill in with the non-normalized value.
   * @param atName The name of the attribute being parsed (for error msgs).
   * @param checkEntities true if undeclared entities should be reported as VC violation, false if
   *     undeclared entities should be reported as WFC violation.
   * @param eleName The name of element to which this attribute belongs.
   * @return true if the non-normalized and normalized value are the same
   */
  protected boolean scanAttributeValue(
      XMLString value,
      XMLString nonNormalizedValue,
      String atName,
      boolean checkEntities,
      String eleName)
      throws IOException, XNIException {
    // quote
    int quote = fEntityScanner.peekChar();
    if (quote != '\'' && quote != '"') {
      reportFatalError("OpenQuoteExpected", new Object[] {eleName, atName});
    }

    // One char is consumed here whether or not it was a quote.
    fEntityScanner.scanChar();
    // Depth on entry. fStringBuffer2 is only appended to while fEntityDepth
    // still equals this value -- presumably so that text coming from an
    // expanded entity stays out of the non-normalized value (confirm against
    // the entity manager).
    int entityDepth = fEntityDepth;

    int c = fEntityScanner.scanLiteral(quote, value);
    if (DEBUG_ATTR_NORMALIZATION) {
      System.out.println("** scanLiteral -> \"" + value.toString() + "\"");
    }

    int fromIndex = 0;
    if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
      // Fast path: the whole value was read in one go and needs no
      // normalization, so both the non-normalized and normalized values are equal.
      nonNormalizedValue.setValues(value);
      int cquote = fEntityScanner.scanChar();
      if (cquote != quote) {
        reportFatalError("CloseQuoteExpected", new Object[] {eleName, atName});
      }
      return true;
    }
    // Slow path: keep the raw text in fStringBuffer2 before normalizing
    // `value` in place, starting at the index reported above.
    fStringBuffer2.clear();
    fStringBuffer2.append(value);
    normalizeWhitespace(value, fromIndex);
    if (DEBUG_ATTR_NORMALIZATION) {
      System.out.println("** normalizeWhitespace -> \"" + value.toString() + "\"");
    }
    if (c != quote) {
      // scanLiteral stopped on something other than the closing quote; handle
      // that char, then resume scanning, until the quote is reached at the
      // original entity depth.
      fScanningAttribute = true;
      fStringBuffer.clear();
      do {
        // Flush the (already normalized) chunk from the previous scanLiteral.
        fStringBuffer.append(value);
        if (DEBUG_ATTR_NORMALIZATION) {
          System.out.println("** value2: \"" + fStringBuffer.toString() + "\"");
        }
        if (c == '&') {
          // Reference: either a character reference (&#...) or an entity reference.
          fEntityScanner.skipChar('&');
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append('&');
          }
          if (fEntityScanner.skipChar('#')) {
            if (entityDepth == fEntityDepth) {
              fStringBuffer2.append('#');
            }
            // Appends the referenced char to fStringBuffer and the raw
            // reference text to fStringBuffer2; the result is only used for
            // debug output here.
            int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
            if (ch != -1) {
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value3: \"" + fStringBuffer.toString() + "\"");
              }
            }
          } else {
            String entityName = fEntityScanner.scanName();
            if (entityName == null) {
              reportFatalError("NameRequiredInReference", null);
            } else if (entityDepth == fEntityDepth) {
              fStringBuffer2.append(entityName);
            }
            if (!fEntityScanner.skipChar(';')) {
              reportFatalError("SemicolonRequiredInReference", new Object[] {entityName});
            } else if (entityDepth == fEntityDepth) {
              fStringBuffer2.append(';');
            }
            // NOTE(review): the comparisons below use reference equality on
            // String; presumably scanName() and the f*Symbol fields hold
            // interned symbols from a shared table -- confirm before changing
            // to equals().
            // The five predefined entities are replaced inline by their
            // literal character.
            if (entityName == fAmpSymbol) {
              fStringBuffer.append('&');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value5: \"" + fStringBuffer.toString() + "\"");
              }
            } else if (entityName == fAposSymbol) {
              fStringBuffer.append('\'');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value7: \"" + fStringBuffer.toString() + "\"");
              }
            } else if (entityName == fLtSymbol) {
              fStringBuffer.append('<');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value9: \"" + fStringBuffer.toString() + "\"");
              }
            } else if (entityName == fGtSymbol) {
              fStringBuffer.append('>');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueB: \"" + fStringBuffer.toString() + "\"");
              }
            } else if (entityName == fQuotSymbol) {
              fStringBuffer.append('"');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueD: \"" + fStringBuffer.toString() + "\"");
              }
            } else {
              // Any other entity: external entities are rejected in attribute
              // values; everything else is started via the entity manager.
              if (fEntityManager.isExternalEntity(entityName)) {
                reportFatalError("ReferenceToExternalEntity", new Object[] {entityName});
              } else {
                if (!fEntityManager.isDeclaredEntity(entityName)) {
                  // WFC & VC: Entity Declared
                  // With checkEntities set this is reported as a (non-fatal)
                  // error and only when validating; otherwise it is fatal.
                  if (checkEntities) {
                    if (fValidation) {
                      fErrorReporter.reportError(
                          XMLMessageFormatter.XML_DOMAIN,
                          "EntityNotDeclared",
                          new Object[] {entityName},
                          XMLErrorReporter.SEVERITY_ERROR);
                    }
                  } else {
                    reportFatalError("EntityNotDeclared", new Object[] {entityName});
                  }
                }
                // Reached for undeclared entities too, whenever the report
                // above did not throw.
                fEntityManager.startEntity(entityName, true);
              }
            }
          }
        } else if (c == '<') {
          // '<' is not allowed in an attribute value; it is consumed and kept
          // only in the non-normalized text.
          reportFatalError("LessthanInAttValue", new Object[] {eleName, atName});
          fEntityScanner.scanChar();
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append((char) c);
          }
        } else if (c == '%' || c == ']') {
          // These chars stop scanLiteral but are ordinary content here.
          fEntityScanner.scanChar();
          fStringBuffer.append((char) c);
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append((char) c);
          }
          if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** valueF: \"" + fStringBuffer.toString() + "\"");
          }
        } else if (c == '\n' || c == '\r') {
          // Line break: a space in the normalized value, and always '\n'
          // (even for '\r') in the non-normalized value.
          fEntityScanner.scanChar();
          fStringBuffer.append(' ');
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append('\n');
          }
        } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
          // Surrogate pair: read into the scratch buffer first so nothing is
          // appended when the pair turns out to be invalid.
          fStringBuffer3.clear();
          if (scanSurrogates(fStringBuffer3)) {
            fStringBuffer.append(fStringBuffer3);
            if (entityDepth == fEntityDepth) {
              fStringBuffer2.append(fStringBuffer3);
            }
            if (DEBUG_ATTR_NORMALIZATION) {
              System.out.println("** valueI: \"" + fStringBuffer.toString() + "\"");
            }
          }
        } else if (c != -1 && isInvalidLiteral(c)) {
          // Invalid char: reported, consumed, and kept only in the
          // non-normalized text.
          reportFatalError(
              "InvalidCharInAttValue", new Object[] {eleName, atName, Integer.toString(c, 16)});
          fEntityScanner.scanChar();
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append((char) c);
          }
        }
        // Resume scanning; the raw chunk is recorded before it is normalized
        // in place.
        c = fEntityScanner.scanLiteral(quote, value);
        if (entityDepth == fEntityDepth) {
          fStringBuffer2.append(value);
        }
        normalizeWhitespace(value);
        // A quote seen while fEntityDepth differs from the entry depth does
        // not terminate the value.
      } while (c != quote || entityDepth != fEntityDepth);
      fStringBuffer.append(value);
      if (DEBUG_ATTR_NORMALIZATION) {
        System.out.println("** valueN: \"" + fStringBuffer.toString() + "\"");
      }
      value.setValues(fStringBuffer);
      fScanningAttribute = false;
    }
    nonNormalizedValue.setValues(fStringBuffer2);

    // quote
    int cquote = fEntityScanner.scanChar();
    if (cquote != quote) {
      reportFatalError("CloseQuoteExpected", new Object[] {eleName, atName});
    }
    // Report whether normalization left the text unchanged.
    return nonNormalizedValue.equals(value.ch, value.offset, value.length);
  } // scanAttributeValue()
コード例 #7
0
  /**
   * Scans a pseudo attribute ({@code name = "value"}) as found in an XMLDecl or TextDecl.
   *
   * <p><strong>Note:</strong> This method uses fStringBuffer2, anything in it at the time of
   * calling is lost.
   *
   * @param scanningTextDecl True if scanning this pseudo-attribute for a TextDecl; false if
   *     scanning XMLDecl. This flag is needed to report the correct type of error.
   * @param value The string to fill in with the attribute value.
   * @return The name of the attribute; may be null if no name could be scanned and the error
   *     report did not throw.
   */
  public String scanPseudoAttribute(boolean scanningTextDecl, XMLString value)
      throws IOException, XNIException {

    // REVISIT: This method is used for generic scanning of
    // pseudo attributes, but since there are only three such
    // attributes: version, encoding, and standalone there are
    // more performant ways of scanning them. Every decl must
    // have a version, and in TextDecls this version must
    // be followed by an encoding declaration. Also the
    // methods we invoke on the scanners allow non-ASCII
    // characters to be parsed in the decls, but since
    // we don't even know what the actual encoding of the
    // document is until we scan the encoding declaration
    // you cannot reliably read any characters outside
    // of the ASCII range here. -- mrglavas
    final String attrName = fEntityScanner.scanName();
    XMLEntityManager.print(fEntityManager.getCurrentEntity());
    if (attrName == null) {
      reportFatalError("PseudoAttrNameExpected", null);
    }

    // '=' with optional white space on either side.
    fEntityScanner.skipDeclSpaces();
    if (!fEntityScanner.skipChar('=')) {
      final String eqKey = scanningTextDecl ? "EqRequiredInTextDecl" : "EqRequiredInXMLDecl";
      reportFatalError(eqKey, new Object[] {attrName});
    }
    fEntityScanner.skipDeclSpaces();

    // Opening quote; one char is consumed whether or not it was a quote.
    final int quote = fEntityScanner.peekChar();
    final boolean quoted = quote == '\'' || quote == '"';
    if (!quoted) {
      final String quoteKey =
          scanningTextDecl ? "QuoteRequiredInTextDecl" : "QuoteRequiredInXMLDecl";
      reportFatalError(quoteKey, new Object[] {attrName});
    }
    fEntityScanner.scanChar();

    int stop = fEntityScanner.scanLiteral(quote, value);
    if (stop != quote) {
      // The literal was interrupted; collect the pieces in fStringBuffer2.
      fStringBuffer2.clear();
      while (stop != quote) {
        fStringBuffer2.append(value);
        if (stop != -1) {
          final boolean plainStopChar = stop == '&' || stop == '%' || stop == '<' || stop == ']';
          if (plainStopChar) {
            fStringBuffer2.append((char) fEntityScanner.scanChar());
          }
          // REVISIT: Even if you could reliably read non-ASCII chars
          // why bother scanning for surrogates here? Only ASCII chars
          // match the productions in XMLDecls and TextDecls. -- mrglavas
          else if (XMLChar.isHighSurrogate(stop)) {
            scanSurrogates(fStringBuffer2);
          } else if (isInvalidLiteral(stop)) {
            final String charKey =
                scanningTextDecl ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
            reportFatalError(charKey, new Object[] {Integer.toString(stop, 16)});
            fEntityScanner.scanChar();
          }
        }
        stop = fEntityScanner.scanLiteral(quote, value);
      }
      fStringBuffer2.append(value);
      value.setValues(fStringBuffer2);
    }

    // Closing quote.
    if (!fEntityScanner.skipChar(quote)) {
      final String closeKey =
          scanningTextDecl ? "CloseQuoteMissingInTextDecl" : "CloseQuoteMissingInXMLDecl";
      reportFatalError(closeKey, new Object[] {attrName});
    }

    return attrName;
  } // scanPseudoAttribute(XMLString):String
コード例 #8
0
  /**
   * Scans a character reference and append the corresponding chars to the specified buffer.
   *
   * <p>
   *
   * <pre>
   * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
   * </pre>
   *
   * <p>Scanning starts right after the {@code &#} prefix. When the digits cannot be converted to a
   * number, a fatal error is reported, -1 is returned and nothing is appended to {@code buf}.
   *
   * <p><strong>Note:</strong> This method uses fStringBuffer3, anything in it at the time of
   * calling is lost.
   *
   * @param buf the character buffer to append chars to
   * @param buf2 the character buffer to append non-normalized chars to; may be null, in which case
   *     the raw reference text is not recorded
   * @return the character value or (-1) on conversion failure
   */
  protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
      throws IOException, XNIException {

    // An 'x' directly after "&#" selects the hexadecimal form.
    final boolean hex = fEntityScanner.skipChar('x');
    if (hex && buf2 != null) {
      buf2.append('x');
    }

    // Collect the digits in fStringBuffer3; at least one is required.
    fStringBuffer3.clear();
    if (!scanCharRefDigits(hex, buf2)) {
      reportFatalError(hex ? "HexdigitRequiredInCharRef" : "DigitRequiredInCharRef", null);
    }

    // end
    if (!fEntityScanner.skipChar(';')) {
      reportFatalError("SemicolonRequiredInCharRef", null);
    }
    // The ';' is recorded in the raw text even when it was missing.
    if (buf2 != null) {
      buf2.append(';');
    }

    // convert string to number
    int value = -1;
    try {
      value = Integer.parseInt(fStringBuffer3.toString(), hex ? 16 : 10);

      // character reference must be a valid XML character
      if (isInvalid(value)) {
        reportInvalidCharRef(hex);
      }
    } catch (NumberFormatException e) {
      // Conversion failed (no digits, or out of int range); let the -1 value
      // drop through. If we end up here, the character reference was invalid.
      reportInvalidCharRef(hex);
    }

    // Append corresponding chars to the given buffer. Skipped on conversion
    // failure: casting -1 to char would otherwise append a bogus U+FFFF when
    // the error report above does not throw.
    if (value != -1) {
      if (!XMLChar.isSupplemental(value)) {
        buf.append((char) value);
      } else {
        // character is supplemental, split it into surrogate chars
        buf.append(XMLChar.highSurrogate(value));
        buf.append(XMLChar.lowSurrogate(value));
      }
    }

    // char refs notification code: remember the literal, except while an
    // attribute value is being scanned.
    if (fNotifyCharRefs && value != -1) {
      String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
      if (!fScanningAttribute) {
        fCharRefLiteral = literal;
      }
    }

    return value;
  }

  /**
   * Consumes a run of decimal (or, if {@code hex}, hexadecimal) digits, appending each one to
   * fStringBuffer3 and, when non-null, to {@code buf2}.
   *
   * @return true if at least one digit was consumed
   */
  private boolean scanCharRefDigits(boolean hex, XMLStringBuffer buf2)
      throws IOException, XNIException {
    boolean sawDigit = false;
    while (true) {
      int c = fEntityScanner.peekChar();
      boolean digit =
          (c >= '0' && c <= '9')
              || (hex && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')));
      if (!digit) {
        // The first non-digit is left unread for the caller.
        return sawDigit;
      }
      if (buf2 != null) {
        buf2.append((char) c);
      }
      fEntityScanner.scanChar();
      fStringBuffer3.append((char) c);
      sawDigit = true;
    }
  }

  /** Reports InvalidCharRef for the digits in fStringBuffer3, prefixed with 'x' when hex. */
  private void reportInvalidCharRef(boolean hex) throws XNIException {
    StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
    if (hex) {
      errorBuf.append('x');
    }
    errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
    reportFatalError("InvalidCharRef", new Object[] {errorBuf.toString()});
  }