Beispiel #1
0
 static {
   // Maps every thesaurus relationship to its inverse, so that a link can be
   // registered in both directions. The abbreviations presumably follow the
   // ISO 2788 conventions (broader/narrower term, use/used for, related term).
   RSHIPS.put("NT", "BT");
   // fixed: the inverse of "BT" is "NT" (was mapped to itself, unlike all other pairs)
   RSHIPS.put("BT", "NT");
   RSHIPS.put("BTG", "NTG");
   RSHIPS.put("NTG", "BTG");
   RSHIPS.put("BTP", "NTP");
   RSHIPS.put("NTP", "BTP");
   RSHIPS.put("USE", "UF");
   RSHIPS.put("UF", "USE");
   // the only relationship that is its own inverse
   RSHIPS.put("RT", "RT");
 }
Beispiel #2
0
  /**
   * Scans a parameter entity reference. [69]
   * Reads the entity name, consumes the terminating semicolon if present and looks the
   * name up in the map of parameter entities.
   *
   * @return value stored for the entity, or the scanned name if no entry exists
   * @throws IOException I/O exception
   */
  private byte[] peRef() throws IOException {
    final byte[] key = name(true);
    consume(';');

    // unknown entities are answered with their own name
    final byte[] value = pents.get(key);
    return value == null ? key : value;
  }
Beispiel #3
0
  /**
   * Scans a reference. [67]
   * Handles numeric character references (decimal and hexadecimal) as well as named
   * entity references. Whenever a reference cannot be resolved, {@code QUESTION} is
   * returned instead of raising an error.
   *
   * @param f dissolve entities; if {@code false}, a named reference is returned
   *   unresolved (ampersand, name and semicolon)
   * @return resolved entity, the unresolved reference, or {@code QUESTION}
   * @throws IOException I/O exception
   */
  private byte[] ref(final boolean f) throws IOException {
    // scans numeric entities
    if (consume('#')) { // [66]
      final TokenBuilder ent = new TokenBuilder();
      int b = 10;
      int ch = nextChar();
      ent.add(ch);
      if (ch == 'x') {
        b = 16;
        ent.add(ch = nextChar());
      }
      int n = 0;
      // set as soon as the value exceeds the largest code point; accumulation is
      // stopped then, so that long digit sequences cannot overflow the integer
      // and wrap around to a seemingly valid character
      boolean overflow = false;
      do {
        final boolean m = ch >= '0' && ch <= '9';
        final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F');
        if (!m && !h) {
          completeRef(ent);
          return QUESTION;
        }
        if (!overflow) {
          // the low nibble is the digit value; letters need an offset of 9 (a/A = 1 + 9)
          n = n * b + (ch & 15) + (m ? 0 : 9);
          overflow = n > Character.MAX_CODE_POINT;
        }
        ent.add(ch = nextChar());
      } while (ch != ';');

      if (overflow || !valid(n)) return QUESTION;
      // replace the scanned characters with the referenced character
      ent.reset();
      ent.add(n);
      return ent.finish();
    }

    // scans predefined entities [68]
    final byte[] name = name(false);
    if (!consume(';')) return QUESTION;

    // entities are not dissolved: return reference unchanged
    if (!f) return concat(AMPER, name, SEMI);

    byte[] en = ents.get(name);
    if (en == null) {
      // unknown entity: try HTML entities (lazy initialization)
      if (HTMLENTS.size() == 0) {
        for (int s = 0; s < HTMLENTITIES.length; s += 2) {
          HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1]));
        }
      }
      en = HTMLENTS.get(name);
    }
    return en == null ? QUESTION : en;
  }
Beispiel #4
0
 /**
  * Reads the next character from the input. If parameter entity processing is
  * enabled ({@code pe}) and a percent sign is found, the referenced parameter
  * entity is looked up, its value is added to the input, and reading continues.
  *
  * @return next character, or {@code 0} if the input is exhausted
  * @throws IOException I/O exception
  */
 private int consume() throws IOException {
   for (int ch = input.read(); ch != -1; ch = input.read()) {
     // everything except a parameter entity reference is passed on as is
     if (ch != '%' || !pe) return ch;

     // [69]
     final byte[] entity = name(true);
     final byte[] replacement = pents.get(entity);
     if (replacement == null) error(UNKNOWNPE, entity);
     check(';');
     input.add(replacement, true);
   }
   // end of input
   return 0;
 }
Beispiel #5
0
  /**
   * Builds the thesaurus.
   * For every synonym found below the input, a link from the term to the synonym is
   * registered; if an inverse relationship is known, the reverse link is added as well.
   *
   * @param value input nodes
   * @throws QueryException query exception
   */
  private void build(final Value value) throws QueryException {
    final Value related = nodes("*:synonym", value);
    if (!related.isEmpty()) {
      final ThesNode source = node(text("*:term", value));
      for (final Item entry : related) {
        final ThesNode target = node(text("*:term", entry));
        final byte[] relation = text("*:relationship", entry);
        source.add(target, relation);

        // register the opposite direction if the relationship has an inverse
        final byte[] inverse = RSHIPS.get(relation);
        if (inverse != null) target.add(source, inverse);
        // NOTE(review): recursion is started with the complete synonym sequence in
        // every iteration, not with the current entry -- confirm that this is intended
        build(related);
      }
    }
  }
Beispiel #6
0
  /**
   * Initializes the scanner.
   *
   * @param f input file
   * @param pr database properties
   * @param frag allow parsing of document fragment
   * @throws IOException I/O exception
   */
  XMLScanner(final IO f, final Prop pr, final boolean frag) throws IOException {
    input = new XMLInput(f);
    fragment = frag;

    try {
      for (int e = 0; e < ENTITIES.length; e += 2) {
        ents.add(token(ENTITIES[e]), token(ENTITIES[e + 1]));
      }
      dtd = pr.is(Prop.DTD);
      chop = pr.is(Prop.CHOP);

      String enc = null;
      // process document declaration...
      if (consume(DOCDECL)) {
        if (s()) {
          if (!version()) error(DECLSTART);
          boolean s = s();
          enc = encoding();
          if (enc != null) {
            if (!s) error(WSERROR);
            s = s();
          }
          if (sddecl() != null && !s) error(WSERROR);
          s();
          final int ch = nextChar();
          if (ch != '?' || nextChar() != '>') error(DECLWRONG);
        } else {
          prev(5);
        }
      }
      encoding = enc == null ? UTF8 : enc;

      if (!fragment) {
        final int n = consume();
        if (!s(n)) {
          if (n != '<') error(BEFOREROOT);
          prev(1);
        }
      }
    } catch (final IOException ex) {
      input.close();
      throw ex;
    }
  }
Beispiel #7
0
  /**
   * Inserts a data instance at the specified pre value. Note that the specified data instance must
   * differ from this instance.
   *
   * <p>The nodes of the clip are copied one by one (documents, elements, texts, comments,
   * processing instructions and attributes). While copying, the namespace structure is extended,
   * the sizes of all ancestors of the insertion point are increased and, if updatable indexes
   * are enabled, the index structures are updated.
   *
   * @param ipre value at which to insert new data
   * @param ipar parent pre value of node (negative if document nodes are added on top level)
   * @param clip data clip
   */
  public final void insert(final int ipre, final int ipar, final DataClip clip) {
    meta.update();

    // update value and document indexes
    if (meta.updindex) indexBegin();
    resources.insert(ipre, clip);

    // number of nodes to insert; the buffer holds at most one block of entries
    final int dsize = clip.size();
    final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
    // resize buffer to cache more entries
    buffer(buf);

    // find all namespaces in scope to avoid duplicate declarations
    final TokenMap nsScope = nspaces.scope(ipar, this);

    // loop through all entries
    // preStack: pre values of inserted document/element nodes whose namespace scope is still open
    final IntList preStack = new IntList();
    // namespace node that was current before the insertion; restored after the loop
    final NSNode nsRoot = nspaces.current();
    final HashSet<NSNode> newNodes = new HashSet<NSNode>();
    // pre values of nodes for which the namespace flag must be set afterwards
    final IntList flagPres = new IntList();

    // source database of the clip; c counts the nodes that have been processed so far
    final Data data = clip.data;
    int c = 0;
    for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
      // a full buffer has been collected: pass on the pre value of its first entry
      // (presumably writes the buffered entries to the table -- confirm against insert(int))
      if (c != 0 && c % buf == 0) insert(ipre + c - buf);

      final int pre = ipre + c;
      final int dkind = data.kind(dpre);
      final int dpar = data.parent(dpre, dkind);
      // ipar < 0 if document nodes on top level are added
      // distance to parent: adopted from the source if the node has a parent there,
      // otherwise relative to ipar; 0 indicates that there is no parent
      final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
      final int par = dis == 0 ? -1 : pre - dis;

      if (c == 0) nspaces.root(par, this);

      // close namespace scopes of all stacked nodes that are located after the parent
      while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

      switch (dkind) {
        case DOC:
          // add document
          nspaces.prepare();
          final int s = data.size(dpre, dkind);
          doc(pre, s, data.text(dpre, true));
          meta.ndocs++;
          preStack.push(pre);
          break;
        case ELEM:
          // add element
          nspaces.prepare();
          // ne: true if at least one new namespace was added for this element
          boolean ne = false;
          if (data.nsFlag(dpre)) {
            final Atts at = data.ns(dpre);
            for (int a = 0; a < at.size(); ++a) {
              // see if prefix has been declared/ is part of current ns scope
              final byte[] old = nsScope.get(at.name(a));
              if (old == null || !eq(old, at.value(a))) {
                // we have to keep track of all new NSNodes that are added
                // to the Namespace structure, as their pre values must not
                // be updated. I.e. if an NSNode N with pre value 3 existed
                // prior to inserting and two new nodes are inserted at
                // location pre == 3 we have to make sure N and only N gets
                // updated.
                newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
                ne = true;
              }
            }
          }
          byte[] nm = data.name(dpre, dkind);
          elem(
              dis,
              tagindex.index(nm, null, false),
              data.attSize(dpre, dkind),
              data.size(dpre, dkind),
              nspaces.uri(nm, true),
              ne);
          preStack.push(pre);
          break;
        case TEXT:
        case COMM:
        case PI:
          // add text
          text(pre, dis, data.text(dpre, true), dkind);
          break;
        case ATTR:
          // add attribute
          nm = data.name(dpre, dkind);
          // check if prefix already in nsScope or not
          final byte[] attPref = prefix(nm);
          // check if prefix of attribute has already been declared, otherwise
          // add declaration to parent node
          if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
            nspaces.add(
                par,
                preStack.isEmpty() ? -1 : preStack.peek(),
                attPref,
                data.nspaces.uri(data.uri(dpre, dkind)),
                this);
            // save pre value to set ns flag later for this node. can't be done
            // here as direct table access would interfere with the buffer
            flagPres.add(par);
          }
          attr(
              pre,
              dis,
              atnindex.index(nm, null, false),
              data.text(dpre, false),
              nspaces.uri(nm, false),
              false);
          break;
      }
    }
    // finalize and update namespace structure
    while (!preStack.isEmpty()) nspaces.close(preStack.pop());
    nspaces.root(nsRoot);

    // process the entries that are still buffered; the argument is the pre value
    // of the first entry of the last (possibly incomplete) buffer
    if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
    // reset buffer to old size
    buffer(1);

    // set ns flags
    for (int f = 0; f < flagPres.size(); f++) {
      final int fl = flagPres.get(f);
      // bit 15 is added to the name value of the node
      table.write2(fl, 1, name(fl) | 1 << 15);
    }

    // increase size of ancestors
    int p = ipar;
    while (p >= 0) {
      final int k = kind(p);
      size(p, k, size(p, k) + dsize);
      p = parent(p, k);
    }

    if (meta.updindex) {
      // add the entries to the ID -> PRE mapping:
      idmap.insert(ipre, id(ipre), dsize);
      indexEnd();
    }

    // update distances of the nodes that follow the inserted nodes (skipped in cache mode)
    if (!cache) updateDist(ipre + dsize, dsize);

    // propagate PRE value shifts to namespaces
    if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
  }
Beispiel #8
0
  /**
   * Scans a markup declaration. [29]
   * Recognizes entity, element, attribute list and notation declarations as well as
   * comments and processing instructions. Declared parameter entities and general
   * entities are stored in {@code pents} and {@code ents}, respectively.
   * The bracketed numbers in the comments presumably refer to the grammar productions
   * of the XML specification.
   *
   * @return true if a declaration was found
   * @throws IOException I/O exception
   */
  private boolean markupDecl() throws IOException {
    if (consume(ENT)) { // [70]
      checkS();
      if (consume('%')) { // [72] PEDecl
        checkS();
        final byte[] key = name(true);
        checkS();
        // value is either given as literal or as external ID
        byte[] val = entityValue(true); // [74]
        if (val == null) {
          val = externalID(true, false);
          if (val == null) error(INVEND);
        }
        s();
        pents.add(key, val);
      } else { // [71] GEDecl
        final byte[] key = name(true);
        checkS();
        byte[] val = entityValue(false); // [73] EntityDef
        if (val == null) {
          val = externalID(true, false);
          if (val == null) error(INVEND);
          // an external ID may be followed by a declaration introduced by ND;
          // its name is scanned, but not stored
          if (s()) {
            check(ND);
            checkS();
            name(true);
          }
        }
        s();
        ents.add(key, val);
      }
      check('>');
      pe = true;
    } else if (consume(ELEM)) { // [45]
      checkS();
      name(true);
      checkS();
      pe = true;
      if (!consume(EMP) && !consume(ANY)) { // [46]
        if (consume('(')) {
          s();
          if (consume(PC)) { // [51]
            s();
            // alt: true if at least one alternative name was found
            boolean alt = false;
            while (consume('|')) {
              s();
              name(true);
              s();
              alt = true;
            }
            check(')');
            // alternatives must be followed by an asterisk
            if (!consume('*') && alt) error(INVEND);
          } else {
            cp();
            s();
            // check(')'); // to be fixed...
            // skips all characters up to the next closing parenthesis
            while (!consume(')')) consume();
            // input.prev(1);
            occ();
          }
        } else {
          error(INVEND);
        }
      }
      s();
      check('>');
    } else if (consume(ATTL)) { // [52]
      pe = true;
      checkS();
      name(true);
      s();
      // one iteration per attribute definition
      while (name(false) != null) { // [53]
        checkS();
        // attribute type: one of the keywords, or an enumeration
        if (!consume(CD)
            && !consume(IDRS)
            && !consume(IDR)
            && !consume(ID)
            && !consume(ENTS)
            && !consume(ENT1)
            && !consume(NMTS)
            && !consume(NMT)) { // [56]
          if (consume(NOT)) { // [57,58]
            // parenthesized list of names, separated by '|'
            checkS();
            check('(');
            s();
            name(true);
            s();
            while (consume('|')) {
              s();
              name(true);
              s();
            }
            check(')');
          } else { // [59]
            // parenthesized list of name tokens, separated by '|'
            check('(');
            s();
            nmtoken();
            s();
            while (consume('|')) {
              s();
              nmtoken();
              s();
            }
            check(')');
          }
        }

        // [54]
        pe = true;
        checkS();
        // default declaration: keyword, or a quoted value with an optional FIX prefix
        if (!consume(REQ) && !consume(IMP)) { // [60]
          if (consume(FIX)) checkS();
          quote = qu();
          attValue(consume());
        }
        s();
      }
      check('>');
    } else if (consume(NOTA)) { // [82]
      checkS();
      name(true);
      s();
      externalID(false, false);
      s();
      check('>');
    } else if (consume(COMS)) {
      comment();
    } else if (consume(XML)) {
      pi();
    } else {
      // no declaration found
      return false;
    }
    s();
    // reset the flag that was set while scanning the declaration
    pe = false;
    return true;
  }