Пример #1
0
  /**
   * Creates a scanner for the given input and consumes the leading part of
   * the document.
   *
   * <p>The entity table is filled from {@code ENTITIES}, the {@code DTD} and
   * {@code CHOP} flags are read from the properties, an optional document
   * declaration is processed, and the encoding is set to the declared one
   * (or to {@code UTF8} if none was found). Unless fragments are allowed, the
   * first character after the declaration must be whitespace or {@code '<'}.
   * If an {@link IOException} occurs, the input is closed before the
   * exception is passed on.
   *
   * @param f input file
   * @param pr database properties
   * @param frag allow parsing of document fragment
   * @throws IOException I/O exception
   */
  XMLScanner(final IO f, final Prop pr, final boolean frag) throws IOException {
    input = new XMLInput(f);
    fragment = frag;

    try {
      // ENTITIES is read pairwise: the entry at i is added with the one at i + 1
      int i = 0;
      while (i < ENTITIES.length) {
        ents.add(token(ENTITIES[i]), token(ENTITIES[i + 1]));
        i += 2;
      }
      dtd = pr.is(Prop.DTD);
      chop = pr.is(Prop.CHOP);

      // encoding found in the document declaration; null if absent
      String declared = null;
      if (consume(DOCDECL)) {
        if (!s()) {
          // nothing matched by s() after the prefix: step back
          // NOTE(review): presumably 5 is the length of DOCDECL - confirm
          prev(5);
        } else {
          if (!version()) {
            error(DECLSTART);
          }
          // remembers the result of the s() call preceding the next attribute
          boolean ws = s();
          declared = encoding();
          if (declared != null) {
            if (!ws) {
              error(WSERROR);
            }
            ws = s();
          }
          if (sddecl() != null && !ws) {
            error(WSERROR);
          }
          s();
          // the declaration must be terminated by "?>"
          final int first = nextChar();
          if (!(first == '?' && nextChar() == '>')) {
            error(DECLWRONG);
          }
        }
      }
      encoding = declared != null ? declared : UTF8;

      if (!fragment) {
        // the next character must be accepted by s(int) or be '<'
        final int next = consume();
        if (!s(next)) {
          if (next != '<') {
            error(BEFOREROOT);
          }
          prev(1);
        }
      }
    } catch (final IOException ioe) {
      // release the input before propagating the failure
      input.close();
      throw ioe;
    }
  }
Пример #2
0
  /**
   * Creates a full-text index builder for the given data reference.
   *
   * <p>The full-text options (diacritics, case sensitivity, stemming, stop
   * words and language) are taken from the database properties. Construction
   * fails if no tokenizer is available for the language, or if stemming is
   * requested and no stemmer is available for it.
   *
   * @param d data reference
   * @throws IOException IOException
   */
  FTBuilder(final Data d) throws IOException {
    super(d);

    final Prop props = d.meta.prop;

    // assemble the full-text options from the database properties
    final FTOpt options = new FTOpt();
    options.set(FTFlag.DC, props.is(Prop.DIACRITICS));
    options.set(FTFlag.CS, props.is(Prop.CASESENS));
    options.set(FTFlag.ST, props.is(Prop.STEMMING));
    options.sw = new StopWords(d, props.get(Prop.STOPWORDS));
    options.ln = Language.get(props);

    // reject languages that cannot be processed
    if (!Tokenizer.supportFor(options.ln)) {
      throw new BaseXException(NO_TOKENIZER_X, options.ln);
    }
    if (props.is(Prop.STEMMING)) {
      if (!Stemmer.supportFor(options.ln)) {
        throw new BaseXException(NO_STEMMER_X, options.ln);
      }
    }

    scm = d.meta.scoring;
    // initial values for min and max
    min = Integer.MAX_VALUE;
    max = -1;
    lex = new FTLexer(options);
  }