/** * Initializes the scanner. * * @param f input file * @param pr database properties * @param frag allow parsing of document fragment * @throws IOException I/O exception */ XMLScanner(final IO f, final Prop pr, final boolean frag) throws IOException { input = new XMLInput(f); fragment = frag; try { for (int e = 0; e < ENTITIES.length; e += 2) { ents.add(token(ENTITIES[e]), token(ENTITIES[e + 1])); } dtd = pr.is(Prop.DTD); chop = pr.is(Prop.CHOP); String enc = null; // process document declaration... if (consume(DOCDECL)) { if (s()) { if (!version()) error(DECLSTART); boolean s = s(); enc = encoding(); if (enc != null) { if (!s) error(WSERROR); s = s(); } if (sddecl() != null && !s) error(WSERROR); s(); final int ch = nextChar(); if (ch != '?' || nextChar() != '>') error(DECLWRONG); } else { prev(5); } } encoding = enc == null ? UTF8 : enc; if (!fragment) { final int n = consume(); if (!s(n)) { if (n != '<') error(BEFOREROOT); prev(1); } } } catch (final IOException ex) { input.close(); throw ex; } }
/**
 * Constructor. Derives the full-text options (diacritics, case sensitivity,
 * stemming, stop words, language) from the properties stored in the
 * meta data of the given data reference and creates the lexer from them.
 *
 * @param d data reference
 * @throws IOException IOException; a {@code BaseXException} is raised if the
 *   language has no tokenizer, or if stemming is enabled and the language
 *   has no stemmer
 */
FTBuilder(final Data d) throws IOException {
  super(d);

  // initial extremes (presumably replaced while the index is built)
  min = Integer.MAX_VALUE;
  max = -1;
  scm = d.meta.scoring;

  final Prop props = d.meta.prop;
  final FTOpt opts = new FTOpt();
  opts.set(FTFlag.DC, props.is(Prop.DIACRITICS));
  opts.set(FTFlag.CS, props.is(Prop.CASESENS));
  opts.set(FTFlag.ST, props.is(Prop.STEMMING));
  opts.sw = new StopWords(d, props.get(Prop.STOPWORDS));
  opts.ln = Language.get(props);

  // a tokenizer is always required for the chosen language
  if (!Tokenizer.supportFor(opts.ln)) {
    throw new BaseXException(NO_TOKENIZER_X, opts.ln);
  }
  // a stemmer is only required if stemming is enabled
  if (props.is(Prop.STEMMING)) {
    if (!Stemmer.supportFor(opts.ln)) {
      throw new BaseXException(NO_STEMMER_X, opts.ln);
    }
  }

  lex = new FTLexer(opts);
}