/**
 * Builds a {@link BooleanQuery} whose clauses are the analyzed terms of the element's text.
 *
 * <p>The element text is tokenized by {@code analyzer} for the field named by the
 * {@code fieldName} attribute; each token becomes a {@link TermQuery} added as a {@code SHOULD}
 * clause. The {@code disableCoord} (default {@code false}), {@code minimumNumberShouldMatch}
 * (default {@code 0}) and {@code boost} (default {@code 1.0f}) attributes are applied to the
 * resulting query.
 *
 * @param e the element describing the query
 * @return the assembled boolean query
 * @throws ParserException propagated from the {@code DOMUtils} "OrFail" lookups of the field name
 *     and text
 * @throws RuntimeException if tokenizing the text raises an {@link IOException}; the original
 *     exception is attached as the cause
 */
public Query getQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String text = DOMUtils.getNonBlankTextOrFail(e);
  BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false));
  bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));

  // try-with-resources closes the stream even when reset()/incrementToken() throws,
  // which the previous explicit close() at the end of the happy path did not.
  try (TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text))) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      // The attribute's BytesRef is read again on every token, so each term gets its own copy.
      Term term = new Term(fieldName, BytesRef.deepCopyOf(bytes));
      bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
    }
    ts.end();
  } catch (IOException ioe) {
    // Keep the historical message but attach the cause so the stack trace is not lost.
    throw new RuntimeException("Error constructing terms from index:" + ioe, ioe);
  }

  bq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
  return bq;
}
/**
 * Creates a {@link SpanTermQuery} for a single term.
 *
 * <p>The term's field comes from the {@code fieldName} attribute and its text from the element's
 * text content. The {@code boost} attribute (default {@code 1.0f}) is set on the query.
 *
 * @param e the element describing the span term
 * @return the boosted span term query
 * @throws ParserException propagated from the {@code DOMUtils} "OrFail" lookups
 */
public SpanQuery getSpanQuery(Element e) throws ParserException {
  final Term term =
      new Term(
          DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"),
          DOMUtils.getNonBlankTextOrFail(e));
  final SpanTermQuery spanTerm = new SpanTermQuery(term);
  final float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
  spanTerm.setBoost(boost);
  return spanTerm;
}
/**
 * Creates a span query for a single term and wraps it in a {@link SpanBoostQuery}.
 *
 * <p>The term's field comes from the {@code fieldName} attribute and its text from the element's
 * text content. The wrapper is always applied, using the {@code boost} attribute or
 * {@code 1.0f} when the attribute is absent.
 *
 * @param e the element describing the span term
 * @return a {@link SpanBoostQuery} around the {@link SpanTermQuery}
 * @throws ParserException propagated from the {@code DOMUtils} "OrFail" lookups
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  final String field = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  final String termText = DOMUtils.getNonBlankTextOrFail(e);
  final Term term = new Term(field, termText);
  return new SpanBoostQuery(new SpanTermQuery(term), DOMUtils.getAttribute(e, "boost", 1.0f));
}
/**
 * Builds a string {@link TermRangeQuery} from the element's attributes.
 *
 * <p>{@code lowerTerm} and {@code upperTerm} are passed as {@code null} when the corresponding
 * attribute is not present on the element. {@code includeLower} and {@code includeUpper} both
 * default to {@code true}.
 *
 * @param e the element describing the range
 * @return the range query created by {@code TermRangeQuery.newStringRange}
 * @throws ParserException declared by the builder contract
 */
@Override
public TermRangeQuery getQuery(Element e) throws ParserException {
  final String field = DOMUtils.getAttributeWithInheritance(e, "fieldName");

  String lower = null;
  if (e.hasAttribute("lowerTerm")) {
    lower = e.getAttribute("lowerTerm");
  }

  String upper = null;
  if (e.hasAttribute("upperTerm")) {
    upper = e.getAttribute("upperTerm");
  }

  return TermRangeQuery.newStringRange(
      field,
      lower,
      upper,
      DOMUtils.getAttribute(e, "includeLower", true),
      DOMUtils.getAttribute(e, "includeUpper", true));
}
/**
 * Builds a {@link TermQuery} for the element's field and text.
 *
 * <p>The query is wrapped in a {@link BoostQuery} only when the {@code boost} attribute
 * (default {@code 1.0f}) differs from {@code 1}.
 *
 * @param e the element describing the term
 * @return the term query, possibly wrapped in a boost
 * @throws ParserException propagated from the {@code DOMUtils} "OrFail" lookups
 */
@Override
public Query getQuery(Element e) throws ParserException {
  final Term term =
      new Term(
          DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"),
          DOMUtils.getNonBlankTextOrFail(e));
  final TermQuery termQuery = new TermQuery(term);
  final float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
  return boost == 1f ? termQuery : new BoostQuery(termQuery, boost);
}
public Query getQuery(Element e) throws ParserException { float tieBreaker = DOMUtils.getAttribute(e, "tieBreaker", 0.0f); DisjunctionMaxQuery dq = new DisjunctionMaxQuery(tieBreaker); dq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f)); NodeList nl = e.getChildNodes(); for (int i = 0; i < nl.getLength(); i++) { Node node = nl.item(i); if (node instanceof Element) { // all elements are disjuncts. Element queryElem = (Element) node; Query q = factory.getQuery(queryElem); dq.add(q); } } return dq; }
/**
 * Builds a {@link NumericRangeFilter} from the element's attributes.
 *
 * <p>{@code lowerTerm} and {@code upperTerm} are parsed according to the {@code type} attribute
 * (case-insensitive {@code int}, {@code long}, {@code double} or {@code float}; default
 * {@code "int"}). {@code includeLower} and {@code includeUpper} default to {@code true}, and
 * {@code precisionStep} defaults to {@code NumericUtils.PRECISION_STEP_DEFAULT}.
 *
 * @param e the element describing the numeric range
 * @return the numeric range filter, or {@code NO_MATCH_FILTER} when a bound cannot be parsed and
 *     {@code strictMode} is off
 * @throws ParserException if {@code type} is not one of the supported names, or if a bound
 *     cannot be parsed as a number while {@code strictMode} is on
 */
@Override
public Filter getFilter(Element e) throws ParserException {
  final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  final String lower = DOMUtils.getAttributeOrFail(e, "lowerTerm");
  final String upper = DOMUtils.getAttributeOrFail(e, "upperTerm");
  final boolean includeLower = DOMUtils.getAttribute(e, "includeLower", true);
  final boolean includeUpper = DOMUtils.getAttribute(e, "includeUpper", true);
  final int step =
      DOMUtils.getAttribute(e, "precisionStep", NumericUtils.PRECISION_STEP_DEFAULT);
  final String numericType = DOMUtils.getAttribute(e, "type", "int");

  try {
    if (numericType.equalsIgnoreCase("int")) {
      return NumericRangeFilter.newIntRange(
          fieldName,
          step,
          Integer.valueOf(lower),
          Integer.valueOf(upper),
          includeLower,
          includeUpper);
    }
    if (numericType.equalsIgnoreCase("long")) {
      return NumericRangeFilter.newLongRange(
          fieldName,
          step,
          Long.valueOf(lower),
          Long.valueOf(upper),
          includeLower,
          includeUpper);
    }
    if (numericType.equalsIgnoreCase("double")) {
      return NumericRangeFilter.newDoubleRange(
          fieldName,
          step,
          Double.valueOf(lower),
          Double.valueOf(upper),
          includeLower,
          includeUpper);
    }
    if (numericType.equalsIgnoreCase("float")) {
      return NumericRangeFilter.newFloatRange(
          fieldName,
          step,
          Float.valueOf(lower),
          Float.valueOf(upper),
          includeLower,
          includeUpper);
    }
    throw new ParserException("type attribute must be one of: [long, int, double, float]");
  } catch (NumberFormatException nfe) {
    // Outside strict mode an unparsable bound yields the no-match filter instead of an error.
    if (!strictMode) {
      return NO_MATCH_FILTER;
    }
    throw new ParserException("Could not parse lowerTerm or upperTerm into a number", nfe);
  }
}
@Override public Query getQuery(Element e) throws ParserException { String text = DOMUtils.getText(e); try { Query q = null; if (unSafeParser != null) { // synchronize on unsafe parser synchronized (unSafeParser) { q = unSafeParser.parse(text); } } else { String fieldName = DOMUtils.getAttribute(e, "fieldName", defaultField); // Create new parser QueryParser parser = createQueryParser(fieldName, analyzer); q = parser.parse(text); } q.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f)); return q; } catch (ParseException e1) { throw new ParserException(e1.getMessage()); } }
/**
 * Builds a {@link SpanOrQuery} over the analyzed terms of the element's text.
 *
 * <p>The text is tokenized by {@code analyzer} for the field named by the {@code fieldName}
 * attribute; each token becomes a {@link SpanTermQuery} clause. The result is always wrapped in
 * a {@link SpanBoostQuery} using the {@code boost} attribute (default {@code 1.0f}).
 *
 * @param e the element describing the span terms
 * @return a {@link SpanBoostQuery} around the {@link SpanOrQuery} of all produced terms
 * @throws ParserException propagated from the {@code DOMUtils} "OrFail" lookups, or raised when
 *     tokenizing the text fails with an {@link IOException} (attached as the cause)
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // Copy the bytes: the attribute's BytesRef is read afresh for every token.
      SpanTermQuery stq =
          new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    return new SpanBoostQuery(soq, boost);
  } catch (IOException ioe) {
    // Attach the cause so the underlying I/O failure is not lost.
    throw new ParserException("IOException parsing value:" + value, ioe);
  }
}
/* (non-Javadoc) * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element) */ @Override public Query getQuery(Element e) throws ParserException { String fieldsList = e.getAttribute("fieldNames"); // a comma-delimited list of fields String fields[] = defaultFieldNames; if ((fieldsList != null) && (fieldsList.trim().length() > 0)) { fields = fieldsList.trim().split(","); // trim the fieldnames for (int i = 0; i < fields.length; i++) { fields[i] = fields[i].trim(); } } // Parse any "stopWords" attribute // TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then // I use all analyzers/fields to generate multi-field compatible stop list String stopWords = e.getAttribute("stopWords"); Set<String> stopWordsSet = null; if ((stopWords != null) && (fields != null)) { stopWordsSet = new HashSet<String>(); for (String field : fields) { try (TokenStream ts = analyzer.tokenStream(field, stopWords)) { CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { stopWordsSet.add(termAtt.toString()); } ts.end(); ts.close(); } catch (IOException ioe) { throw new ParserException( "IoException parsing stop words list in " + getClass().getName() + ":" + ioe.getLocalizedMessage()); } } } MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.getText(e), fields, analyzer, fields[0]); mlt.setMaxQueryTerms(DOMUtils.getAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS)); mlt.setMinTermFrequency( DOMUtils.getAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY)); mlt.setPercentTermsToMatch( DOMUtils.getAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100); mlt.setStopWords(stopWordsSet); int minDocFreq = DOMUtils.getAttribute(e, "minDocFreq", -1); if (minDocFreq >= 0) { mlt.setMinDocFreq(minDocFreq); } mlt.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f)); return mlt; }