/**
 * Creates a topic reader from an NTCIR topic file on disk.
 *
 * <p>The file is parsed into the {@code topics} document with a builder obtained from
 * {@code XMLHelper.getDocumentBuilder(true)}, and an {@link XQueryGenerator} is then created
 * over that document.
 *
 * @param topicFile the XML file containing the NTCIR topics
 * @throws ParserConfigurationException if the document builder cannot be created
 * @throws IOException if the file cannot be read
 * @throws SAXException if the file cannot be parsed as XML
 */
public NtcirTopicReader(File topicFile)
    throws ParserConfigurationException, IOException, SAXException {
  // NOTE(review): the boolean flag presumably enables namespace awareness - confirm in XMLHelper.
  topics = XMLHelper.getDocumentBuilder(true).parse(topicFile);
  // TODO: Find out how this code duplication can be avoided in Java.
  queryGenerator = new XQueryGenerator(topics);
}
/**
 * Splits the loaded NTCIR topics document into its individual queries and builds one
 * {@link NtcirPattern} per formula.
 *
 * <p>For every {@code topic} element in the {@code NS_NII} namespace, the topic number is read
 * from {@code ./t:num} and the formulae are selected with {@code ./t:query/t:formula}. For each
 * formula, its {@code id} attribute is read, its first child is taken as the MathML node, and
 * the first child of that MathML node is handed to the query generator as main element. The
 * generator's string form (the XQuery) is stored in the pattern together with the topic number,
 * the formula id and the MathML node.
 *
 * <p>The patterns are appended to the {@code patterns} field, and that same list is returned.
 *
 * @return List of NtcirPatterns, one for each formula of each topic
 * @throws XPathExpressionException Thrown if xpaths fail to compile or fail to evaluate
 */
public final List<NtcirPattern> extractPatterns() throws XPathExpressionException {
  XPath niiXpath = XMLHelper.namespaceAwareXpath("t", NS_NII);
  XPathExpression topicNumberExpr = niiXpath.compile("./t:num");
  XPathExpression formulaExpr = niiXpath.compile("./t:query/t:formula");

  NodeList topicElements = topics.getElementsByTagNameNS(NS_NII, "topic");
  for (Node topic : new NonWhitespaceNodeList(topicElements)) {
    String topicNumber = topicNumberExpr.evaluate(topic);
    NodeList formulaNodes = (NodeList) formulaExpr.evaluate(topic, XPathConstants.NODESET);

    for (Node formulaNode : new NonWhitespaceNodeList(formulaNodes)) {
      Node idAttribute = formulaNode.getAttributes().getNamedItem("id");
      String formulaId = idAttribute.getTextContent();

      // The formula wraps the MathML node; the generator works on that node's first child.
      Node mathMLNode = getFirstChild(formulaNode);
      queryGenerator.setMainElement(getFirstChild(mathMLNode));

      String generatedQuery = queryGenerator.toString();
      patterns.add(new NtcirPattern(topicNumber, formulaId, generatedQuery, mathMLNode));
    }
  }
  return patterns;
}