private void buildDocument() throws IOException { _docId = _documentDescriptor; String currentDocId = DocumentIdOperations.documentDescriptorToId(_documentDescriptor); int lastSnippetId = 0; DocumentHolder docHolder = new DocumentHolder(_indexReader, _docId); DocForReport reportDocument = new DocForReport(currentDocId); for (MatchDescriptor match : _searchResult.matches) { // there are ~ 100 matches in a regular report page, and it's OK to traverse them all each // time building // a new document node DocumentDescriptor docDescriptor = match.doc; if (!(docDescriptor.equals(_docId))) { continue; } if (!_searchType.equals("all-documents") && !docDescriptor.equals(_docId)) { continue; } // searching for the specific snippet if (_searchType.equals("snippet") && lastSnippetId != _sId) { ++lastSnippetId; continue; } addMatchToDocument(match, reportDocument, docHolder); ++lastSnippetId; } createDocumentNode(_documentDescriptor); _rootNode.setAttribute("snippets", String.valueOf(reportDocument.snippets.size())); for (Snippet snippet : reportDocument.snippets) { Element snippetNode = createSnippetNode(snippet); _rootNode.appendChild(snippetNode); } }
/**
 * Initialises {@code _docId} and {@code _sId} from the request parameters.
 *
 * <p>Both fields are first reset to their "not supplied" values ({@code null} and {@code -1}).
 * If the map contains the key {@code "docid"}, its value is converted with
 * {@code DocumentIdOperations.idToDocumentDescriptor}; if it contains {@code "sid"}, its value is
 * parsed as a decimal integer.
 *
 * @param inCgiConfig request parameters keyed by name
 * @throws NumberFormatException if a {@code "sid"} entry is present but is not a valid integer
 */
protected void readQueryParameters(final Map<String, String> inCgiConfig) {
  // Defaults used when a parameter is absent from the request.
  _sId = -1;
  _docId = null;

  final boolean docIdSupplied = inCgiConfig.containsKey("docid");
  if (docIdSupplied) {
    final String rawDocId = inCgiConfig.get("docid");
    _docId = DocumentIdOperations.idToDocumentDescriptor(rawDocId);
  }

  final boolean snippetIdSupplied = inCgiConfig.containsKey("sid");
  if (snippetIdSupplied) {
    final String rawSnippetId = inCgiConfig.get("sid");
    _sId = Integer.parseInt(rawSnippetId);
  }
}
void createDocumentNode(final DocumentDescriptor inDescriptor) throws IOException { try { _document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); _rootNode = _document.createElement("document"); } catch (ParserConfigurationException e) { e.printStackTrace(); System.exit(1); } AtomicReader segmentReader = _indexReader.leaves().get(inDescriptor.segmentNumber).reader(); _rootNode.setAttribute("id", DocumentIdOperations.documentDescriptorToId(inDescriptor)); // TODO: implement the proper way of building a title from the production report _rootNode.setAttribute("title", buildDocumentTitle(segmentReader, inDescriptor)); _rootNode.setAttribute("path", "ruscorpora.ru"); _rootNode.setAttribute( "tagging", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "tagging")); _rootNode.setAttribute("snippets", "0"); Element attributesNode = _document.createElement("attributes"); _rootNode.appendChild(attributesNode); FieldInfos fields = segmentReader.getFieldInfos(); for (int fieldIndex = 0; fieldIndex != fields.size(); ++fieldIndex) { FieldInfo field = fields.fieldInfo(fieldIndex); // TODO: understand why field may turn into null if (field == null) { continue; } String name = field.name; if (Attributes.ATTRIBUTES.contains(name) || Attributes.ATTRIBUTES_FOR_REPORT.contains(name) || Attributes.ATTRIBUTES_FOR_WORD_INFO.contains(name) || !field.hasDocValues()) { // it's a word attribute continue; } Element attrNode = _document.createElement("attr"); attrNode.setAttribute("name", name); attrNode.setAttribute( "value", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, name)); attributesNode.appendChild(attrNode); } }