void createDocumentNode(final DocumentDescriptor inDescriptor) throws IOException { try { _document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); _rootNode = _document.createElement("document"); } catch (ParserConfigurationException e) { e.printStackTrace(); System.exit(1); } AtomicReader segmentReader = _indexReader.leaves().get(inDescriptor.segmentNumber).reader(); _rootNode.setAttribute("id", DocumentIdOperations.documentDescriptorToId(inDescriptor)); // TODO: implement the proper way of building a title from the production report _rootNode.setAttribute("title", buildDocumentTitle(segmentReader, inDescriptor)); _rootNode.setAttribute("path", "ruscorpora.ru"); _rootNode.setAttribute( "tagging", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "tagging")); _rootNode.setAttribute("snippets", "0"); Element attributesNode = _document.createElement("attributes"); _rootNode.appendChild(attributesNode); FieldInfos fields = segmentReader.getFieldInfos(); for (int fieldIndex = 0; fieldIndex != fields.size(); ++fieldIndex) { FieldInfo field = fields.fieldInfo(fieldIndex); // TODO: understand why field may turn into null if (field == null) { continue; } String name = field.name; if (Attributes.ATTRIBUTES.contains(name) || Attributes.ATTRIBUTES_FOR_REPORT.contains(name) || Attributes.ATTRIBUTES_FOR_WORD_INFO.contains(name) || !field.hasDocValues()) { // it's a word attribute continue; } Element attrNode = _document.createElement("attr"); attrNode.setAttribute("name", name); attrNode.setAttribute( "value", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, name)); attributesNode.appendChild(attrNode); } }