public void importDocument(NEDocument document) { Element root = document.DOMDocument.getDocumentElement(); List<Element> names = XML.getElementsByTagname(root, "ns:ne", false); nl.openconvert.log.ConverterLog.defaultLog.println("TITLE: " + document.title); ElementConcordancer concordancer = new ElementConcordancer(); databaseMapping.documentMapping.insertObject(connection, "documents", document); for (Element n : names) { NELemma neLemma = createNELemma(n); NEWordform neWord = createNEWordform(n); NEAnalyzedWordform awf = createNEAnalyzedWordform(neLemma, neWord); String id = n.getAttribute("xml:id"); if (id != null && id.length() > 0) { String quotation = concordancer.getConcordance(n); NEAttestation at = createNEAttestation(document, awf, id, quotation); } List<Element> neParts = XML.getElementsByTagname(n, "ns:nePart", false); int partNumber = 0; if (addEntriesForParts) for (Element np : neParts) { NELemma pLemma = createNELemma(np); NEWordform pWord = createNEWordform(np); NEAnalyzedWordform pAwf = createNEAnalyzedWordform(pLemma, pWord); String pid = np.getAttribute("xml:id"); if (pid != null && pid.length() > 0) { NEAttestation at = createNEAttestation(document, pAwf, pid, null); } NEContainment nec = new NEContainment(); nec.parent = awf; nec.child = pAwf; nec.partNumber = partNumber++; nec = (NEContainment) DatabaseMapping.canonical(databaseMapping.containmentMap, nec); // HM (bah.) should add extra key for group id to this... // or should we add this to the PoS info for an NE (structure?) } // lexicon.addLemma(lemma, PoS, neLabel, gloss); } }
private String getNameText(Element e) // whoops; { List<String> parts = new ArrayList<String>(); for (Element w : XML.getElementsByTagname(e, "w", false)) { parts.add(w.getTextContent().trim()); } return StringUtils.join(parts, " "); }