/**
 * Writes the objects collected in the maps of {@code databaseMapping} to the
 * database through {@code connection}.
 *
 * <p>The tables are filled in this order: {@code lemmata}, {@code wordforms},
 * {@code analyzed_wordforms}, {@code analyzed_wordform_groups} and finally
 * {@code token_attestations}. Before each dependent table is written, the key
 * fields of its objects are copied from the {@code primaryKey} of the objects
 * they refer to.
 *
 * <p>NOTE(review): the ordering suggests that {@code primaryKey} values only
 * become valid once the referenced object has been inserted - confirm against
 * the mapping's insert implementation before reordering anything here.
 */
public void flush() {
    databaseMapping.lemmaMapping.insertObjects(
            connection, "lemmata", databaseMapping.lemmaMap.keySet());
    databaseMapping.wordformMapping.insertObjects(
            connection, "wordforms", databaseMapping.wordformMap.keySet());

    copyKeysIntoAnalyzedWordforms();
    databaseMapping.awfMapping.insertObjects(
            connection, "analyzed_wordforms", databaseMapping.awfMap.keySet());

    copyKeysIntoContainments();
    // Only this insert is skipped for an empty map; the other tables are
    // always handed to their mapping, even when there is nothing to write.
    if (databaseMapping.containmentMap.size() > 0) {
        databaseMapping.containmentMapping.insertObjects(
                connection, "analyzed_wordform_groups", databaseMapping.containmentMap.keySet());
    }

    copyKeysIntoAttestations();
    // Attestations are written in portions of 10000 objects.
    databaseMapping.attestationMapping.insertObjectsInPortions(
            connection, "token_attestations", databaseMapping.attestationMap.keySet(), 10000);
}

/** Copies the lemma and wordform primary keys into every collected analyzed wordform. */
private void copyKeysIntoAnalyzedWordforms() {
    for (Object entry : databaseMapping.awfMap.keySet()) {
        NEAnalyzedWordform analyzed = (NEAnalyzedWordform) entry;
        analyzed.lemmaKey = analyzed.lemma.primaryKey;
        analyzed.wordformKey = analyzed.wordform.primaryKey;
    }
}

/** Copies the parent and child primary keys into every collected containment. */
private void copyKeysIntoContainments() {
    for (Object entry : databaseMapping.containmentMap.keySet()) {
        NEContainment containment = (NEContainment) entry;
        containment.parentKey = containment.parent.primaryKey;
        containment.childKey = containment.child.primaryKey;
    }
}

/** Copies the document and analyzed-wordform primary keys into every collected attestation. */
private void copyKeysIntoAttestations() {
    for (Object entry : databaseMapping.attestationMap.keySet()) {
        NEAttestation attestation = (NEAttestation) entry;
        attestation.documentKey = attestation.document.primaryKey;
        attestation.analyzedWordformKey = attestation.awf.primaryKey;
    }
}
public void importDocument(NEDocument document) { Element root = document.DOMDocument.getDocumentElement(); List<Element> names = XML.getElementsByTagname(root, "ns:ne", false); nl.openconvert.log.ConverterLog.defaultLog.println("TITLE: " + document.title); ElementConcordancer concordancer = new ElementConcordancer(); databaseMapping.documentMapping.insertObject(connection, "documents", document); for (Element n : names) { NELemma neLemma = createNELemma(n); NEWordform neWord = createNEWordform(n); NEAnalyzedWordform awf = createNEAnalyzedWordform(neLemma, neWord); String id = n.getAttribute("xml:id"); if (id != null && id.length() > 0) { String quotation = concordancer.getConcordance(n); NEAttestation at = createNEAttestation(document, awf, id, quotation); } List<Element> neParts = XML.getElementsByTagname(n, "ns:nePart", false); int partNumber = 0; if (addEntriesForParts) for (Element np : neParts) { NELemma pLemma = createNELemma(np); NEWordform pWord = createNEWordform(np); NEAnalyzedWordform pAwf = createNEAnalyzedWordform(pLemma, pWord); String pid = np.getAttribute("xml:id"); if (pid != null && pid.length() > 0) { NEAttestation at = createNEAttestation(document, pAwf, pid, null); } NEContainment nec = new NEContainment(); nec.parent = awf; nec.child = pAwf; nec.partNumber = partNumber++; nec = (NEContainment) DatabaseMapping.canonical(databaseMapping.containmentMap, nec); // HM (bah.) should add extra key for group id to this... // or should we add this to the PoS info for an NE (structure?) } // lexicon.addLemma(lemma, PoS, neLabel, gloss); } }