예제 #1
0
  /**
   * Writes everything collected in {@code databaseMapping} out through {@code connection}.
   *
   * <p>Tables are filled in this order: {@code lemmata}, {@code wordforms},
   * {@code analyzed_wordforms}, {@code analyzed_wordform_groups} and {@code token_attestations}.
   * Before each dependent table is written, the key fields of its rows are copied from the
   * {@code primaryKey} of the objects they refer to, so the statement order below matters.
   * NOTE(review): presumably {@code primaryKey} is assigned by the preceding insert - confirm.
   */
  public void flush() {
    // Independent tables first.
    databaseMapping.lemmaMapping.insertObjects(
        connection, "lemmata", databaseMapping.lemmaMap.keySet());
    databaseMapping.wordformMapping.insertObjects(
        connection, "wordforms", databaseMapping.wordformMap.keySet());

    linkAnalyzedWordformKeys();
    databaseMapping.awfMapping.insertObjects(
        connection, "analyzed_wordforms", databaseMapping.awfMap.keySet());

    linkContainmentKeys();
    // The containment insert is the only one that is skipped when there is nothing to write.
    if (databaseMapping.containmentMap.size() > 0) {
      databaseMapping.containmentMapping.insertObjects(
          connection, "analyzed_wordform_groups", databaseMapping.containmentMap.keySet());
    }

    linkAttestationKeys();
    // Attestations go through the portioned variant, called with a portion argument of 10000.
    databaseMapping.attestationMapping.insertObjectsInPortions(
        connection, "token_attestations", databaseMapping.attestationMap.keySet(), 10000);
  }

  /** Copies the lemma and wordform primary keys into every analyzed wordform. */
  private void linkAnalyzedWordformKeys() {
    for (Object entry : databaseMapping.awfMap.keySet()) {
      NEAnalyzedWordform analyzed = (NEAnalyzedWordform) entry;
      analyzed.lemmaKey = analyzed.lemma.primaryKey;
      analyzed.wordformKey = analyzed.wordform.primaryKey;
    }
  }

  /** Copies the parent and child primary keys into every containment. */
  private void linkContainmentKeys() {
    for (Object entry : databaseMapping.containmentMap.keySet()) {
      NEContainment containment = (NEContainment) entry;
      containment.parentKey = containment.parent.primaryKey;
      containment.childKey = containment.child.primaryKey;
    }
  }

  /** Copies the document and analyzed-wordform primary keys into every attestation. */
  private void linkAttestationKeys() {
    for (Object entry : databaseMapping.attestationMap.keySet()) {
      NEAttestation attestation = (NEAttestation) entry;
      attestation.documentKey = attestation.document.primaryKey;
      attestation.analyzedWordformKey = attestation.awf.primaryKey;
    }
  }
예제 #2
0
  /**
   * Imports the named entities found in one document.
   *
   * <p>The document itself is inserted into the {@code documents} table right away. For every
   * {@code ns:ne} element a lemma, a wordform and an analyzed wordform are created, plus an
   * attestation (with a concordance as quotation) when the element has a non-empty
   * {@code xml:id}. When {@code addEntriesForParts} is set, the same is done for each
   * {@code ns:nePart} inside the name (attestations without quotation), and a containment
   * linking the name to the part is passed to {@code DatabaseMapping.canonical}.
   *
   * @param document the document whose DOM tree is scanned for named entities
   */
  public void importDocument(NEDocument document) {
    Element root = document.DOMDocument.getDocumentElement();
    List<Element> names = XML.getElementsByTagname(root, "ns:ne", false);

    nl.openconvert.log.ConverterLog.defaultLog.println("TITLE: " + document.title);

    ElementConcordancer concordancer = new ElementConcordancer();

    databaseMapping.documentMapping.insertObject(connection, "documents", document);

    for (Element n : names) {
      NELemma neLemma = createNELemma(n);
      NEWordform neWord = createNEWordform(n);
      NEAnalyzedWordform awf = createNEAnalyzedWordform(neLemma, neWord);

      String id = n.getAttribute("xml:id");
      if (id != null && id.length() > 0) {
        String quotation = concordancer.getConcordance(n);
        // Called for its effect only; the returned attestation is not needed here.
        createNEAttestation(document, awf, id, quotation);
      }

      if (!addEntriesForParts) {
        continue;
      }

      // Only look up the parts when they are actually going to be imported.
      List<Element> neParts = XML.getElementsByTagname(n, "ns:nePart", false);
      int partNumber = 0;
      for (Element np : neParts) {
        NELemma pLemma = createNELemma(np);
        NEWordform pWord = createNEWordform(np);
        NEAnalyzedWordform pAwf = createNEAnalyzedWordform(pLemma, pWord);

        String pid = np.getAttribute("xml:id");
        if (pid != null && pid.length() > 0) {
          // Parts are attested without a quotation.
          createNEAttestation(document, pAwf, pid, null);
        }

        NEContainment nec = new NEContainment();
        nec.parent = awf;
        nec.child = pAwf;
        nec.partNumber = partNumber++;
        // NOTE(review): presumably canonical() registers nec in containmentMap - confirm.
        DatabaseMapping.canonical(databaseMapping.containmentMap, nec);
        // TODO: should an extra key for the group id be added to this,
        // or should it go into the PoS info for an NE (structure)?
      }
    }
  }