Пример #1
0
  /**
   * Imports one document: inserts the document record, then processes every {@code ns:ne}
   * element that {@code XML.getElementsByTagname} returns for the DOM root element.
   *
   * <p>For each {@code ns:ne} element a lemma, a wordform and an analyzed wordform are obtained
   * through the {@code createNE*} helpers. If the element carries a non-empty {@code xml:id},
   * {@code createNEAttestation} is called with the element's concordance as quotation.
   *
   * <p>When {@code addEntriesForParts} is set, every {@code ns:nePart} element of the name is
   * handled the same way (with a {@code null} quotation) and is tied to its parent through an
   * {@code NEContainment} that records the part's zero-based position.
   *
   * @param document the document to import; its {@code DOMDocument} and {@code title} are read
   */
  public void importDocument(NEDocument document) {
    Element root = document.DOMDocument.getDocumentElement();
    List<Element> names = XML.getElementsByTagname(root, "ns:ne", false);

    nl.openconvert.log.ConverterLog.defaultLog.println("TITLE: " + document.title);

    ElementConcordancer concordancer = new ElementConcordancer();

    databaseMapping.documentMapping.insertObject(connection, "documents", document);

    for (Element n : names) {
      NELemma neLemma = createNELemma(n);
      NEWordform neWord = createNEWordform(n);
      NEAnalyzedWordform awf = createNEAnalyzedWordform(neLemma, neWord);

      String id = n.getAttribute("xml:id");
      if (id != null && id.length() > 0) {
        String quotation = concordancer.getConcordance(n);
        // The returned attestation is not used here.
        createNEAttestation(document, awf, id, quotation);
      }

      // Parts are only looked up when entries for them are requested.
      if (addEntriesForParts) {
        List<Element> neParts = XML.getElementsByTagname(n, "ns:nePart", false);
        int partNumber = 0;

        for (Element np : neParts) {
          NELemma pLemma = createNELemma(np);
          NEWordform pWord = createNEWordform(np);
          NEAnalyzedWordform pAwf = createNEAnalyzedWordform(pLemma, pWord);

          String pid = np.getAttribute("xml:id");
          if (pid != null && pid.length() > 0) {
            // Parts are attested without a quotation.
            createNEAttestation(document, pAwf, pid, null);
          }

          NEContainment nec = new NEContainment();
          nec.parent = awf;
          nec.child = pAwf;
          nec.partNumber = partNumber++;
          // The canonical instance returned by this call is not needed afterwards.
          DatabaseMapping.canonical(databaseMapping.containmentMap, nec);
          // TODO: should add an extra key for the group id to this,
          // or should this be added to the PoS info for an NE (structure?)
        }
      }
    }
  }
Пример #2
0
 /**
  * Assembles the text of a name from the {@code w} elements that
  * {@code XML.getElementsByTagname} returns for the given element.
  *
  * @param e element whose {@code w} elements are collected
  * @return the trimmed text content of each {@code w} element, joined with a single space
  */
 private String getNameText(Element e) {
   List<String> tokens = new ArrayList<String>();
   List<Element> wordElements = XML.getElementsByTagname(e, "w", false);
   for (Element wordElement : wordElements) {
     String text = wordElement.getTextContent();
     tokens.add(text.trim());
   }
   String joined = StringUtils.join(tokens, " ");
   return joined;
 }
Пример #3
0
 /**
  * Parses the XML file at {@code filename} and imports it as an {@code NEDocument}.
  *
  * <p>Does nothing once {@code nDocuments} has reached {@code maxDocuments}. After a successful
  * import {@code nDocuments} is incremented. Any exception is reported on standard error,
  * together with the name of the file being imported, and is not propagated to the caller.
  *
  * @param filename location passed to {@code XML.parse}; also stored as the document's
  *     {@code url}
  */
 public void importDocument(String filename) {
   try {
     if (nDocuments >= maxDocuments) {
       return;
     }
     Document d = XML.parse(filename);
     NEDocument document = new NEDocument(d);
     document.url = filename;
     importDocument(document);
     // Only successfully imported documents count towards the limit.
     nDocuments++;
   } catch (Exception e) {
     // Name the input first so a failure in a batch run can be traced to its file.
     System.err.println("Failed to import document: " + filename);
     e.printStackTrace();
   }
 }