예제 #1
0
  /**
   * Adds a GAMSO activity to the Jena model.
   *
   * <p>The activity code is the first component of <code>activityNumber</code>, followed by the
   * second and third components (each preceded by a dot) when they are strictly positive. The
   * concept is created under <code>GAMSO_BASE_URI</code>, typed, given its notation and English
   * preferred label, and attached to the concept scheme. When <code>getParentCode</code> returns
   * <code>null</code> the concept is declared as a top concept of the scheme; otherwise it is
   * linked to its parent with <code>skos:broader</code> / <code>skos:narrower</code>.
   *
   * @param activityNumber An array of integers with the components of the activity code (the
   *     first three elements are read).
   * @param activityLabel The label of the activity as read from the Word document.
   * @param activityDescription The components of the activity description (a <code>List</code> of
   *     strings). This method currently does not record them in the model.
   */
  private void addActivityToModel(
      int[] activityNumber, String activityLabel, List<String> activityDescription) {

    // The code ends up in the concept URI and in skos:notation, so it is built with
    // locale-independent conversions: String.format("%d") localizes digits for some locales.
    StringBuilder codeBuilder = new StringBuilder(Integer.toString(activityNumber[0]));
    if (activityNumber[1] > 0) {
      codeBuilder.append('.').append(activityNumber[1]);
    }
    if (activityNumber[2] > 0) {
      codeBuilder.append('.').append(activityNumber[2]);
    }
    String code = codeBuilder.toString();
    String parentCode = getParentCode(code);

    logger.debug("Adding activity " + code + " - " + activityLabel);

    // Create the concept and attach its descriptive properties
    Resource gamsoConcept = gamsoModel.createResource(GAMSO_BASE_URI + code, SKOS.Concept);
    gamsoConcept.addProperty(RDF.type, CSPAOnto.GAMSOActivity);
    gamsoConcept.addProperty(SKOS.notation, code);
    gamsoConcept.addProperty(SKOS.prefLabel, gamsoModel.createLiteral(activityLabel, "en"));
    gamsoConcept.addProperty(SKOS.inScheme, gamsoCS);

    if (parentCode == null) {
      // No parent: the activity is a top concept of the scheme
      gamsoCS.addProperty(SKOS.hasTopConcept, gamsoConcept);
      gamsoConcept.addProperty(SKOS.topConceptOf, gamsoCS);
    } else {
      // Link the activity and its parent in both directions
      Resource parentConcept = gamsoModel.createResource(GAMSO_BASE_URI + parentCode);
      parentConcept.addProperty(SKOS.narrower, gamsoConcept);
      gamsoConcept.addProperty(SKOS.broader, parentConcept);
    }
  }
예제 #2
0
 /**
  * Initialize the result model.
  *
  * <p>Replaces <code>testResults</code> with a fresh default model, creates the <code>jena2</code>
  * resource under <code>BASE_RESULTS_URI</code>, gives it an <code>rdfs:comment</code> (an XHTML
  * fragment) and an <code>rdfs:label</code>, and registers the "results" namespace prefix.
  */
 public void initResults() {
   testResults = ModelFactory.createDefaultModel();
   jena2 = testResults.createResource(BASE_RESULTS_URI + "#jena2");
   // NOTE(review): the 'true' flag presumably marks the literal as well-formed XML; confirm
   // against the Jena Model.createLiteral(String, boolean) documentation.
   jena2.addProperty(
       RDFS.comment,
       testResults.createLiteral(
           "<a xmlns=\"http://www.w3.org/1999/xhtml\" href=\"http://jena.sourceforge.net/\">Jena2</a> includes a rule-based inference engine for RDF processing, "
               + "supporting both forward and backward chaining rules. Its OWL rule set is designed to provide sound "
               + "but not complete instance reasoning for that fragment of OWL/Full limited to the OWL/lite vocabulary. In "
               + "particular it does not support unionOf/complementOf.",
           true));
   jena2.addProperty(RDFS.label, "Jena2");
   testResults.setNsPrefix("results", OWLResults.NS);
 }
예제 #3
0
  /**
   * Main method: reads the Word document and extracts the information about entities.
   *
   * <p>The paragraphs of the document at <code>GAMSO_DOCX</code> are processed in order:
   *
   * <ul>
   *   <li>paragraphs whose running number is listed in <code>descriptionIndexes</code> are
   *       appended to the description of the concept scheme;
   *   <li>numbered paragraphs styled <code>LEVEL1_STYLING</code> start a new level 1 activity, and
   *       paragraphs styled <code>LEVEL2_STYLING</code> start a new level 2 activity;
   *   <li>other paragraphs are collected as description items of the current activity, and the
   *       numbered ones found under a level 2 activity are also recorded as level 3 activities.
   * </ul>
   *
   * <p>An activity is recorded in the model when the next heading is met, or at the end of the
   * document for the last one. The properties of the concept scheme are added last.
   *
   * @throws IOException In case of error while reading the document.
   */
  public void readGAMSODocument() throws IOException {

    // Read the document with POI; try-with-resources closes both the stream and the document,
    // including when an exception interrupts the iteration
    try (FileInputStream documentStream = new FileInputStream(GAMSO_DOCX);
        XWPFDocument document = new XWPFDocument(documentStream)) {

      List<XWPFParagraph> paragraphs = document.getParagraphs();

      int paragraphNumber = 0;
      int paragraphStylingNumber = 0;
      int[] currentNumber = {0, 0, 0};
      List<String> currentDescription = null;
      String currentLabel = null;

      // Creation of the concept scheme resource.
      gamsoCS = gamsoModel.createResource(GAMSO_BASE_URI + "gamso", SKOS.ConceptScheme);

      // Iteration through the document paragraphs
      logger.debug(
          "Document read from " + GAMSO_DOCX + ", starting to iterate through the paragraphs.");
      for (XWPFParagraph paragraph : paragraphs) {

        if (paragraph.getParagraphText() == null) continue; // skipping paragraphs without text
        paragraphNumber++;

        // Styling number will be strictly positive for headings and list elements (eg. bullet
        // points)
        paragraphStylingNumber =
            (paragraph.getNumID() == null) ? 0 : paragraph.getNumID().intValue();

        // Add the paragraph text to the CS description if its number corresponds
        if (descriptionIndexes.contains(paragraphNumber)) {
          // TODO normalize white spaces
          if (gamsoDescription == null) gamsoDescription = paragraph.getParagraphText();
          else gamsoDescription += " " + paragraph.getParagraphText();
        }

        if (LEVEL1_STYLING.equals(paragraph.getStyle())) {
          // The first headings are in the introduction: we skip those
          if (paragraphStylingNumber == 0) continue;
          // If paragraph has a number styling, we have a new level 1 activity
          currentNumber[2] = 0; // Third number may have been modified by level 3 operations
          if (currentDescription != null) {
            // Previous description is complete: record in the model
            this.addActivityToModel(currentNumber, currentLabel, currentDescription);
          }
          currentNumber[0]++;
          currentNumber[1] = 0;
          currentDescription = new ArrayList<String>();
          currentLabel = normalizeActivityName(paragraph);
        } else if (LEVEL2_STYLING.equals(paragraph.getStyle())) {
          // Start of a new level 2 activity
          currentNumber[2] = 0;
          // Record previous description (which exists since we are at level 2) in the model
          this.addActivityToModel(currentNumber, currentLabel, currentDescription);
          currentNumber[1]++;
          currentDescription = new ArrayList<String>();
          currentLabel = normalizeActivityName(paragraph); // Strip code for 3.x activities
        } else {
          // Skip paragraphs that are before the first activity
          if (currentNumber[0] == 0) continue;
          // Not a heading, so part of a description
          String descriptionPart = normalizeDescriptionItem(paragraph, paragraphStylingNumber);
          if (descriptionPart.length() > 0) currentDescription.add(descriptionPart);
          // Transform bullet points of level 2 activities into level 3 activities
          if ((paragraphStylingNumber > 0) && (currentNumber[1] > 0)) {
            currentNumber[2]++;
            this.addActivityToModel(currentNumber, paragraph.getParagraphText().trim(), null);
          }
        }
      }

      // The last activity read has not been added to the model yet: we do it here, unless no
      // activity was found at all
      if (currentDescription != null) {
        // As for the headings, the third number may have been modified by level 3 operations:
        // without this reset the label would be recorded under the last level 3 code
        currentNumber[2] = 0;
        this.addActivityToModel(currentNumber, currentLabel, currentDescription);
      }
    }

    logger.debug("Iteration through the paragraphs finished, completing the Jena model.");

    // Add the properties of the concept scheme (the description is now complete)
    gamsoCS.addProperty(SKOS.notation, gamsoModel.createLiteral("GAMSO v1.0"));
    gamsoCS.addProperty(
        SKOS.prefLabel,
        gamsoModel.createLiteral(
            "Generic Activity Model for Statistical Organisations v 1.0", "en"));
    gamsoCS.addProperty(SKOS.scopeNote, gamsoModel.createLiteral(gamsoDescription, "en"));
  }