/**
 * Registers one GAMSO activity as a SKOS concept in the Jena model.
 *
 * <p>The dotted activity code is assembled from the components of {@code activityNumber}: the
 * first component is always used, the second and third are appended only when strictly
 * positive. The concept is typed, given a notation and an English preferred label, and attached
 * to the concept scheme. When {@code getParentCode} returns {@code null} the concept is declared
 * a top concept of the scheme; otherwise it is linked to its parent with {@code skos:broader} /
 * {@code skos:narrower}.
 *
 * @param activityNumber An array of (at least three) integers with the components of the
 *     activity code.
 * @param activityLabel The label of the activity as read from the Word document.
 * @param activityDescription The components of the activity description (a <code>List</code> of
 *     strings). Note: this argument is not read by the current implementation.
 */
private void addActivityToModel(
    int[] activityNumber, String activityLabel, List<String> activityDescription) {

  // Build the dotted code: "<n0>", then ".<n1>" and ".<n2>" for each strictly positive component
  StringBuilder codeBuilder = new StringBuilder(String.format("%d", activityNumber[0]));
  for (int level = 1; level <= 2; level++) {
    if (activityNumber[level] > 0) {
      codeBuilder.append(String.format(".%d", activityNumber[level]));
    }
  }
  final String code = codeBuilder.toString();
  final String parentCode = getParentCode(code);

  logger.debug("Adding activity " + code + " - " + activityLabel);

  // Create the concept and attach its descriptive properties
  Resource gamsoConcept = gamsoModel.createResource(GAMSO_BASE_URI + code, SKOS.Concept);
  gamsoConcept
      .addProperty(RDF.type, CSPAOnto.GAMSOActivity)
      .addProperty(SKOS.notation, code)
      .addProperty(SKOS.prefLabel, gamsoModel.createLiteral(activityLabel, "en"))
      .addProperty(SKOS.inScheme, gamsoCS);

  // No parent code: the concept sits directly under the concept scheme
  if (parentCode == null) {
    gamsoCS.addProperty(SKOS.hasTopConcept, gamsoConcept);
    gamsoConcept.addProperty(SKOS.topConceptOf, gamsoCS);
    return;
  }

  // Otherwise link the concept and its parent in both directions
  Resource parentConcept = gamsoModel.createResource(GAMSO_BASE_URI + parentCode);
  parentConcept.addProperty(SKOS.narrower, gamsoConcept);
  gamsoConcept.addProperty(SKOS.broader, parentConcept);
}
/**
 * Initialize the result model.
 *
 * <p>Creates a fresh default model in {@code testResults}, creates the {@code #jena2} resource
 * under {@code BASE_RESULTS_URI}, attaches a descriptive {@code rdfs:comment} (an XHTML fragment)
 * and an {@code rdfs:label} to it, and registers the {@code results} namespace prefix.
 */
public void initResults() {
  testResults = ModelFactory.createDefaultModel();
  jena2 = testResults.createResource(BASE_RESULTS_URI + "#jena2");

  // The text is assembled from several fragments: each fragment must end with a space so that
  // words are not glued together at the joins.
  String description =
      "<a xmlns=\"http://www.w3.org/1999/xhtml\" href=\"http://jena.sourceforge.net/\">Jena2</a> includes a rule-based inference engine for RDF processing, "
          + "supporting both forward and backward chaining rules. Its OWL rule set is designed to provide sound "
          + "but not complete instance reasoning for that fragment of OWL/Full limited to the OWL/lite vocabulary. In "
          + "particular it does not support unionOf/complementOf.";

  // NOTE(review): the boolean flag presumably marks the literal as well-formed XML - confirm
  // against the Jena Model.createLiteral(String, boolean) documentation.
  jena2.addProperty(RDFS.comment, testResults.createLiteral(description, true));
  jena2.addProperty(RDFS.label, "Jena2");
  testResults.setNsPrefix("results", OWLResults.NS);
}
/** * Main method: reads the Word document and extracts the information about entities. * * @throws IOException In case of error while reading the document. */ public void readGAMSODocument() throws IOException { // Read the document with POI and get the list of paragraphs XWPFDocument document = new XWPFDocument(new FileInputStream(GAMSO_DOCX)); List<XWPFParagraph> paragraphs = document.getParagraphs(); int paragraphNumber = 0; int paragraphStylingNumber = 0; int[] currentNumber = {0, 0, 0}; List<String> currentDescription = null; String currentLabel = null; // Creation of the concept scheme resource. gamsoCS = gamsoModel.createResource(GAMSO_BASE_URI + "gamso", SKOS.ConceptScheme); // Iteration through the document paragraphs logger.debug( "Document read from " + GAMSO_DOCX + ", starting to iterate through the paragraphs."); for (XWPFParagraph paragraph : paragraphs) { if (paragraph.getParagraphText() == null) continue; // skipping empty paragraphs paragraphNumber++; // Styling number will be strictly positive for headings and list elements (eg. bullet points) paragraphStylingNumber = (paragraph.getNumID() == null) ? 
0 : paragraph.getNumID().intValue(); // Add the paragraph text to the CS description if its number corresponds if (descriptionIndexes.contains(paragraphNumber)) { // TODO normalize white spaces if (gamsoDescription == null) gamsoDescription = paragraph.getParagraphText(); else gamsoDescription += " " + paragraph.getParagraphText(); } if (LEVEL1_STYLING.equals(paragraph.getStyle())) { // The first headings are in the introduction: we skip those if (paragraphStylingNumber == 0) continue; // If paragraph has a number styling, we have a new level 1 activity currentNumber[2] = 0; // Because third number may have been modified by level 3 operations if (currentDescription != null) { // Previous description is complete: record in the model this.addActivityToModel(currentNumber, currentLabel, currentDescription); } currentNumber[0]++; currentNumber[1] = 0; currentDescription = new ArrayList<String>(); currentLabel = normalizeActivityName(paragraph); } else if (LEVEL2_STYLING.equals(paragraph.getStyle())) { // Start of a new level 2 activity currentNumber[2] = 0; // Record previous description (which exists since we are at level 2) in the model this.addActivityToModel(currentNumber, currentLabel, currentDescription); currentNumber[1]++; currentDescription = new ArrayList<String>(); currentLabel = normalizeActivityName(paragraph); // Strip code for 3.x activities } else { if (currentNumber[0] == 0) continue; // Skip paragraphs that are before the first activity // Not a heading, so part of a description String descriptionPart = normalizeDescriptionItem(paragraph, paragraphStylingNumber); if (descriptionPart.length() > 0) currentDescription.add(descriptionPart); // Transform bullet points of level 2 activities into level 3 activities if ((paragraphStylingNumber > 0) && (currentNumber[1] > 0)) { currentNumber[2]++; this.addActivityToModel(currentNumber, paragraph.getParagraphText().trim(), null); } } } // The last activity read has not been added to the model yet: we do it here 
this.addActivityToModel(currentNumber, currentLabel, currentDescription); document.close(); logger.debug("Iteration through the paragraphs finished, completing the Jena model."); // Add the properties of the concept scheme (the description is now complete) gamsoCS.addProperty(SKOS.notation, gamsoModel.createLiteral("GAMSO v1.0")); gamsoCS.addProperty( SKOS.prefLabel, gamsoModel.createLiteral( "Generic Activity Model for Statistical Organisations v 1.0", "en")); gamsoCS.addProperty(SKOS.scopeNote, gamsoModel.createLiteral(gamsoDescription, "en")); }