private List<ConLLWord> addWordResourcesToModel( OntModel outputModel, List<ConLLWord> wordObjectsOfSentence, Individual sentenceResource, Individual contextResource) { for (ConLLWord word : wordObjectsOfSentence) { // generate URI differently String uri = contextResource.getURI().substring(0, contextResource.getURI().lastIndexOf("=") + 1) + word.getStart() + "," + word.getEnd(); Individual wordResource = outputModel.createIndividual( uri, outputModel.createClass(NIFOntClasses.RFC5147String.getUri())); wordResource.addOntClass(NIFOntClasses.Word.getOntClass(outputModel)); wordResource.addOntClass(NIFOntClasses.String.getOntClass(outputModel)); wordResource.addProperty( NIFDatatypeProperties.beginIndex.getDatatypeProperty(outputModel), word.getStart() + ""); wordResource.addProperty( NIFDatatypeProperties.endIndex.getDatatypeProperty(outputModel), word.getEnd() + ""); wordResource.addLiteral( NIFDatatypeProperties.anchorOf.getDatatypeProperty(outputModel), outputModel.createLiteral(word.getWordString())); wordResource.addProperty( NIFDatatypeProperties.posTag.getDatatypeProperty(outputModel), word.getPos()); if (tagsetKnown) addOliaPos(wordResource, word.getPos(), outputModel); wordResource.addProperty( NIFObjectProperties.referenceContext.getObjectProperty(outputModel), contextResource); // TODO: we may need something different here if (!word.getPosFine().equals("_")) wordResource.addProperty( NIFDatatypeProperties.posTag.getDatatypeProperty(outputModel), word.getPosFine()); if (!word.getLemma().equals("_")) wordResource.addProperty( NIFDatatypeProperties.lemma.getDatatypeProperty(outputModel), word.getLemma()); // TODO: add genus, numerus etc here wordResource.addProperty( NIFObjectProperties.sentence.getObjectProperty(outputModel), sentenceResource); word.setResource(wordResource); sentenceResource.addProperty( NIFObjectProperties.word.getObjectProperty(outputModel), wordResource); sentenceResource.addProperty( NIFObjectProperties.referenceContext.getObjectProperty(outputModel), contextResource); } return wordObjectsOfSentence; }
public void transform(OntModel inputModel, OntModel outputModel, NIFParameters nifParameters) { this.prefix = nifParameters.getPrefix(); String uri = this.prefix + "char=0,"; // only supporting RFC5147 string atm contextResource = outputModel.createIndividual( uri, outputModel.createClass(NIFOntClasses.RFC5147String.getUri())); contextResource.addOntClass(NIFOntClasses.Context.getOntClass(outputModel)); contextResource.addOntClass(NIFOntClasses.String.getOntClass(outputModel)); contextResource.addProperty( NIFDatatypeProperties.beginIndex.getDatatypeProperty(outputModel), "0"); if (!nifParameters.getOptions().has("informat")) { log.warn("informat parameter empty, please choose informat=file or informat=text"); } if (!nifParameters.getOptions().has("tagset")) { log.warn( "No tagset chosen, please choose an OLiA tagset from: https://github.com/NLP2RDF/software/blob/master/java-maven/vocabularymodule/OLiA/src/main/java/org/nlp2rdf/vm/olia/models"); } else { loadTagset(nifParameters.getOptions().valueOf("tagset").toString()); } if (nifParameters.getOptions().valueOf("intype").equals("file")) { if (nifParameters.getOptions().valueOf("informat").equals("text")) { File input = new File(nifParameters.getOptions().valueOf("i").toString()); FileReader reader = null; try { reader = new FileReader(input); this.transformConLL(reader, inputModel, outputModel, nifParameters); reader.close(); } catch (FileNotFoundException fnf) { log.error("Could not open file " + nifParameters.getOptions().valueOf("i").toString()); } catch (IOException e) { log.error("Could not read file " + nifParameters.getOptions().valueOf("i").toString()); } } } else if (nifParameters.getOptions().valueOf("intype").equals("url")) { log.error("URL input not yet supported"); } else { if (nifParameters.getOptions().valueOf("informat").equals("text")) { StringReader reader = new StringReader(nifParameters.getOptions().valueOf("i").toString()); this.transformConLL(reader, inputModel, outputModel, nifParameters); reader.close(); } } }
private Individual addSentenceResourceToModel( OntModel outputModel, String sentence, int startOffset, Individual context) { int endOffset = startOffset + sentence.length(); String uri = context.getURI().substring(0, context.getURI().lastIndexOf("=") + 1) + startOffset + "," + endOffset; // only supporting RFC5147 string atm Individual sentenceResource = outputModel.createIndividual( uri, outputModel.createClass(NIFOntClasses.RFC5147String.getUri())); sentenceResource.addOntClass(NIFOntClasses.Sentence.getOntClass(outputModel)); sentenceResource.addOntClass(NIFOntClasses.String.getOntClass(outputModel)); sentenceResource.addProperty( NIFDatatypeProperties.beginIndex.getDatatypeProperty(outputModel), startOffset + ""); sentenceResource.addProperty( NIFDatatypeProperties.endIndex.getDatatypeProperty(outputModel), endOffset + ""); sentenceResource.addLiteral( NIFDatatypeProperties.anchorOf.getDatatypeProperty(outputModel), outputModel.createLiteral(sentence)); return sentenceResource; }