@Override public Document get(int index) { if (index >= docDataList.size()) return null; Document res = documents.get(index); if (DEBUG) Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + res); // if the document is null, then I must get it from the DS if (res == null) { FeatureMap parameters = Factory.newFeatureMap(); parameters.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore); try { parameters.put(DataStore.LR_ID_FEATURE_NAME, docDataList.get(index).getPersistentID()); Document lr = (Document) Factory.createResource(docDataList.get(index).getClassType(), parameters); if (DEBUG) Out.prln("Loaded document :" + lr.getName()); // change the result to the newly loaded doc res = lr; // finally replace the doc with the instantiated version documents.set(index, lr); } catch (ResourceInstantiationException ex) { Err.prln("Error reading document inside a serialised corpus."); throw new GateRuntimeException(ex); } } return res; }
@Override public boolean add(Document o) { if (o == null) return false; Document doc = o; // make it accept only docs from its own datastore if (doc.getDataStore() != null && !this.dataStore.equals(doc.getDataStore())) { Err.prln("Error: Persistent corpus can only accept documents " + "from its own datastore!"); return false; } // if // add the document with its index in the docDataList // in this case, since it's going to be added to the end // the index will be the size of the docDataList before // the addition DocumentData docData = new DocumentData(doc.getName(), doc.getLRPersistenceId(), doc.getClass().getName()); boolean result = docDataList.add(docData); documents.add(doc); documentAdded(doc); fireDocumentAdded( new CorpusEvent( SerialCorpusImpl.this, doc, docDataList.size() - 1, doc.getLRPersistenceId(), CorpusEvent.DOCUMENT_ADDED)); return result; }
/** * This method is called when the SAX parser encounts the end of the XML document. Here we set the * content of the gate Document to be the one generated inside this class (tmpDocContent). After * that we use the colector to generate all the annotation reffering this new gate document. */ @Override public void endDocument() throws org.xml.sax.SAXException { // replace the document content with the one without markups doc.setContent(new DocumentContentImpl(tmpDocContent.toString())); // fire the status listener fireStatusChangedEvent("Total elements: " + elements); // If basicAs is null then get the default AnnotationSet, // based on the gate document. if (basicAS == null) { basicAS = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME); } // sort colector ascending on its id Collections.sort(colector); Set<Integer> testIdsSet = new HashSet<Integer>(); // create all the annotations (on this new document) from the collector while (!colector.isEmpty()) { CustomObject obj = colector.getFirst(); // Test to see if there are two annotation objects with the same id. if (testIdsSet.contains(obj.getId())) { throw new GateSaxException( "Found two annotations with the same Id(" + obj.getId() + ").The document is inconsistent."); } else { testIdsSet.add(obj.getId()); } // End iff // create a new annotation and add it to the annotation set try { // the annotation type will be conforming with markupElementsMap // add the annotation to the Annotation Set if (markupElementsMap == null) { basicAS.add(obj.getId(), obj.getStart(), obj.getEnd(), obj.getElemName(), obj.getFM()); } else { // get the type of the annotation from Map String annotationType = markupElementsMap.get(obj.getElemName()); if (annotationType != null) { basicAS.add(obj.getId(), obj.getStart(), obj.getEnd(), annotationType, obj.getFM()); } } // End if } catch (gate.util.InvalidOffsetException e) { Err.prln( "InvalidOffsetException for annot :" + obj.getElemName() + " with Id =" + obj.getId() + ". Discarded..."); } // End try colector.remove(obj); } // End while } // endDocument();
/** * This method is called when the HTML parser encounts the end of a tag that means that the tag is * paired by a beginning tag */ @Override public void handleEndTag(HTML.Tag t, int pos) { // obj is for internal use CustomObject obj = null; // end of STYLE tag if (HTML.Tag.STYLE.equals(t)) { isInsideStyleTag = false; } // if // If the stack is not empty then we get the object from the stack if (!stack.isEmpty()) { obj = stack.pop(); // Before adding it to the colector, we need to check if is an // emptyAndSpan one. See CustomObject's isEmptyAndSpan field. if (obj.getStart().equals(obj.getEnd())) { // The element had an end tag and its start was equal to its end. Hence // it is anEmptyAndSpan one. obj.getFM().put("isEmptyAndSpan", "true"); } // End iff // we add it to the colector colector.add(obj); } // End if // If element has text between, then customize its apearance if (obj != null && obj.getStart().longValue() != obj.getEnd().longValue()) // Customize the appearance of the document customizeAppearanceOfDocumentWithEndTag(t); // if t is the </HTML> tag then we reached the end of theHTMLdocument if (t == HTML.Tag.HTML) { // replace the old content with the new one doc.setContent(new DocumentContentImpl(tmpDocContent.toString())); // If basicAs is null then get the default annotation // set from this gate document if (basicAS == null) basicAS = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME); // sort colector ascending on its id Collections.sort(colector); // iterate through colector and construct annotations while (!colector.isEmpty()) { obj = colector.getFirst(); colector.remove(obj); // Construct an annotation from this obj try { if (markupElementsMap == null) { basicAS.add(obj.getStart(), obj.getEnd(), obj.getElemName(), obj.getFM()); } else { String annotationType = markupElementsMap.get(obj.getElemName()); if (annotationType != null) basicAS.add(obj.getStart(), obj.getEnd(), annotationType, obj.getFM()); } } catch (InvalidOffsetException e) { Err.prln("Error creating an annot :" + obj + " Discarded..."); } // end try // }// end if } // while // notify the listener about the total amount of elements that // has been processed fireStatusChangedEvent("Total elements : " + elements); } // else } // handleEndTag