public void oncreate() { if (Gate.getPluginsHome() != null) { System.out.println(Gate.getPluginsHome()); } else { Gate.setPluginsHome(new File("C:/Program Files/GATE_Developer_8.0/plugins")); } }
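// A minimal alternative sketch (not from the original source): resolves the plugins directory from a
// GATE_HOME environment variable instead of the hard-coded Windows path above. The method name
// onCreateFromEnvironment and the reliance on GATE_HOME being set are illustrative assumptions only.
public void onCreateFromEnvironment() {
  if (Gate.getPluginsHome() == null) {
    File gateHome = new File(System.getenv("GATE_HOME")); // assumes GATE_HOME points at the GATE installation
    Gate.setGateHome(gateHome);
    Gate.setPluginsHome(new File(gateHome, "plugins"));
  }
}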
/** Constructor, setting the home directory. */ public TestLearningAPI(String arg0) throws GateException, MalformedURLException { super(arg0); if (!initialized) { Gate.init(); learningHome = new File(new File(Gate.getGateHome(), "plugins"), "Learning"); Gate.getCreoleRegister().addDirectory(learningHome.toURI().toURL()); initialized = true; } }
@Override public void setUp() throws MalformedURLException, IOException, GateException { if (!Gate.isInitialised()) { Gate.init(); } File baseDir = new File(Gate.getPluginsHome(), "Tagger_Measurements"); parser = new MeasurementsParser( (new File(baseDir, "resources/units.dat")).toURI().toURL(), new File(baseDir, "resources/common_words.txt").toURI().toURL()); }
public static void main(String[] args) throws Exception { /*Set to your Gate installation home*/ XMLConfiguration config = new XMLConfiguration("config/madaap.xml"); Gate.setGateHome(new File(config.getString("gate.home"))); Gate.init(); Gate.getCreoleRegister() .registerDirectories( new File(Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR).toURI().toURL()); /*Set the path to \Plugins\Tools directory*/ Gate.getCreoleRegister() .registerDirectories( new File(config.getString("gate.home") + "\\plugins\\Tools").toURI().toURL()); /*Declare queue to receive URL from various collectors*/ BlockingQueue<URL> queue = new LinkedBlockingQueue<URL>(); /*Start extractor, initialization will run the thread*/ Extractor e = new Extractor(queue); /*Timer to schedule collector tasks at regular intervals*/ Timer timer = new Timer(); /*Collect URL from /input/url.txt*/ TimerTask manualFeederTask = new ManualFeeder(queue); long manualFeederTime = Long.parseLong(config.getString("timer.ManualFeederInterval")) * ONE_HOUR; // Unit of ManualFeederTime: hour timer.scheduleAtFixedRate(manualFeederTask, 0, manualFeederTime); /*Collect URL from twitter feed*/ TimerTask twitterTask = new Twitter(queue); long twitterTime = Long.parseLong(config.getString("timer.TwitterInterval")) * ONE_MIN; // Unit of twitterTime: minutes timer.scheduleAtFixedRate(twitterTask, 0, twitterTime); /*Check all URL if they are active or not*/ TimerTask checkerTask = new Checker(); long checkerTime = Long.parseLong(config.getString("timer.CheckerInterval")) * ONE_HOUR; // Unit of CheckerTime: hour timer.scheduleAtFixedRate(checkerTask, 0, checkerTime); }
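// Hedged note on the registration above (an assumption, not part of the original main()): the
// "\\plugins\\Tools" concatenation only resolves on Windows. A platform-independent sketch registers
// the Tools plugin relative to GATE's plugins home instead; the helper name is illustrative.
private static void registerToolsPlugin() throws Exception {
  Gate.getCreoleRegister()
      .registerDirectories(new File(Gate.getPluginsHome(), "Tools").toURI().toURL());
}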
@Override public void setIndexDefinition(IndexDefinition definition) { if (definition != null) { this.getFeatures().put(GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY, definition); String className = definition.getIrEngineClassName(); try { // Class aClass = Class.forName(className); Class<?> aClass = Class.forName(className, true, Gate.getClassLoader()); IREngine engine = (IREngine) aClass.newInstance(); this.indexManager = engine.getIndexmanager(); this.indexManager.setIndexDefinition(definition); this.indexManager.setCorpus(this); } catch (Exception e) { e.printStackTrace(Err.getPrintWriter()); } // switch (definition.getIndexType()) { // case GateConstants.IR_LUCENE_INVFILE: // this.indexManager = new LuceneIndexManager(); // this.indexManager.setIndexDefinition(definition); // this.indexManager.setCorpus(this); // break; // } this.addedDocs = new Vector<Document>(); this.removedDocIDs = new Vector<String>(); this.changedDocs = new Vector<Document>(); } }
/** This method is called when the SAX parser encounters the beginning of the XML document. */ @Override public void startDocument() throws org.xml.sax.SAXException { // init of variables in the parent super.startDocument(); /** * We will attempt to add namespace feature info to each namespaced element only if three * parameters are set in the global or local config file: ADD_NAMESPACE_FEATURES: boolean flag * ELEMENT_NAMESPACE_URI: feature name to use to hold namespace uri ELEMENT_NAMESPACE_PREFIX: * feature name to use to hold namespace prefix */ OptionsMap configData = Gate.getUserConfig(); boolean addNSFeature = Boolean.parseBoolean((String) configData.get(GateConstants.ADD_NAMESPACE_FEATURES)); namespaceURIFeature = (String) configData.get(GateConstants.ELEMENT_NAMESPACE_URI); namespacePrefixFeature = (String) configData.get(GateConstants.ELEMENT_NAMESPACE_PREFIX); deserializeNamespaceInfo = (addNSFeature && namespacePrefixFeature != null && !namespacePrefixFeature.isEmpty() && namespaceURIFeature != null && !namespaceURIFeature.isEmpty()); }
@Override public URL getResource(String name) { URL result = null; result = super.getResource(name); if (result != null) return result; if (getParent() == null) { result = Gate.getClassLoader().findResource(name); if (result != null) return result; } Set<GateClassLoader> children; synchronized (childClassLoaders) { children = new LinkedHashSet<GateClassLoader>(childClassLoaders.values()); } for (GateClassLoader cl : children) { if (!cl.isIsolated()) { result = cl.getResource(name); if (result != null) return result; } } return null; }
public void setConf(Configuration conf) { config = conf; if (applicationDescriptorPath == null) throw new RuntimeException("GATE application path is null"); // create one instance of the GATE application // need to avoid concurrent access to the application try { if (inited == false) { File gateHome = new File(applicationDescriptorPath.getFile()).getParentFile(); LOG.info("Setting GATE_HOME as " + gateHome); File pluginsHome = new File(gateHome, "plugins"); // the config files are in the job archive - not in the GATE // application // zip // File siteConfigFile = new File(conf // .getResource("site-gate.xml").getFile()); // File userConfig = new File(conf.getResource("user-gate.xml") // .getFile()); Gate.runInSandbox(true); Gate.setGateHome(gateHome); Gate.setPluginsHome(pluginsHome); // Gate.setSiteConfigFile(siteConfigFile); // Gate.setUserConfigFile(userConfig); // the builtInCreoleDir files // are stored in the same place as the config ones // Gate.setBuiltinCreoleDir(conf.getResource("creole.xml")); Gate.init(); inited = true; } corpus = Factory.newCorpus("DummyCorpus"); this.GATEapplication = (CorpusController) PersistenceManager.loadObjectFromUrl(applicationDescriptorPath); // load the annotation and feature filters from the configuration this.filters = GATEAnnotationFilters.getFilters(config); } catch (Exception e) { LOG.error("Encountered error while initialising GATE", e); throw new RuntimeException(e); } }
protected void initLocalData() { docTableModel = new DocumentTableModel(); try { documentsLoadedCount = Gate.getCreoleRegister().getAllInstances("gate.Document").size(); } catch (GateException exception) { exception.printStackTrace(); } }
public JSONObject persian_sentiment(String text) throws Exception { oncreate(); File PersianGapp = new File("C:/Users/mohammad/Desktop/New folder/Gate/application.xgapp"); // initialise GATE - this must be done before calling any GATE APIs Gate.init(); // load the saved application CorpusController application = (CorpusController) PersistenceManager.loadObjectFromFile(PersianGapp); // Create a Corpus to use. We recycle the same Corpus object for each // iteration. The string parameter to newCorpus() is simply the // GATE-internal name to use for the corpus. It has no particular // significance. Corpus corpus = Factory.newCorpus("BatchProcessApp Corpus"); application.setCorpus(corpus); // process the files one by one // load the document (using the specified encoding if one was given) Document doc = Factory.newDocument(text); // put the document in the corpus corpus.add(doc); // run the application application.execute(); String featureName = "Doc_sentiment"; FeatureMap features = doc.getFeatures(); // remove the document from the corpus again corpus.clear(); // doc.getFeatures(). // Release the document, as it is no longer needed Factory.deleteResource(doc); LinkedHashMap originalContent = (LinkedHashMap) features.get(featureName); String obj = (String) originalContent.get("sentiment"); // BigDecimal pos =(BigDecimal) originalContent.get("positive"); // BigDecimal neg =(BigDecimal) originalContent.get("negative"); // System.out.println(obj); // create Json for response to user JSONObject obj1 = new JSONObject(); obj1.put("sentiment", obj); /*obj1.put("positive",pos); //obj1.put("negative",neg); System.out.print("----------"); System.out.print(obj1); System.out.print("----------");*/ // application.cleanup(); return obj1; }
/** * Initialise the ANNIE system. This creates a "corpus pipeline" application that can be used to * run sets of documents through the extraction system. */ public void initAnnie() throws GateException, IOException { Out.prln("Initialising ANNIE..."); // load the ANNIE application from the saved state in plugins/ANNIE File pluginsHome = Gate.getPluginsHome(); File anniePlugin = new File(pluginsHome, "ANNIE"); File annieGapp = new File(anniePlugin, "ANNIE_with_defaults.gapp"); annieController = (CorpusController) PersistenceManager.loadObjectFromFile(annieGapp); Out.prln("...ANNIE loaded"); } // initAnnie()
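// Hedged sketch of the thin wrappers assumed by the main() example further below
// (annie.setCorpus(...) and annie.execute()); they simply delegate to the loaded controller.
// The original class very likely defines them elsewhere, so these are shown for illustration only.
public void setCorpus(Corpus corpus) {
  annieController.setCorpus(corpus);
} // setCorpus(Corpus)

public void execute() throws GateException {
  annieController.execute();
} // execute()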
/** * Every LR that is a CreoleListener (and other Listeners too) must override this method and make * sure it removes itself from the objects which it has been listening to. Otherwise, the object * will not be released from memory (memory leak!). */ @Override public void cleanup() { if (DEBUG) Out.prln("serial corpus cleanup called"); if (corpusListeners != null) corpusListeners = null; if (documents != null) documents.clear(); docDataList.clear(); Gate.getCreoleRegister().removeCreoleListener(this); if (this.dataStore != null) { this.dataStore.removeDatastoreListener(this); } }
public static void main(String[] args) { try { Gate.init(); TestHashGazetteer testGaz = new TestHashGazetteer(""); testGaz.setUp(); testGaz.testHashGazetteer(); testGaz.tearDown(); } catch (Exception e) { e.printStackTrace(); } } // main
public void actionPerformed(ActionEvent e) { List<Resource> loadedDocuments; try { // get all the documents loaded in the system loadedDocuments = Gate.getCreoleRegister().getAllInstances("gate.Document"); } catch (GateException ge) { // gate.Document is not registered in creole.xml....what is!? throw new GateRuntimeException( "gate.Document is not registered in the creole register!\n" + "Something must be terribly wrong...take a vacation!"); } Vector<String> docNames = new Vector<String>(); for (Resource loadedDocument : new ArrayList<Resource>(loadedDocuments)) { if (corpus.contains(loadedDocument)) { loadedDocuments.remove(loadedDocument); } else { docNames.add(loadedDocument.getName()); } } JList docList = new JList(docNames); docList.getSelectionModel().setSelectionInterval(0, docNames.size() - 1); docList.setCellRenderer(renderer); final JOptionPane optionPane = new JOptionPane( new JScrollPane(docList), JOptionPane.QUESTION_MESSAGE, JOptionPane.OK_CANCEL_OPTION); final JDialog dialog = optionPane.createDialog(CorpusEditor.this, "Add document(s) to this corpus"); docList.addMouseListener( new MouseAdapter() { public void mouseClicked(MouseEvent e) { if (e.getClickCount() == 2) { optionPane.setValue(JOptionPane.OK_OPTION); dialog.dispose(); } } }); dialog.setVisible(true); if (optionPane.getValue().equals(JOptionPane.OK_OPTION)) { int[] selectedIndices = docList.getSelectedIndices(); for (int selectedIndice : selectedIndices) { corpus.add((Document) loadedDocuments.get(selectedIndice)); } } changeMessage(); }
public RuleBaseViewer() { super(); try { File pluginDir = new File(Gate.getGateHome(), "plugins/Semano"); System.out.println("plugin directory: " + pluginDir.getAbsolutePath()); editURL = new File(pluginDir, "pencil.gif").toURI().toURL(); deleteURL = new File(pluginDir, "delete.gif").toURI().toURL(); System.out.println("loading icons from " + editURL); } catch (MalformedURLException e) { // this should never be reached! System.err.println("icons not found!"); } }
/** * Constructor to create a SerialCorpus from a transient one. This is called by adopt() to store * the transient corpus and re-route the methods calls to it, until the corpus is sync-ed on disk. * After that, the transientCorpus will always be null, so the new functionality will be used * instead. */ protected SerialCorpusImpl(Corpus tCorpus) { // copy the corpus name and features from the one in memory this.setName(tCorpus.getName()); this.setFeatures(tCorpus.getFeatures()); docDataList = new ArrayList<DocumentData>(); // now cache the names of all docs for future use List<String> docNames = tCorpus.getDocumentNames(); for (int i = 0; i < docNames.size(); i++) { Document doc = tCorpus.get(i); docDataList.add(new DocumentData(docNames.get(i), null, doc.getClass().getName())); } // copy all the documents from the transient corpus documents = new ArrayList<Document>(); documents.addAll(tCorpus); // make sure we fire events when docs are added/removed/etc Gate.getCreoleRegister().addCreoleListener(this); }
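// Hedged usage sketch (not part of SerialCorpusImpl itself): client code normally reaches this
// constructor indirectly by adopting a transient corpus into a serial datastore. The storage
// directory argument and the single-argument adopt(...) call are assumptions for illustration.
public static Corpus persistCorpus(Corpus transientCorpus, File storageDir) throws Exception {
  DataStore sds = Factory.createDataStore(
      "gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());
  Corpus persistentCorpus = (Corpus) sds.adopt(transientCorpus); // re-routes calls until sync-ed
  sds.sync(persistentCorpus); // writes the corpus (and its document list) to disk
  return persistentCorpus;
}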
/** * Populates this Persistence with the data that needs to be stored from the original source * object. */ @Override public void extractDataFromSource(Object source) throws PersistenceException { if (!(source instanceof ProcessingResource)) { throw new UnsupportedOperationException( getClass().getName() + " can only be used for " + ProcessingResource.class.getName() + " objects!\n" + source.getClass().getName() + " is not a " + ProcessingResource.class.getName()); } super.extractDataFromSource(source); Resource res = (Resource) source; ResourceData rData = Gate.getCreoleRegister().get(res.getClass().getName()); if (rData == null) throw new PersistenceException("Could not find CREOLE data for " + res.getClass().getName()); // now get the runtime params ParameterList params = rData.getParameterList(); try { // get the values for the init time parameters runtimeParams = Factory.newFeatureMap(); // this is a list of lists Iterator<List<Parameter>> parDisjIter = params.getRuntimeParameters().iterator(); while (parDisjIter.hasNext()) { Iterator<Parameter> parIter = parDisjIter.next().iterator(); while (parIter.hasNext()) { Parameter parameter = parIter.next(); String parName = parameter.getName(); Object parValue = res.getParameterValue(parName); ((FeatureMap) runtimeParams).put(parName, parValue); } } runtimeParams = PersistenceManager.getPersistentRepresentation(runtimeParams); } catch (ResourceInstantiationException rie) { throw new PersistenceException(rie); } }
public void setTransientSource(Object source) { if (!(source instanceof Corpus)) return; // the following initialisation is only valid when we're // constructing // this object from a transient one. If it has already been stored // in // a datastore, then the initialisation is done in readObject() // since // this method is the one called by serialisation, when objects // are restored. if (this.dataStore != null && this.lrPersistentId != null) return; Corpus tCorpus = (Corpus) source; // copy the corpus name and features from the one in memory this.setName(tCorpus.getName()); this.setFeatures(tCorpus.getFeatures()); docDataList = new ArrayList<DocumentData>(); // now cache the names of all docs for future use List<String> docNames = tCorpus.getDocumentNames(); for (int i = 0; i < docNames.size(); i++) { Document aDoc = tCorpus.get(i); docDataList.add(new DocumentData(docNames.get(i), null, aDoc.getClass().getName())); } // copy all the documents from the transient corpus documents = new ArrayList<Document>(); documents.addAll(tCorpus); this.addedDocs = new Vector<Document>(); this.removedDocIDs = new Vector<String>(); this.changedDocs = new Vector<Document>(); // make sure we fire events when docs are added/removed/etc Gate.getCreoleRegister().addCreoleListener(this); }
/** * readObject - calls the default readObject() and then initialises the transient data * * @serialData Read serializable fields. No optional data read. */ private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException { s.defaultReadObject(); documents = new ArrayList<Document>(docDataList.size()); for (int i = 0; i < docDataList.size(); i++) documents.add(null); corpusListeners = new Vector<CorpusListener>(); // finally set the creole listeners if the LR is like that Gate.getCreoleRegister().addCreoleListener(this); if (this.dataStore != null) this.dataStore.addDatastoreListener(this); // if indexed construct the manager. /*IndexDefinition definition = (IndexDefinition)this.getFeatures().get( GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY); if(definition != null) { String className = definition.getIrEngineClassName(); try { // Class aClass = Class.forName(className); Class<?> aClass = Class.forName(className, true, Gate.getClassLoader()); IREngine engine = (IREngine)aClass.newInstance(); this.indexManager = engine.getIndexmanager(); this.indexManager.setIndexDefinition(definition); this.indexManager.setCorpus(this); } catch(Exception e) { e.printStackTrace(Err.getPrintWriter()); } // switch (definition.getIndexType()) { // case GateConstants.IR_LUCENE_INVFILE: // this.indexManager = new LuceneIndexManager(); // this.indexManager.setIndexDefinition(definition); // this.indexManager.setCorpus(this); // break; // } this.addedDocs = new Vector<Document>(); this.removedDocIDs = new Vector<String>(); this.changedDocs = new Vector<Document>(); }*/ } // readObject
/** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { /* Parse command line arguments */ Getopt g = new Getopt("gateExtractor", args, "i:r:te"); g.setOpterr(false); String inputPath = ""; String outputPath = ""; boolean train = false; boolean eval = false; boolean run = false; int c; String arg; while ((c = g.getopt()) != -1) { switch (c) { case 'i': arg = g.getOptarg(); if (arg == null || arg.isEmpty()) { usage("Please provide an input path"); } inputPath = arg; break; case 'r': run = true; arg = g.getOptarg(); if (arg == null || arg.isEmpty()) { usage("Please provide an output path"); } outputPath = arg; break; case 't': train = true; break; case 'e': eval = true; break; case '?': default: usage(null); } } if (args.length == 0 || (!run && !train && !eval)) { usage("Nothing to do."); } if (inputPath == null || inputPath.isEmpty()) { usage("Please provide an input path"); } if (run && (outputPath == null || outputPath.isEmpty())) { usage("Please provide an output directory!"); } if (train && eval) { usage("Only one mode allowed at a time"); } if (train && run) { usage("Only one mode allowed at a time"); } if (eval && run) { usage("Only one mode allowed at a time"); } /* Initialize GATE */ String location = new File(Main.class.getProtectionDomain().getCodeSource().getLocation().getPath()) .getParent(); String resourcesFolder = location + "/resources"; Gate.setGateHome(new File(resourcesFolder)); /* Create ml-config.xml with threads */ createConfig(resourcesFolder + File.separator); Gate.init(); /* Load Corpus */ log.info("Loading Corpus ... "); Corpus corpus = Factory.newCorpus("Training Corpus"); File directory = new File(inputPath); URL url = directory.toURI().toURL(); corpus.populate(url, null, null, true); log.info("Done loading Corpus!"); Pipeline pipeline = null; /* Do Tagging */ pipeline = new Tagger(); pipeline.run(corpus, resourcesFolder); /* Train */ if (train) { pipeline = new Trainer(); pipeline.run(corpus, resourcesFolder); } /* Apply learned rules */ if (run) { pipeline = new Extractor(); pipeline.run(corpus, resourcesFolder); ExecutorService executorService = Executors.newFixedThreadPool(20); for (int i = 0; i < corpus.size(); i++) { executorService.execute(new OutputGenerator(outputPath, corpus.get(i))); } executorService.shutdown(); executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); } /* Evaluate results */ if (eval) { pipeline = new Evaluator(); pipeline.run(corpus, resourcesFolder); } /* Clean up */ Factory.deleteResource(corpus); outputFile_mlConfigThreads.delete(); }
@Override public Resource init() throws ResourceInstantiationException { gracefulExit = false; if (configFileURL == null) { gracefulExit = true; gate.util.Err.println("No configuration file provided!"); } if (japeURL == null) { gracefulExit = true; gate.util.Err.println("No JAPE grammar file provided!"); } // create the init params for the JAPE transducer FeatureMap params = Factory.newFeatureMap(); params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, japeURL); // Code borrowed from Mark Greenwood's Measurements PR if (japeTransducer == null) { // if this is the first time we are running init then actually create a // new transducer as we don't already have one FeatureMap hidden = Factory.newFeatureMap(); Gate.setHiddenAttribute(hidden, true); japeTransducer = (Transducer) Factory.createResource("gate.creole.Transducer", params, hidden); } else { // we are being run through a call to reInit so simply re-init the // underlying JAPE transducer japeTransducer.setParameterValues(params); japeTransducer.reInit(); } ConfigReader config = new ConfigReader(configFileURL); gracefulExit = config.config(); try { HashMap<String, String> options = config.getOptions(); patternMap = new HashMap<String, Pattern>(); addSuffixPattern("disease_suffix", options); addWordPattern("disease_abbrevs", options); addWordPattern("disease_sense", options); addWordExtraPattern("disease_sense_context", options); addPossessiveWordPattern("disease_named_syndrome", options); addWordExtraPattern("disease_generic_context", options); addWordExtraPattern("disease_anatomy_context", options); addSuffixPluralPattern("procedure_suffix", options); addWordPluralPattern("procedure_key", options); addWordExtraPattern("procedure_anatomy_context", options); addWordPluralPattern("symptom_key", options); addWordPattern("test_key", options); addSuffixPattern("anatomy_suffix_adjective", options); addSuffixPattern("anatomy_suffix", options); addPrefixPattern("anatomy_prefix", options); addWordPattern("anatomy_position", options); addWordPluralPattern("anatomy_space_region_junction", options); addWordPattern("anatomy_part_adjective", options); addWordPattern("anatomy_latin_noun", options); addWordPattern("anatomy_muscle", options); addWordPluralPattern("anatomy_part", options); addWordPluralPattern("anatomy_fluid", options); } catch (NullPointerException ne) { gracefulExit = true; gate.util.Err.println( "Missing or unset configuration options. Please check configuration file."); } return this; } // end init()
/** * Run from the command-line, with a list of URLs as argument. * * <p><B>NOTE:</B><br> * This code will run with all the documents in memory - if you want to unload each from memory * after use, add code to store the corpus in a DataStore. */ public static void main(String args[]) throws GateException, IOException { // initialise the GATE library Out.prln("Initialising GATE..."); Gate.init(); Out.prln("...GATE initialised"); // initialise ANNIE (this may take several minutes) StandAloneAnnie annie = new StandAloneAnnie(); annie.initAnnie(); // create a GATE corpus and add a document for each command-line // argument Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus"); for (int i = 0; i < args.length; i++) { URL u = new URL(args[i]); FeatureMap params = Factory.newFeatureMap(); params.put("sourceUrl", u); params.put("preserveOriginalContent", new Boolean(true)); params.put("collectRepositioningInfo", new Boolean(true)); Out.prln("Creating doc for " + u); Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params); corpus.add(doc); } // for each of args // tell the pipeline about the corpus and run it annie.setCorpus(corpus); annie.execute(); // for each document, get an XML document with the // person and location names added Iterator iter = corpus.iterator(); int count = 0; String startTagPart_1 = "<span GateID=\""; String startTagPart_2 = "\" title=\""; String startTagPart_3 = "\" style=\"background:Red;\">"; String endTag = "</span>"; while (iter.hasNext()) { Document doc = (Document) iter.next(); AnnotationSet defaultAnnotSet = doc.getAnnotations(); Set annotTypesRequired = new HashSet(); annotTypesRequired.add("Person"); annotTypesRequired.add("Location"); Set<Annotation> peopleAndPlaces = new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired)); FeatureMap features = doc.getFeatures(); String originalContent = (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME); RepositioningInfo info = (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME); ++count; File file = new File("StANNIE_" + count + ".HTML"); Out.prln("File name: '" + file.getAbsolutePath() + "'"); if (originalContent != null && info != null) { Out.prln("OrigContent and reposInfo existing. Generate file...");
Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert annotation tags backwards Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionStart = info.getOriginalPos(insertPositionStart); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); insertPositionEnd = info.getOriginalPos(insertPositionEnd, true); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } // if - should generate else if (originalContent != null) { Out.prln("OrigContent existing. Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert annotation tags backwards Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } else { Out.prln("Content : " + originalContent); Out.prln("Repositioning: " + info); } String xmlDocument = doc.toXml(peopleAndPlaces, false); String fileName = "StANNIE_toXML_" + count + ".HTML"; FileWriter writer = new FileWriter(fileName); writer.write(xmlDocument); writer.close(); } // for each doc } // main
/** Releases all resources and listeners */ public void cleanup() { Gate.getCreoleRegister().removeCreoleListener(this); // remove the annotationSetListener as well }
public static void main(String[] args) throws Exception { int num_threads = Integer.parseInt(args[0]); int chunk_size = 100000; int total_lines_read = 0; if (args.length > 4) chunk_size = Integer.parseInt(args[4]); // initialise GATE - this must be done before calling any GATE APIs Gate.init(); CorpusController application = (CorpusController) PersistenceManager.loadObjectFromFile(new File("TJInfoExtractor/application.xgapp")); List<CorpusController> applicationList = new ArrayList<CorpusController>(); for (int i = 0; i < num_threads; ++i) applicationList.add((CorpusController) Factory.duplicate(application)); String outfile = "Out.csv"; if (args.length > 2) outfile = args[2]; PrintWriter writer = new PrintWriter(outfile, "UTF-8"); writer.println( "Perspective_1st,Perspective_3rd,Name,Age,Cost,Height_ft,Height_in,Weight,Cup,Chest,Waist,Hip,Ethnicity,SkinColor,EyeColor,HairColor,Restriction_Type,Restriction_Ethnicity,Restriction_Age,PhoneNumber,AreaCode_State,AreaCode_Cities,Email,Url,Media"); outfile = "Out.txt"; if (args.length > 3) outfile = args[3]; PrintWriter writer2 = new PrintWriter(outfile, "UTF-8"); // load the document System.out.println("Reading document " + args[1] + "..."); BufferedReader br = new BufferedReader(new FileReader(args[1])); Boolean done = false; while (!done) { List<String> FileLines = new ArrayList<String>(); // Create container for results List<String> AnnotationResults = new ArrayList<String>(); List<String> AnnotationText = new ArrayList<String>(); int LinesRead = 0; String fileline; // read the file while (true) { if (LinesRead >= chunk_size) break; if ((fileline = br.readLine()) == null) { done = true; break; } FileLines.add(fileline); LinesRead++; total_lines_read++; } // launch threads to process each chunk int step = (int) Math.ceil(((double) FileLines.size()) / ((double) num_threads)); List<ExtractorThread> pool = new ArrayList<ExtractorThread>(); for (int i = 0; i < num_threads; ++i) { pool.add( new ExtractorThread( FileLines.subList(i * step, Math.min((i + 1) * step, FileLines.size())), applicationList.get(i), i)); } for (int i = 0; i < num_threads; ++i) { pool.get(i).t.join(); if (pool.get(i).results != null) AnnotationResults.addAll(pool.get(i).results); if (pool.get(i).text != null) AnnotationText.addAll(pool.get(i).text); } for (String l : AnnotationResults) writer.println(l); for (String l : AnnotationText) writer2.println(l); System.out.println("Processed " + total_lines_read + " lines..."); } br.close(); writer.close(); writer2.close(); System.out.println("All done"); }
/** * The main entry point. First we parse the command line options (see usage() method for details), * then every file found in the input directory is processed. Each file is * loaded, processed using the application and the results written to the output file * (inputFile.out.xml). */ public static void main(String[] args) throws Exception { parseCommandLine(args); // initialise GATE - this must be done before calling any GATE APIs Gate.init(); // load the saved application CorpusController application = (CorpusController) PersistenceManager.loadObjectFromFile(gappFile); // Create a Corpus to use. We recycle the same Corpus object for each // iteration. The string parameter to newCorpus() is simply the // GATE-internal name to use for the corpus. It has no particular // significance. ArrayList<String> files = getFilesFromDir(inputDir); gate.Corpus corpus = createCorpus(files); // Corpus corpus = Factory.newCorpus("BatchProcessApp Corpus"); application.setCorpus(corpus); System.out.println("Processing " + files.size() + " files"); // process the files one by one for (int i = 0; i < files.size(); i++) { // load the document (using the specified encoding if one was given) File docFile = new File(files.get(i)); System.out.print("Processing document " + docFile + " (" + i + ") ..."); Document doc = Factory.newDocument(docFile.toURI().toURL(), encoding); // put the document in the corpus corpus.add(doc); // run the application application.execute(); // remove the document from the corpus again corpus.clear(); String docXMLString = null; // if we want to just write out specific annotation types, we must // extract the annotations into a Set if (annotTypesToWrite != null) { // Create a temporary Set to hold the annotations we wish to write out Set annotationsToWrite = new HashSet(); // we extract annotations from the "Output" AnnotationSet // in this example AnnotationSet defaultAnnots = doc.getAnnotations("Output"); Iterator annotTypesIt = annotTypesToWrite.iterator(); while (annotTypesIt.hasNext()) { // extract all the annotations of each requested type and add them to // the temporary set AnnotationSet annotsOfThisType = defaultAnnots.get((String) annotTypesIt.next()); if (annotsOfThisType != null) { annotationsToWrite.addAll(annotsOfThisType); } } // create the XML string using these annotations docXMLString = doc.toXml(annotationsToWrite, true); } // otherwise, just write out the whole document as GateXML else { docXMLString = doc.toXml(); } // Release the document, as it is no longer needed Factory.deleteResource(doc); // output the XML to <inputFile>.out.xml System.out.println("Writing file " + docFile.getName()); String outputFileName = docFile.getName() + ".out.xml"; // File outputFile = new File(docFile.getParentFile(), outputFileName); File outputFile = new File(new File(outputDir).getAbsolutePath(), outputFileName); // Write output files using the same encoding as the original FileOutputStream fos = new FileOutputStream(outputFile); BufferedOutputStream bos = new BufferedOutputStream(fos); OutputStreamWriter out; if (encoding == null) { out = new OutputStreamWriter(bos); } else { out = new OutputStreamWriter(bos, encoding); } out.write(docXMLString); out.close(); System.out.println("done"); } // for each file System.out.println("All done"); } // void main(String[] args)
/** Delegate loading to the super class (loadClass has protected access there). */ private Class<?> loadClass( String name, boolean resolve, boolean localOnly, Set<GateClassLoader> visited) throws ClassNotFoundException { Class<?> previous = findLoadedClass(name); if (previous != null) { if (DEBUG) System.out.println("CACHE HIT: " + name + " -- " + id); return previous; } if (DEBUG) System.out.println( name + " -- " + id + ": " + localOnly + "/" + isolated + "/" + getParent()); // to ensure we don't end up looping through the same classloader // twice we // keep a track of which ones we have already visited visited.add(this); if (!this.equals(Gate.getClassLoader())) { try { // first we see if we can find the class via the system class // path Class<?> found = Gate.getClassLoader().getParent().loadClass(name); URL url = findResource(name.replace('.', '/') + ".class"); if (url != null) log.warn( name + " is available via both the system classpath and a plugin; the plugin classes will be ignored"); // if we got to here then the class has been found via the // system // classpath so return it and stop looking return found; } catch (ClassNotFoundException e) { // this can safely be ignored } } try { // try loading and returning by looking within this classloader return super.loadClass(name, resolve); } catch (ClassNotFoundException e) { // this can safely be ignored } if (this.getParent() != null && this.getParent() instanceof GateClassLoader) visited.add((GateClassLoader) this.getParent()); if (!localOnly) { // if we aren't just looking locally then... if (getParent() == null) { try { // if this classloader doesn't have a parent then it must be // disposable, but as we haven't found the class we need yet // we should // now look into the main GATE classloader return Gate.getClassLoader().loadClass(name, resolve, false, visited); } catch (ClassNotFoundException e) { // this can safely be ignored } } Set<GateClassLoader> children; synchronized (childClassLoaders) { children = new LinkedHashSet<GateClassLoader>(childClassLoaders.values()); } // make sure we don't visit a classloader we've already been // through children.removeAll(visited); for (GateClassLoader cl : children) { // the class isn't to be found in either this classloader or the // main // GATE classloader so let's check all the other disposable // classloaders try { if (!cl.isIsolated()) return cl.loadClass(name, resolve, true, visited); } catch (ClassNotFoundException e) { // this can safely be ignored } } } // if we got to here then no matter where we have looked we have // been unable // to find the class requested so throw an exception throw new ClassNotFoundException(name); }
/** This method is called when all characters between specific tags have been read completely */ public void charactersAction(char[] text, int start, int length) throws SAXException { // correction of the real offset. It does not affect other data. super.characters(text, start, length); // create a string object based on the reported text String content = new String(text, start, length); StringBuffer contentBuffer = new StringBuffer(""); int tmpDocContentSize = tmpDocContent.length(); boolean incrementStartIndex = false; boolean addExtraSpace = true; if (Gate.getUserConfig().get(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME) != null) { addExtraSpace = Gate.getUserConfig() .getBoolean(GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME) .booleanValue(); } // If the first char of the text just read "text[0]" is NOT whitespace AND // the last char of the tmpDocContent[SIZE-1] is NOT whitespace then // concatenation "tmpDocContent + content" will result in a new, different // word... and we want to avoid that, because the tokenizer, gazetteer and // JAPE work on the raw text and concatenating tokens might not be good. if (tmpDocContentSize != 0 && content.length() != 0 && !Character.isWhitespace(content.charAt(0)) && !Character.isWhitespace(tmpDocContent.charAt(tmpDocContentSize - 1))) { // If we are here it means that a concatenation between the last // token in the tmpDocContent and the content (which doesn't start // with a white space) will be performed. In order to prevent this, // we will add a " " space char in order to assure that the 2 tokens // stay apart. However we will exempt from this rule the most common // internal entities like &, <, >, etc if (( // Testing the length against 1 makes it more likely that // an internal entity was called. characters() gets called for // each entity separately. (content.length() == 1) && (content.charAt(0) == '&' || content.charAt(0) == '<' || content.charAt(0) == '>' || content.charAt(0) == '"' || content.charAt(0) == '\'')) || (tmpDocContent.charAt(tmpDocContentSize - 1) == '&' || tmpDocContent.charAt(tmpDocContentSize - 1) == '<' || tmpDocContent.charAt(tmpDocContentSize - 1) == '>' || tmpDocContent.charAt(tmpDocContentSize - 1) == '"' || tmpDocContent.charAt(tmpDocContentSize - 1) == '\'')) { // do nothing. The content will be appended } else if (!addExtraSpace) { } else { // In all other cases append " " contentBuffer.append(" "); incrementStartIndex = true; } // End if } // End if // put the repositioning information if (reposInfo != null) { if (!(start == 0 && length == 1 && text.length <= 2)) { // normal piece of text reposInfo.addPositionInfo( getRealOffset(), content.length(), tmpDocContent.length() + contentBuffer.length(), content.length()); if (DEBUG) { Out.println("Info: " + getRealOffset() + ", " + content.length()); Out.println("Start: " + start + " len" + length); } // DEBUG } else { // unicode char or &xxx; encoding // The offset reported by the parser is 0 // The real offset should be found in the ampCodingInfo structure.
long lastPosition = 0; RepositioningInfo.PositionInfo pi; if (reposInfo.size() > 0) { pi = reposInfo.get(reposInfo.size() - 1); lastPosition = pi.getOriginalPosition(); } // if for (int i = 0; i < ampCodingInfo.size(); ++i) { pi = ampCodingInfo.get(i); if (pi.getOriginalPosition() > lastPosition) { // found reposInfo.addPositionInfo( pi.getOriginalPosition(), pi.getOriginalLength(), tmpDocContent.length() + contentBuffer.length(), content.length()); break; } // if } // for } // if } // if // update the document content contentBuffer.append(content); // calculate the End index for all the elements of the stack // the expression is : End index = Current doc length + text length Long end = new Long(tmpDocContent.length() + contentBuffer.length()); CustomObject obj = null; // Iterate through stack to modify the End index of the existing elements Iterator<CustomObject> anIterator = stack.iterator(); while (anIterator.hasNext()) { // get the object and move to the next one obj = anIterator.next(); if (incrementStartIndex && obj.getStart().equals(obj.getEnd())) { obj.setStart(new Long(obj.getStart().longValue() + 1)); } // End if // sets its End index obj.setEnd(end); } // End while tmpDocContent.append(contentBuffer.toString()); } // characters();
protected void initListeners() { // mouse double-click to open the document // context menu to get the actions for the selection docTable.addMouseListener( new MouseAdapter() { public void mouseClicked(MouseEvent e) { processMouseEvent(e); } public void mousePressed(MouseEvent e) { if (e.isPopupTrigger()) { processMouseEvent(e); } } public void mouseReleased(MouseEvent e) { if (e.isPopupTrigger()) { processMouseEvent(e); } } private void processMouseEvent(MouseEvent e) { int row = docTable.rowAtPoint(e.getPoint()); if (row == -1) { return; } if (e.isPopupTrigger()) { // context menu if (!docTable.isRowSelected(row)) { // if right click outside the selection then reset selection docTable.getSelectionModel().setSelectionInterval(row, row); } JPopupMenu popup = new XJPopupMenu(); popup.add(openDocumentsAction); popup.add(removeDocumentsAction); popup.show(docTable, e.getPoint().x, e.getPoint().y); } else if (e.getID() == MouseEvent.MOUSE_CLICKED && e.getClickCount() == 2) { // open document on double-click openDocumentsAction.actionPerformed(null); } } }); // Enter key opens the selected documents docTable.addKeyListener( new KeyAdapter() { public void keyPressed(KeyEvent e) { if (e.getKeyCode() == KeyEvent.VK_ENTER) { openDocumentsAction.actionPerformed(null); } } }); docTable .getSelectionModel() .addListSelectionListener( new ListSelectionListener() { public void valueChanged(ListSelectionEvent e) { // enable/disable buttons according to the selection removeDocumentsAction.setEnabled(docTable.getSelectedRowCount() > 0); openDocumentsAction.setEnabled(docTable.getSelectedRowCount() > 0); moveUpAction.setEnabled( docTable.getSelectedRowCount() > 0 && !docTable.isRowSelected(0)); moveDownAction.setEnabled( docTable.getSelectedRowCount() > 0 && !docTable.isRowSelected(docTable.getRowCount() - 1)); } }); Gate.getCreoleRegister() .addCreoleListener( new CreoleListener() { public void resourceLoaded(CreoleEvent e) { if (e.getResource() instanceof Document) { documentsLoadedCount++; changeMessage(); } } public void resourceUnloaded(CreoleEvent e) { if (e.getResource() instanceof Document) { documentsLoadedCount--; changeMessage(); } } public void datastoreOpened(CreoleEvent e) { /* do nothing */ } public void datastoreCreated(CreoleEvent e) { /* do nothing */ } public void datastoreClosed(CreoleEvent e) { /* do nothing */ } public void resourceRenamed(Resource resource, String oldName, String newName) { /* do nothing */ } }); }
/** * Loads any custom operators and annotation accessors into the ConstraintFactory. * * @throws ResourceInstantiationException */ protected void initCustomConstraints() throws ResourceInstantiationException { // Load operators if (operators != null) { for (String opName : operators) { Class<? extends ConstraintPredicate> clazz = null; try { clazz = Class.forName(opName, true, Gate.getClassLoader()) .asSubclass(ConstraintPredicate.class); } catch (ClassNotFoundException e) { // if couldn't find it that way, try with current thread class loader try { clazz = Class.forName(opName, true, Thread.currentThread().getContextClassLoader()) .asSubclass(ConstraintPredicate.class); } catch (ClassNotFoundException e1) { throw new ResourceInstantiationException( "Cannot load class for operator: " + opName, e1); } } catch (ClassCastException cce) { throw new ResourceInstantiationException( "Operator class '" + opName + "' must implement ConstraintPredicate"); } // instantiate an instance of the class so can get the operator string try { ConstraintPredicate predicate = clazz.newInstance(); String opSymbol = predicate.getOperator(); // now store it in ConstraintFactory Factory.getConstraintFactory().addOperator(opSymbol, clazz); } catch (Exception e) { throw new ResourceInstantiationException( "Cannot instantiate class for operator: " + opName, e); } } } // Load annotationAccessors if (annotationAccessors != null) { for (String accessorName : annotationAccessors) { Class<? extends AnnotationAccessor> clazz = null; try { clazz = Class.forName(accessorName, true, Gate.getClassLoader()) .asSubclass(AnnotationAccessor.class); } catch (ClassNotFoundException e) { // if couldn't find it that way, try with current thread class loader try { clazz = Class.forName(accessorName, true, Thread.currentThread().getContextClassLoader()) .asSubclass(AnnotationAccessor.class); } catch (ClassNotFoundException e1) { throw new ResourceInstantiationException( "Cannot load class for accessor: " + accessorName, e1); } } catch (ClassCastException cce) { throw new ResourceInstantiationException( "Operator class '" + accessorName + "' must implement AnnotationAccessor"); } // instantiate an instance of the class so can get the meta-property name string try { AnnotationAccessor aa = clazz.newInstance(); String accSymbol = (String) aa.getKey(); // now store it in ConstraintFactory Factory.getConstraintFactory().addMetaProperty(accSymbol, clazz); } catch (Exception e) { throw new ResourceInstantiationException( "Cannot instantiate class for accessor: " + accessorName, e); } } } }
public GateAnalyzer(String appPath) throws Exception { Gate.init(); controller = (SerialAnalyserController) PersistenceManager.loadObjectFromFile(new File(appPath)); }
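// Hedged companion sketch (assumption: GateAnalyzer keeps the loaded pipeline in the "controller"
// field set above and gains a helper like this): runs the pipeline over a single in-memory text and
// returns its default annotation set. The method name analyze is illustrative only.
public AnnotationSet analyze(String text) throws Exception {
  Corpus corpus = Factory.newCorpus("GateAnalyzer corpus");
  Document doc = Factory.newDocument(text);
  corpus.add(doc);
  controller.setCorpus(corpus);
  controller.execute();
  corpus.clear(); // detach the document; the caller releases it with Factory.deleteResource(doc) when done
  Factory.deleteResource(corpus);
  return doc.getAnnotations();
}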