public String translate( String sourceLang, String intermediateLang, String targetLang, String source) { try { File taeDescriptor = new File(descriptor); // File inputFile = new File(source); XMLInputSource in = new XMLInputSource(taeDescriptor); ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); AnalysisEngine tae = UIMAFramework.produceAnalysisEngine(specifier); // String document = FileUtils.file2String(inputFile, "UTF-8"); JCas jcas = tae.newJCas(); jcas.setDocumentText(source); jcas.setDocumentLanguage(sourceLang + "," + intermediateLang + "," + targetLang); tae.process(jcas); String result = getResult(jcas, sourceLang, targetLang); return result; } catch (Exception e) { e.printStackTrace(); } return null; }
/**
 * Parses the collection reader descriptor referenced by {@code collectionReaderDescriptor},
 * instantiates the reader and hands it to {@code uimaEEEngine}.
 *
 * <p>Failures do not propagate to the caller; they are logged so that a missing or invalid
 * descriptor is no longer silently ignored.
 */
public void setCollectionReader() {
  try {
    CollectionReaderDescription collectionReaderDescription =
        UIMAFramework.getXMLParser()
            .parseCollectionReaderDescription(new XMLInputSource(collectionReaderDescriptor));
    CollectionReader collectionReader =
        UIMAFramework.produceCollectionReader(collectionReaderDescription);
    uimaEEEngine.setCollectionReader(collectionReader);
  } catch (ResourceInitializationException | InvalidXMLException | IOException e) {
    // Best-effort: the method still returns normally, but the cause is recorded.
    UIMAFramework.getLogger(getClass())
        .log(
            org.apache.uima.util.Level.SEVERE,
            "Could not set collection reader from descriptor " + collectionReaderDescriptor,
            e);
  }
}
/**
 * Builds a fresh settings object whose {@code "Directory"} parameter carries the value of the
 * {@code "OutputDirectory"} parameter of the TermSuite tool's own settings.
 *
 * @return newly created settings containing only the {@code "Directory"} parameter
 * @throws Exception declared by the overridden method
 */
@Override
public ConfigurationParameterSettings settings() throws Exception {
  ConfigurationParameterSettings toolParameters =
      this.getTermSuiteTool().getSettings().getMetaData().getConfigurationParameterSettings();

  ConfigurationParameterSettings result =
      UIMAFramework.getResourceSpecifierFactory().createConfigurationParameterSettings();

  // Read from the tool's settings, write under the name this component expects.
  String outputDirectory = (String) toolParameters.getParameterValue("OutputDirectory");
  result.setParameterValue("Directory", outputDirectory);
  return result;
}
/**
 * Loads the entries read from {@code inputStream} unless {@code id} has already been loaded.
 *
 * <p>The id is recorded as loaded only after the stream has been parsed successfully, so a
 * failed parse does not prevent a later retry with the same id.
 *
 * @param id identifier used to detect repeated loads
 * @param inputStream stream handed to {@code parse}
 * @throws IOException if parsing the stream fails
 */
@Override
public void load(String id, InputStream inputStream) throws IOException {
  if (this.isLoaded(id)) {
    return;
  }
  UIMAFramework.getLogger().log(Level.INFO, "Loading " + id);

  Set<Entry<String, String>> entries = this.parse(inputStream);
  // Mark as loaded only once parsing has succeeded.
  this.getLoaded().add(id);

  for (Entry<String, String> entry : entries) {
    this.add(entry.getKey(), entry.getValue());
  }
}
/** * Constructor for the class. * * @param args command line arguments into the program - see class description */ public SimpleQuestionRunCPE(String args[]) throws Exception { mStartTime = System.currentTimeMillis(); if (args.length == 0) { args = new String[1]; args[0] = new String("src/main/resources/CpeQuestionDescriptor.xml"); } // check command line args if (args.length < 1) { printUsageMessage(); System.exit(1); } // parse CPE descriptor System.out.println("Parsing CPE Descriptor"); CpeDescription cpeDesc = UIMAFramework.getXMLParser().parseCpeDescription(new XMLInputSource(args[0])); // instantiate CPE System.out.println("Instantiating CPE"); mCPE = UIMAFramework.produceCollectionProcessingEngine(cpeDesc); // Create and register a Status Callback Listener mCPE.addStatusCallbackListener(new StatusCallbackListenerImpl()); // Start Processing System.out.println("Running CPE"); mCPE.process(); // Allow user to abort by pressing Enter System.out.println("To abort processing, type \"abort\" and press enter."); while (true) { String line = new BufferedReader(new InputStreamReader(System.in)).readLine(); if ("abort".equals(line) && mCPE.isProcessing()) { System.out.println("Aborting..."); mCPE.stop(); break; } } }
/** * Method that initializes all vital prerequisites, including POS Tagger * * @param language Language to be processed with this copy of HeidelTime * @param typeToProcess Domain type to be processed * @param outputType Output type * @param configPath Path to the configuration file for HeidelTimeStandalone * @param posTagger POS Tagger to use for preprocessing * @param doIntervalTagging Whether or not to invoke the IntervalTagger */ public void initialize( Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger, Boolean doIntervalTagging) { logger.log( Level.INFO, "HeidelTimeStandalone initialized with language " + this.language.getName()); // set the POS tagger this.posTagger = posTagger; // set doIntervalTagging flag this.doIntervalTagging = doIntervalTagging; // read in configuration in case it's not yet initialized if (!Config.isInitialized()) { if (configPath == null) readConfigFile(CLISwitch.CONFIGFILE.getValue().toString()); else readConfigFile(configPath); } try { heidelTime = new HeidelTime(); heidelTime.initialize( new UimaContextImpl(language, typeToProcess, CLISwitch.VERBOSITY2.getIsActive())); logger.log(Level.INFO, "HeidelTime initialized"); } catch (Exception e) { e.printStackTrace(); logger.log(Level.WARNING, "HeidelTime could not be initialized"); } // Initialize JCas factory ------------- logger.log(Level.FINE, "Initializing JCas factory..."); try { TypeSystemDescription[] descriptions = new TypeSystemDescription[] { UIMAFramework.getXMLParser() .parseTypeSystemDescription( new XMLInputSource( this.getClass() .getClassLoader() .getResource(Config.get(Config.TYPESYSTEMHOME)))) }; jcasFactory = new JCasFactoryImpl(descriptions); logger.log(Level.INFO, "JCas factory initialized"); } catch (Exception e) { e.printStackTrace(); logger.log(Level.WARNING, "JCas factory could not be initialized"); } }
public static void main(String[] args) throws Exception { String sLine; long startTime = System.currentTimeMillis(); URL descUrl = VectorSpaceRetrieval.class.getResource( "/descriptors/retrievalsystem/VectorSpaceRetrieval.xml"); if (descUrl == null) { throw new IllegalArgumentException("Error opening VectorSpaceRetrieval.xml"); } // create AnalysisEngine XMLInputSource input = new XMLInputSource(descUrl); AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(input); AnalysisEngine anAnalysisEngine = UIMAFramework.produceAnalysisEngine(desc); CAS aCas = anAnalysisEngine.newCAS(); URL docUrl = VectorSpaceRetrieval.class.getResource("/data/documents.txt"); if (docUrl == null) { throw new IllegalArgumentException("Error opening data/documents.txt"); } BufferedReader br = new BufferedReader(new InputStreamReader(docUrl.openStream())); while ((sLine = br.readLine()) != null) { aCas.setDocumentText(sLine); anAnalysisEngine.process(aCas); aCas.reset(); } br.close(); br = null; anAnalysisEngine.collectionProcessComplete(); anAnalysisEngine.destroy(); long endTime = System.currentTimeMillis(); double totalTime = (endTime - startTime) / 1000.0; System.out.println("Total time taken: " + totalTime); }
@Test public void test() throws Exception { TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescription("desc.types.test-TypeSystem"); CollectionReaderDescription colReaderDesc = CollectionReaderFactory.createReaderDescription( XmiCollectionReader.class, tsd, XmiCollectionReader.PARAM_INPUTDIR, inputFileXMIDir); // configure AE XMLInputSource aeDescInput = new XMLInputSource(U2BAggregateDesc); AnalysisEngineDescription aeDesc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(aeDescInput); SimplePipeline.runPipeline(colReaderDesc, aeDesc); }
public static void main(String[] args) throws IOException, InvalidXMLException, CASException, ResourceInitializationException, SAXException { JCas jcas = null; // Leer el descriptor del anotador XMLParser xmlParser = UIMAFramework.getXMLParser(); XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml"); // Crear un AE en base al descriptor AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in); // Obtener el CAS jcas = CasCreationUtils.createCas(tsDesc).getJCas(); if (jcas != null) { // De-serializar la anotacion de un fichero FileInputStream inputStream = null; inputStream = new FileInputStream("resources/annotation.xmi"); XmiCasDeserializer.deserialize(inputStream, jcas.getCas()); // Obtener el texto de la anotacion String sofaString = jcas.getDocumentText(); System.out.println(sofaString); // Usar las anotaciones del fichero FSIterator it = jcas.getAnnotationIndex(Metric.type).iterator(); while (it.isValid()) { Metric metric = (Metric) it.get(); Number number = metric.getNumber(); Unit unit = metric.getUnit(); Double value = (number.getIsDouble()) ? number.getAbsoluteDoubleValue() : Double.valueOf(number.getAbsoluteIntegerValue()); System.out.println("==================="); System.out.println("Metric: " + metric.getCoveredText()); System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier()); System.out.println("Base unit: " + unit.getBaseUnit()); it.moveToNext(); } } }
/** * Called when the processing of a Document is completed. <br> * The process status can be looked at and corresponding actions taken. * * @param aCas CAS corresponding to the completed processing * @param aStatus EntityProcessStatus that holds the status of all the events for aEntity */ public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) { // if there is an error above the individual document level, // an entityProcessStatus is created with a null value for entity if (aCas == null) { for (int i = 0; i < aStatus.getFailedComponentNames().size(); i++) { LOG.info("[{}] FailedComponentNames", aStatus.getFailedComponentNames().get(i)); } for (int i = 0; i < aStatus.getExceptions().size(); i++) { LOG.info("[{}] Exceptions", aStatus.getExceptions().get(i)); } return; } try { entityCount++; // FIXME int dataSize = 0; // // get size here // Type t = aCas.getTypeSystem().getType("uima.cpm.FileLocation"); // Feature f = t.getFeatureByBaseName("DocumentSize"); // FSIterator fsI = aCas.getAnnotationIndex(t).iterator(); // if (fsI.isValid()) { // dataSize = fsI.get().getIntValue(f); // } // // size += dataSize; // to handle exceptions occured in any of the components for the // entity if (aStatus.isException()) { for (int q = 0; q < aStatus.getExceptions().size(); q++) { Exception e = (Exception) aStatus.getExceptions().get(q); e.printStackTrace(); } } } catch (Exception io) { UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, "", io); } }
/**
 * Checks that the HtmlConverter inserts the configured gap text for gap-inducing tags and that
 * the TAG annotations of the converted view cover the expected text.
 *
 * @throws Exception if an engine cannot be created or processing fails
 */
@Test
public void test() throws Exception {
  String html =
      "<Parent>\n"
          + "<Child1>Some content</Child1>\n"
          + "<Child2 attribute=“someValue” />\n"
          + "<Child3>More content.</Child3>\n"
          + "</Parent>\n";

  // Locate both descriptors, falling back to the packaged path when the short name is absent.
  ClassLoader loader = HtmlAnnotator.class.getClassLoader();
  URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
  if (annotatorUrl == null) {
    annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
  }
  URL converterUrl = loader.getResource("HtmlConverter.xml");
  if (converterUrl == null) {
    converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
  }

  // Annotator engine: annotate the tags themselves, not only their content.
  ResourceSpecifier annotatorSpec =
      UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
  AnalysisEngine annotator = UIMAFramework.produceAnalysisEngine(annotatorSpec);
  annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
  annotator.reconfigure();

  // Converter engine: every child tag induces a "$" gap.
  ResourceSpecifier converterSpec =
      UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
  AnalysisEngine converter = UIMAFramework.produceAnalysisEngine(converterSpec);
  converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
  converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
  converter.setConfigParameterValue(
      HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
  converter.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
  converter.reconfigure();

  CAS cas = annotator.newCAS();
  Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

  cas.setDocumentText(html);
  annotator.process(cas);
  converter.process(cas);

  CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
  assertEquals("$Some content$$More content.", plainTextCas.getDocumentText());

  AnnotationIndex<AnnotationFS> tagIndex = plainTextCas.getAnnotationIndex(tagType);
  FSIterator<AnnotationFS> tags = tagIndex.iterator();
  assertEquals(4, tagIndex.size());

  assertEquals("$Some content$$More content.", tags.next().getCoveredText());
  assertEquals("$Some content", tags.next().getCoveredText());
  assertEquals("$", tags.next().getCoveredText());
  assertEquals("$More content.", tags.next().getCoveredText());

  cas.release();
}
@Test public void testExpandOffsets() throws Exception { String html = "<Parent>\n"; html += "<Child1>Some content</Child1>\n"; html += "<Child2 attribute=“someValue” />\n"; html += "<Child3>More content.</Child3>\n"; html += "</Parent>\n"; URL urlA = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml"); if (urlA == null) { urlA = HtmlAnnotator.class .getClassLoader() .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml"); } URL urlC = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml"); if (urlC == null) { urlC = HtmlAnnotator.class .getClassLoader() .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml"); } XMLInputSource inA = new XMLInputSource(urlA); ResourceSpecifier specifierA = UIMAFramework.getXMLParser().parseResourceSpecifier(inA); AnalysisEngine aeA = UIMAFramework.produceAnalysisEngine(specifierA); aeA.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false); aeA.reconfigure(); XMLInputSource inC = new XMLInputSource(urlC); ResourceSpecifier specifierC = UIMAFramework.getXMLParser().parseResourceSpecifier(inC); AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(specifierC); aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false); aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true); aeC.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true); aeC.reconfigure(); CAS cas = aeA.newCAS(); Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG"); Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets"); AnnotationIndex<AnnotationFS> ai = null; FSIterator<AnnotationFS> iterator = null; cas.setDocumentText(html); aeA.process(cas); aeC.process(cas); CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW); assertEquals("Some contentMore content.", plainTextCas.getDocumentText()); ai = plainTextCas.getAnnotationIndex(tagType); iterator = ai.iterator(); assertEquals(4, ai.size()); AnnotationFS next = null; next = 
iterator.next(); assertEquals(false, next.getBooleanValue(expandedFeature)); assertEquals("Some contentMore content.", next.getCoveredText()); next = iterator.next(); assertEquals(false, next.getBooleanValue(expandedFeature)); assertEquals("Some content", next.getCoveredText()); next = iterator.next(); boolean b1 = next.getBooleanValue(expandedFeature); assertEquals("More content.", next.getCoveredText()); next = iterator.next(); boolean b2 = next.getBooleanValue(expandedFeature); assertEquals("More content.", next.getCoveredText()); // for one of these two annotation (with same offsets) the feature must be set to true assertEquals(true, b1 || b2); cas.release(); }