public String translate( String sourceLang, String intermediateLang, String targetLang, String source) { try { File taeDescriptor = new File(descriptor); // File inputFile = new File(source); XMLInputSource in = new XMLInputSource(taeDescriptor); ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); AnalysisEngine tae = UIMAFramework.produceAnalysisEngine(specifier); // String document = FileUtils.file2String(inputFile, "UTF-8"); JCas jcas = tae.newJCas(); jcas.setDocumentText(source); jcas.setDocumentLanguage(sourceLang + "," + intermediateLang + "," + targetLang); tae.process(jcas); String result = getResult(jcas, sourceLang, targetLang); return result; } catch (Exception e) { e.printStackTrace(); } return null; }
public static void main(String[] args) throws Exception { String sLine; long startTime = System.currentTimeMillis(); URL descUrl = VectorSpaceRetrieval.class.getResource( "/descriptors/retrievalsystem/VectorSpaceRetrieval.xml"); if (descUrl == null) { throw new IllegalArgumentException("Error opening VectorSpaceRetrieval.xml"); } // create AnalysisEngine XMLInputSource input = new XMLInputSource(descUrl); AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(input); AnalysisEngine anAnalysisEngine = UIMAFramework.produceAnalysisEngine(desc); CAS aCas = anAnalysisEngine.newCAS(); URL docUrl = VectorSpaceRetrieval.class.getResource("/data/documents.txt"); if (docUrl == null) { throw new IllegalArgumentException("Error opening data/documents.txt"); } BufferedReader br = new BufferedReader(new InputStreamReader(docUrl.openStream())); while ((sLine = br.readLine()) != null) { aCas.setDocumentText(sLine); anAnalysisEngine.process(aCas); aCas.reset(); } br.close(); br = null; anAnalysisEngine.collectionProcessComplete(); anAnalysisEngine.destroy(); long endTime = System.currentTimeMillis(); double totalTime = (endTime - startTime) / 1000.0; System.out.println("Total time taken: " + totalTime); }
/**
 * Runs the HTML annotator followed by the HTML converter over a small XML-like snippet, with
 * {@code child1}, {@code child2} and {@code child3} configured as gap-inducing tags and
 * {@code "$"} as the gap text.
 *
 * <p>Asserts the document text of the converter's default modified view and the covered text
 * of the four TAG annotations found in that view, in index order.
 */
@Test
public void test() throws Exception {
  String html =
      "<Parent>\n"
          + "<Child1>Some content</Child1>\n"
          + "<Child2 attribute=“someValue” />\n"
          + "<Child3>More content.</Child3>\n"
          + "</Parent>\n";

  // Each descriptor is looked up at the classpath root first, then under the engine package.
  ClassLoader loader = HtmlAnnotator.class.getClassLoader();
  URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
  if (annotatorUrl == null) {
    annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
  }
  URL converterUrl = loader.getResource("HtmlConverter.xml");
  if (converterUrl == null) {
    converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
  }

  // Annotator engine.
  ResourceSpecifier annotatorSpec =
      UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
  AnalysisEngine annotator = UIMAFramework.produceAnalysisEngine(annotatorSpec);
  annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
  annotator.reconfigure();

  // Converter engine.
  ResourceSpecifier converterSpec =
      UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
  AnalysisEngine converter = UIMAFramework.produceAnalysisEngine(converterSpec);
  converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
  converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
  converter.setConfigParameterValue(
      HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
  converter.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
  converter.reconfigure();

  CAS cas = annotator.newCAS();
  Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

  cas.setDocumentText(html);
  annotator.process(cas);
  converter.process(cas);

  CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
  assertEquals("$Some content$$More content.", plainTextCas.getDocumentText());

  AnnotationIndex<AnnotationFS> tagIndex = plainTextCas.getAnnotationIndex(tagType);
  FSIterator<AnnotationFS> tags = tagIndex.iterator();
  assertEquals(4, tagIndex.size());

  String[] expectedCoveredTexts = {
    "$Some content$$More content.", "$Some content", "$", "$More content."
  };
  for (String expected : expectedCoveredTexts) {
    assertEquals(expected, tags.next().getCoveredText());
  }

  cas.release();
}
@Test public void testExpandOffsets() throws Exception { String html = "<Parent>\n"; html += "<Child1>Some content</Child1>\n"; html += "<Child2 attribute=“someValue” />\n"; html += "<Child3>More content.</Child3>\n"; html += "</Parent>\n"; URL urlA = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml"); if (urlA == null) { urlA = HtmlAnnotator.class .getClassLoader() .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml"); } URL urlC = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml"); if (urlC == null) { urlC = HtmlAnnotator.class .getClassLoader() .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml"); } XMLInputSource inA = new XMLInputSource(urlA); ResourceSpecifier specifierA = UIMAFramework.getXMLParser().parseResourceSpecifier(inA); AnalysisEngine aeA = UIMAFramework.produceAnalysisEngine(specifierA); aeA.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false); aeA.reconfigure(); XMLInputSource inC = new XMLInputSource(urlC); ResourceSpecifier specifierC = UIMAFramework.getXMLParser().parseResourceSpecifier(inC); AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(specifierC); aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false); aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true); aeC.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true); aeC.reconfigure(); CAS cas = aeA.newCAS(); Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG"); Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets"); AnnotationIndex<AnnotationFS> ai = null; FSIterator<AnnotationFS> iterator = null; cas.setDocumentText(html); aeA.process(cas); aeC.process(cas); CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW); assertEquals("Some contentMore content.", plainTextCas.getDocumentText()); ai = plainTextCas.getAnnotationIndex(tagType); iterator = ai.iterator(); assertEquals(4, ai.size()); AnnotationFS next = null; next = 
iterator.next(); assertEquals(false, next.getBooleanValue(expandedFeature)); assertEquals("Some contentMore content.", next.getCoveredText()); next = iterator.next(); assertEquals(false, next.getBooleanValue(expandedFeature)); assertEquals("Some content", next.getCoveredText()); next = iterator.next(); boolean b1 = next.getBooleanValue(expandedFeature); assertEquals("More content.", next.getCoveredText()); next = iterator.next(); boolean b2 = next.getBooleanValue(expandedFeature); assertEquals("More content.", next.getCoveredText()); // for one of these two annotation (with same offsets) the feature must be set to true assertEquals(true, b1 || b2); cas.release(); }