Java UIMAFramework.getXMLParser示例，org.apache.uima.UIMAFramework.getXMLParser Java示例

示例#1

0

显示文件

文件： TwoHopTranslator.java 项目： trangmx/langrid_uima

  /**
   * Translates {@code source} by running it through the analysis engine described by
   * {@code descriptor}.
   *
   * <p>The three language codes are handed to the engine as one comma-separated document language
   * ({@code sourceLang,intermediateLang,targetLang}); the outcome is then read back from the CAS
   * through {@code getResult}. A new engine is produced for every call and destroyed before the
   * method returns, whether or not processing succeeded.
   *
   * @param sourceLang language code of {@code source}
   * @param intermediateLang language code placed between source and target (presumably the pivot
   *     language of the two-hop translation)
   * @param targetLang language code of the requested translation
   * @param source text to set as the document text
   * @return whatever {@code getResult} extracts from the processed CAS, or {@code null} if any
   *     step threw an exception (the stack trace is printed)
   */
  public String translate(
      String sourceLang, String intermediateLang, String targetLang, String source) {
    AnalysisEngine tae = null;
    try {
      File taeDescriptor = new File(descriptor);
      XMLInputSource in = new XMLInputSource(taeDescriptor);
      ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      tae = UIMAFramework.produceAnalysisEngine(specifier);

      JCas jcas = tae.newJCas();
      jcas.setDocumentText(source);
      jcas.setDocumentLanguage(sourceLang + "," + intermediateLang + "," + targetLang);

      tae.process(jcas);

      return getResult(jcas, sourceLang, targetLang);
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      // The engine was previously abandoned without being destroyed on every call.
      if (tae != null) {
        tae.destroy();
      }
    }
  }

示例#2

0

显示文件

文件： RunAE.java 项目： lovebube/myUIMA

 /**
  * Parses {@code collectionReaderDescriptor}, produces a collection reader from it and registers
  * that reader with {@code uimaEEEngine}.
  *
  * <p>The method stays best-effort: a failure does not propagate to the caller, but it is now
  * reported on standard error instead of being discarded, so a missing reader can be diagnosed.
  */
 public void setCollectionReader() {
   try {
     CollectionReaderDescription collectionReaderDescription =
         UIMAFramework.getXMLParser()
             .parseCollectionReaderDescription(new XMLInputSource(collectionReaderDescriptor));
     CollectionReader collectionReader =
         UIMAFramework.produceCollectionReader(collectionReaderDescription);
     uimaEEEngine.setCollectionReader(collectionReader);
   } catch (ResourceInitializationException e) {
     System.err.println(
         "Collection reader could not be produced from " + collectionReaderDescriptor);
     e.printStackTrace();
   } catch (InvalidXMLException e) {
     System.err.println(
         "Collection reader descriptor is not valid: " + collectionReaderDescriptor);
     e.printStackTrace();
   } catch (IOException e) {
     System.err.println(
         "Collection reader descriptor could not be read: " + collectionReaderDescriptor);
     e.printStackTrace();
   }
 }

示例#3

0

显示文件

文件： HeidelTimeStandalone.java 项目： JonathanBowker/heideltime

  /**
   * Initializes the prerequisites of this instance: stores the POS tagger and the interval
   * tagging flag, loads the configuration if it has not been loaded yet, and creates the
   * HeidelTime engine and the JCas factory.
   *
   * <p>A failure while creating the engine or the JCas factory is logged at WARNING level
   * together with the causing exception; it does not abort the method.
   *
   * @param language Language to be processed with this copy of HeidelTime
   * @param typeToProcess Domain type to be processed
   * @param outputType Output type (not read by this method)
   * @param configPath Path to the configuration file for HeidelTimeStandalone; when {@code null},
   *     the value of {@code CLISwitch.CONFIGFILE} is used instead
   * @param posTagger POS Tagger to use for preprocessing
   * @param doIntervalTagging Whether or not to invoke the IntervalTagger
   */
  public void initialize(
      Language language,
      DocumentType typeToProcess,
      OutputType outputType,
      String configPath,
      POSTagger posTagger,
      Boolean doIntervalTagging) {
    // Report the language this call actually initializes with; the field may be unset or differ.
    logger.log(Level.INFO, "HeidelTimeStandalone initialized with language " + language.getName());

    // set the POS tagger
    this.posTagger = posTagger;

    // set doIntervalTagging flag
    this.doIntervalTagging = doIntervalTagging;

    // read in configuration in case it's not yet initialized
    if (!Config.isInitialized()) {
      if (configPath == null) {
        readConfigFile(CLISwitch.CONFIGFILE.getValue().toString());
      } else {
        readConfigFile(configPath);
      }
    }

    try {
      heidelTime = new HeidelTime();
      heidelTime.initialize(
          new UimaContextImpl(language, typeToProcess, CLISwitch.VERBOSITY2.getIsActive()));
      logger.log(Level.INFO, "HeidelTime initialized");
    } catch (Exception e) {
      // Hand the exception to the logger so the cause travels with the message.
      logger.log(Level.WARNING, "HeidelTime could not be initialized", e);
    }

    // Initialize JCas factory -------------
    logger.log(Level.FINE, "Initializing JCas factory...");
    try {
      TypeSystemDescription[] descriptions =
          new TypeSystemDescription[] {
            UIMAFramework.getXMLParser()
                .parseTypeSystemDescription(
                    new XMLInputSource(
                        this.getClass()
                            .getClassLoader()
                            .getResource(Config.get(Config.TYPESYSTEMHOME))))
          };
      jcasFactory = new JCasFactoryImpl(descriptions);
      logger.log(Level.INFO, "JCas factory initialized");
    } catch (Exception e) {
      logger.log(Level.WARNING, "JCas factory could not be initialized", e);
    }
  }

示例#4

0

显示文件

文件： U2BTest.java 项目： textocat/textokit-core

  /**
   * Runs the aggregate described by {@code U2BAggregateDesc} over the XMI files found in
   * {@code inputFileXMIDir}; the test passes when the pipeline completes without throwing.
   */
  @Test
  public void test() throws Exception {
    // Reader side: XMI input typed by the test type system.
    CollectionReaderDescription xmiReader =
        CollectionReaderFactory.createReaderDescription(
            XmiCollectionReader.class,
            TypeSystemDescriptionFactory.createTypeSystemDescription("desc.types.test-TypeSystem"),
            XmiCollectionReader.PARAM_INPUTDIR,
            inputFileXMIDir);

    // Engine side: the aggregate is parsed straight from its XML descriptor.
    AnalysisEngineDescription aggregate =
        UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(new XMLInputSource(U2BAggregateDesc));

    SimplePipeline.runPipeline(xmiReader, aggregate);
  }

示例#5

0

显示文件

文件： XMIReader.java 项目： hambith/UIMA-Examples

  /**
   * Loads the serialized annotations in {@code resources/annotation.xmi} into a CAS built from
   * the {@code desc/ej4/MetricsAnnotatorPipeline.xml} descriptor, then prints the document text
   * and, for every {@code Metric} annotation, its covered text, computed value and base unit.
   *
   * @param args command line arguments (not used)
   * @throws IOException if the descriptor or the XMI file cannot be read
   * @throws InvalidXMLException if the descriptor cannot be parsed
   * @throws CASException if the JCas view cannot be obtained
   * @throws ResourceInitializationException if the CAS cannot be created
   * @throws SAXException if the XMI content cannot be deserialized
   */
  public static void main(String[] args)
      throws IOException, InvalidXMLException, CASException, ResourceInitializationException,
          SAXException {
    // Read the annotator descriptor
    XMLParser xmlParser = UIMAFramework.getXMLParser();
    XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml");
    // Build the analysis engine description from the descriptor
    AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in);
    // Obtain the CAS
    JCas jcas = CasCreationUtils.createCas(tsDesc).getJCas();

    // De-serialize the annotations from a file; the stream is closed even if parsing fails
    try (FileInputStream inputStream = new FileInputStream("resources/annotation.xmi")) {
      XmiCasDeserializer.deserialize(inputStream, jcas.getCas());
    }

    // Print the annotated document text
    String sofaString = jcas.getDocumentText();
    System.out.println(sofaString);

    // Walk the Metric annotations loaded from the file
    FSIterator it = jcas.getAnnotationIndex(Metric.type).iterator();
    while (it.isValid()) {
      Metric metric = (Metric) it.get();
      Number number = metric.getNumber();
      Unit unit = metric.getUnit();
      // The magnitude is stored either as a double or as an integer, depending on the flag
      Double value =
          (number.getIsDouble())
              ? number.getAbsoluteDoubleValue()
              : Double.valueOf(number.getAbsoluteIntegerValue());
      System.out.println("===================");
      System.out.println("Metric: " + metric.getCoveredText());
      System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier());
      System.out.println("Base unit: " + unit.getBaseUnit());
      it.moveToNext();
    }
  }

示例#6

0

显示文件

文件： SimpleQuestionRunCPE.java 项目： kar2905/qalabcmu

  /**
   * Constructor for the class. Parses the CPE descriptor, instantiates and starts the CPE, and
   * then watches standard input so that the user can abort processing.
   *
   * @param args command line arguments into the program - see class description. When empty,
   *     {@code src/main/resources/CpeQuestionDescriptor.xml} is used as the descriptor path.
   * @throws Exception if the descriptor cannot be parsed, the CPE cannot be created or started,
   *     or standard input cannot be read
   */
  public SimpleQuestionRunCPE(String args[]) throws Exception {
    mStartTime = System.currentTimeMillis();
    // Fall back to the default descriptor path. Because of this default the argument array is
    // never empty below, so no separate usage check is needed.
    if (args.length == 0) {
      args = new String[] {"src/main/resources/CpeQuestionDescriptor.xml"};
    }

    // parse CPE descriptor
    System.out.println("Parsing CPE Descriptor");
    CpeDescription cpeDesc =
        UIMAFramework.getXMLParser().parseCpeDescription(new XMLInputSource(args[0]));
    // instantiate CPE
    System.out.println("Instantiating CPE");
    mCPE = UIMAFramework.produceCollectionProcessingEngine(cpeDesc);

    // Create and register a Status Callback Listener
    mCPE.addStatusCallbackListener(new StatusCallbackListenerImpl());

    // Start Processing
    System.out.println("Running CPE");
    mCPE.process();

    // Allow user to abort by pressing Enter
    System.out.println("To abort processing, type \"abort\" and press enter.");
    // One reader for the whole session: a fresh reader per line could discard input that a
    // previous reader had already buffered.
    BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
    String line;
    // Stop watching once standard input is exhausted instead of spinning on null lines.
    while ((line = console.readLine()) != null) {
      if ("abort".equals(line) && mCPE.isProcessing()) {
        System.out.println("Aborting...");
        mCPE.stop();
        break;
      }
    }
  }

示例#7

0

显示文件

文件： VectorSpaceRetrieval.java 项目： soumya-batra/hw4-soumyab

  /**
   * Runs the analysis engine described by
   * {@code /descriptors/retrievalsystem/VectorSpaceRetrieval.xml} over every line of
   * {@code /data/documents.txt} (both looked up on the classpath), treating each line as one
   * document, and prints the elapsed time in seconds.
   *
   * @param args command line arguments (not used)
   * @throws IllegalArgumentException if the descriptor or the document file is not found
   * @throws Exception if parsing the descriptor, creating the engine or processing fails
   */
  public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    URL descUrl =
        VectorSpaceRetrieval.class.getResource(
            "/descriptors/retrievalsystem/VectorSpaceRetrieval.xml");
    if (descUrl == null) {
      throw new IllegalArgumentException("Error opening VectorSpaceRetrieval.xml");
    }
    // create AnalysisEngine
    XMLInputSource input = new XMLInputSource(descUrl);
    AnalysisEngineDescription desc =
        UIMAFramework.getXMLParser().parseAnalysisEngineDescription(input);
    AnalysisEngine anAnalysisEngine = UIMAFramework.produceAnalysisEngine(desc);
    try {
      CAS aCas = anAnalysisEngine.newCAS();

      URL docUrl = VectorSpaceRetrieval.class.getResource("/data/documents.txt");
      if (docUrl == null) {
        throw new IllegalArgumentException("Error opening data/documents.txt");
      }
      // The reader is closed even when processing a line throws.
      try (BufferedReader br = new BufferedReader(new InputStreamReader(docUrl.openStream()))) {
        String sLine;
        while ((sLine = br.readLine()) != null) {
          aCas.setDocumentText(sLine);
          anAnalysisEngine.process(aCas);
          // the same CAS is reused for the next line
          aCas.reset();
        }
      }
      anAnalysisEngine.collectionProcessComplete();
    } finally {
      // Destroy the engine on failure as well as on success.
      anAnalysisEngine.destroy();
    }
    long endTime = System.currentTimeMillis();

    double totalTime = (endTime - startTime) / 1000.0;
    System.out.println("Total time taken: " + totalTime);
  }

示例#8

0

显示文件

文件： HtmlConverterXmlTest.java 项目： renaud/ruta-core

  /**
   * Feeds a small XML-like document through the HTML annotator and then the HTML converter,
   * configured with {@code child1}, {@code child2} and {@code child3} as gap-inducing tags and
   * {@code $} as gap text, and checks the converted text and the four TAG annotations of the
   * converter's modified view.
   */
  @Test
  public void test() throws Exception {
    String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Each descriptor is looked up at the classpath root first, then under the engine package.
    URL annotatorUrl = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml");
    if (annotatorUrl == null) {
      annotatorUrl =
          HtmlAnnotator.class
              .getClassLoader()
              .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }
    URL converterUrl = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml");
    if (converterUrl == null) {
      converterUrl =
          HtmlAnnotator.class
              .getClassLoader()
              .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    // Annotator engine: annotate tags, not only their content.
    XMLInputSource annotatorSource = new XMLInputSource(annotatorUrl);
    AnalysisEngine annotator =
        UIMAFramework.produceAnalysisEngine(
            UIMAFramework.getXMLParser().parseResourceSpecifier(annotatorSource));
    annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    annotator.reconfigure();

    // Converter engine: every configured tag induces a "$" gap.
    XMLInputSource converterSource = new XMLInputSource(converterUrl);
    AnalysisEngine converter =
        UIMAFramework.produceAnalysisEngine(
            UIMAFramework.getXMLParser().parseResourceSpecifier(converterSource));
    converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    converter.setConfigParameterValue(
        HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
    converter.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
    converter.reconfigure();

    CAS cas = annotator.newCAS();
    Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

    cas.setDocumentText(html);
    annotator.process(cas);
    converter.process(cas);

    CAS convertedView = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("$Some content$$More content.", convertedView.getDocumentText());

    // The annotations arrive in index order: the parent first, then the three children.
    AnnotationIndex<AnnotationFS> tagIndex = convertedView.getAnnotationIndex(tagType);
    FSIterator<AnnotationFS> tags = tagIndex.iterator();
    assertEquals(4, tagIndex.size());
    assertEquals("$Some content$$More content.", tags.next().getCoveredText());
    assertEquals("$Some content", tags.next().getCoveredText());
    assertEquals("$", tags.next().getCoveredText());
    assertEquals("$More content.", tags.next().getCoveredText());

    cas.release();
  }

示例#9

0

显示文件

文件： HtmlConverterXmlTest.java 项目： renaud/ruta-core

  /**
   * Feeds a small XML-like document through the HTML annotator and then the HTML converter with
   * offset expansion switched on, and checks the converted text, the covered text of the four
   * TAG annotations and their {@code expandedOffsets} feature.
   */
  @Test
  public void testExpandOffsets() throws Exception {
    String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Each descriptor is looked up at the classpath root first, then under the engine package.
    URL annotatorUrl = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml");
    if (annotatorUrl == null) {
      annotatorUrl =
          HtmlAnnotator.class
              .getClassLoader()
              .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }
    URL converterUrl = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml");
    if (converterUrl == null) {
      converterUrl =
          HtmlAnnotator.class
              .getClassLoader()
              .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    // Annotator engine: annotate tags, not only their content.
    XMLInputSource annotatorSource = new XMLInputSource(annotatorUrl);
    AnalysisEngine annotator =
        UIMAFramework.produceAnalysisEngine(
            UIMAFramework.getXMLParser().parseResourceSpecifier(annotatorSource));
    annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    annotator.reconfigure();

    // Converter engine: no gap text here, but offsets are expanded.
    XMLInputSource converterSource = new XMLInputSource(converterUrl);
    AnalysisEngine converter =
        UIMAFramework.produceAnalysisEngine(
            UIMAFramework.getXMLParser().parseResourceSpecifier(converterSource));
    converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    converter.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true);
    converter.reconfigure();

    CAS cas = annotator.newCAS();
    Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");
    Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets");

    cas.setDocumentText(html);
    annotator.process(cas);
    converter.process(cas);

    CAS convertedView = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("Some contentMore content.", convertedView.getDocumentText());

    AnnotationIndex<AnnotationFS> tagIndex = convertedView.getAnnotationIndex(tagType);
    FSIterator<AnnotationFS> tags = tagIndex.iterator();
    assertEquals(4, tagIndex.size());

    AnnotationFS first = tags.next();
    assertEquals(false, first.getBooleanValue(expandedFeature));
    assertEquals("Some contentMore content.", first.getCoveredText());

    AnnotationFS second = tags.next();
    assertEquals(false, second.getBooleanValue(expandedFeature));
    assertEquals("Some content", second.getCoveredText());

    AnnotationFS third = tags.next();
    boolean thirdExpanded = third.getBooleanValue(expandedFeature);
    assertEquals("More content.", third.getCoveredText());

    AnnotationFS fourth = tags.next();
    boolean fourthExpanded = fourth.getBooleanValue(expandedFeature);
    assertEquals("More content.", fourth.getCoveredText());

    // for one of these two annotations (with same offsets) the feature must be set to true
    assertEquals(true, thirdExpanded || fourthExpanded);

    cas.release();
  }