예제 #1
0
  /**
   * Runs the saved Persian sentiment GATE application over {@code text} and returns the sentiment
   * label it produced.
   *
   * <p>After execution the document is expected to carry a {@code "Doc_sentiment"} feature holding
   * a map; the value stored under that map's {@code "sentiment"} key is copied into the response.
   * Every GATE resource created by this call (document, corpus and the loaded application) is
   * released before the method returns, whether it succeeds or fails.
   *
   * @param text the text to analyse
   * @return a JSON object with a single {@code "sentiment"} entry
   * @throws IllegalStateException if the processed document has no {@code "Doc_sentiment"} feature
   * @throws Exception if GATE cannot be initialised or the application cannot be loaded or run
   */
  public JSONObject persian_sentiment(String text) throws Exception {

    oncreate();

    File persianGapp = new File("C:/Users/mohammad/Desktop/New folder/Gate/application.xgapp");

    // GATE must be initialised before any other GATE API is used, but this method may be called
    // many times, so only initialise when it has not happened yet.
    if (!Gate.isInitialised()) {
      Gate.init();
    }

    CorpusController application = null;
    Corpus corpus = null;
    Document doc = null;
    try {
      // load the saved application
      application = (CorpusController) PersistenceManager.loadObjectFromFile(persianGapp);

      // The string passed to newCorpus() is only the GATE-internal name of the corpus.
      corpus = Factory.newCorpus("BatchProcessApp Corpus");
      application.setCorpus(corpus);

      doc = Factory.newDocument(text);
      corpus.add(doc);

      // run the application
      application.execute();

      // Read the result while the document is still alive.
      Object feature = doc.getFeatures().get("Doc_sentiment");
      if (feature == null) {
        throw new IllegalStateException(
            "GATE application did not set the \"Doc_sentiment\" document feature");
      }
      LinkedHashMap<?, ?> sentimentInfo = (LinkedHashMap<?, ?>) feature;
      String sentiment = (String) sentimentInfo.get("sentiment");

      // create Json for response to user
      JSONObject response = new JSONObject();
      response.put("sentiment", sentiment);
      return response;
    } finally {
      // Release everything this call created so repeated calls do not accumulate GATE resources.
      if (corpus != null) {
        corpus.clear();
      }
      if (doc != null) {
        Factory.deleteResource(doc);
      }
      if (corpus != null) {
        Factory.deleteResource(corpus);
      }
      if (application != null) {
        Factory.deleteResource(application);
      }
    }
  }
  /**
   * Restores the ANNIE "corpus pipeline" application from its saved state
   * ({@code ANNIE/ANNIE_with_defaults.gapp} under the GATE plugins home) and keeps it in
   * {@code annieController}, ready to run sets of documents through the extraction system.
   *
   * @throws GateException if the saved application cannot be restored
   * @throws IOException if the saved application file cannot be read
   */
  public void initAnnie() throws GateException, IOException {
    Out.prln("Initialising ANNIE...");

    // Locate the persisted application inside the ANNIE plugin directory.
    File annieDir = new File(Gate.getPluginsHome(), "ANNIE");
    File savedState = new File(annieDir, "ANNIE_with_defaults.gapp");

    Object restored = PersistenceManager.loadObjectFromFile(savedState);
    annieController = (CorpusController) restored;

    Out.prln("...ANNIE loaded");
  } // initAnnie()
예제 #3
0
  /**
   * Rebuilds the shared controller from the saved GATE application.
   *
   * <p>Existing public GATE resources are deleted first, then the application is restored from
   * disk, the processing resource supplied by {@code ie} is appended to it, and a fresh corpus
   * named {@code "SpcCorp"} is attached.
   *
   * @throws PersistenceException if the saved application cannot be restored
   * @throws ResourceInstantiationException if a GATE resource cannot be created
   * @throws IOException if the saved application file cannot be read
   */
  public static void initController()
      throws PersistenceException, ResourceInstantiationException, IOException {
    GateUtils.deleteAllPublicGateResources();

    File savedApplication = new File("C:/Users/dedek/Desktop/DATLOWE/gate_apps/all.gapp");
    Object restored = PersistenceManager.loadObjectFromFile(savedApplication);
    controller = (ConditionalSerialAnalyserController) restored;

    // Append the extra processing resource after the ones loaded from the saved application.
    controller.add(ie.getPR());

    corpus = Factory.newCorpus("SpcCorp");
    controller.setCorpus(corpus);
  }
예제 #4
0
  /**
   * Stores the configuration and prepares the GATE application used by this instance.
   *
   * <p>On the first call (while {@code inited} is false) GATE itself is initialised in sandbox
   * mode, using the directory that contains the application descriptor as GATE home and its
   * {@code plugins} sub-directory as plugins home. Every call then creates a corpus named
   * {@code "DummyCorpus"}, loads the application from {@code applicationDescriptorPath} and reads
   * the annotation and feature filters from the configuration.
   *
   * @param conf the configuration to keep and to read the filters from
   * @throws RuntimeException if {@code applicationDescriptorPath} is null, or wrapping any failure
   *     raised while initialising GATE or loading the application
   */
  public void setConf(Configuration conf) {
    config = conf;

    if (applicationDescriptorPath == null) {
      throw new RuntimeException("GATE application path is null");
    }

    // create one instance of the GATE application
    // need to avoid concurrent access to the application
    try {
      if (!inited) {
        File descriptorFile = new File(applicationDescriptorPath.getFile());
        File gateHome = descriptorFile.getParentFile();
        LOG.info("Setting GATE_HOME as " + gateHome);
        File pluginsHome = new File(gateHome, "plugins");

        // The site/user config files live in the job archive rather than in the GATE application
        // zip, so no site config, user config or builtin creole dir is set here.
        Gate.runInSandbox(true);
        Gate.setGateHome(gateHome);
        Gate.setPluginsHome(pluginsHome);
        Gate.init();
        inited = true;
      }

      corpus = Factory.newCorpus("DummyCorpus");

      Object loaded = PersistenceManager.loadObjectFromUrl(applicationDescriptorPath);
      this.GATEapplication = (CorpusController) loaded;

      // load the annotation and feature filters from the configuration
      this.filters = GATEAnnotationFilters.getFilters(config);
    } catch (Exception e) {
      LOG.error("Encountered error while initialising GATE", e);
      throw new RuntimeException(e);
    }
  }
  /**
   * The main entry point. First we parse the command line options (see usage() method for details),
   * then every file found in the input directory is loaded, processed using the saved application
   * and written as XML to {@code <outputDir>/<inputFileName>.out.xml}.
   *
   * <p>If annotation types to write were configured, only annotations of those types taken from
   * the annotation set named {@code "Output"} are serialised; otherwise the whole document is
   * written as GateXML.
   *
   * @param args the command line arguments, handed to {@code parseCommandLine}
   * @throws Exception if GATE initialisation, application loading, document processing or file
   *     output fails
   */
  public static void main(String[] args) throws Exception {
    parseCommandLine(args);

    // initialise GATE - this must be done before calling any GATE APIs
    Gate.init();

    // load the saved application
    CorpusController application =
        (CorpusController) PersistenceManager.loadObjectFromFile(gappFile);

    // The same Corpus object is reused for every document.
    ArrayList<String> files = getFilesFromDir(inputDir);
    gate.Corpus corpus = createCorpus(files);
    application.setCorpus(corpus);

    System.out.println("Processing " + files.size() + " files");

    // process the files one by one
    for (int i = 0; i < files.size(); i++) {

      // load the document (using the specified encoding if one was given)
      File docFile = new File(files.get(i));
      System.out.print("Processing document " + docFile + " (" + i + ") ...");
      // File.toURL() is deprecated because it does not escape characters that are illegal in
      // URLs (spaces, '#', '%', ...); going through toURI() produces a correctly escaped URL.
      Document doc = Factory.newDocument(docFile.toURI().toURL(), encoding);

      String docXMLString;
      try {
        // put the document in the corpus
        corpus.add(doc);

        // run the application
        application.execute();

        // if we want to just write out specific annotation types, we must
        // extract the annotations into a Set
        if (annotTypesToWrite != null) {
          // Create a temporary Set to hold the annotations we wish to write out
          Set annotationsToWrite = new HashSet();

          // annotations are only taken from the annotation set named "Output"
          AnnotationSet outputAnnots = doc.getAnnotations("Output");
          Iterator annotTypesIt = annotTypesToWrite.iterator();
          while (annotTypesIt.hasNext()) {
            // extract all the annotations of each requested type and add them to
            // the temporary set
            AnnotationSet annotsOfThisType = outputAnnots.get((String) annotTypesIt.next());
            if (annotsOfThisType != null) {
              annotationsToWrite.addAll(annotsOfThisType);
            }
          }

          // create the XML string using these annotations
          docXMLString = doc.toXml(annotationsToWrite, true);
        } else {
          // otherwise, just write out the whole document as GateXML
          docXMLString = doc.toXml();
        }
      } finally {
        // Empty the corpus and release the document even when processing fails.
        corpus.clear();
        Factory.deleteResource(doc);
      }

      // output the XML to <outputDir>/<inputFile>.out.xml
      System.out.println("Writing file " + docFile.getName());
      String outputFileName = docFile.getName() + ".out.xml";
      File outputFile = new File(new File(outputDir).getAbsolutePath(), outputFileName);

      // Write output files using the same encoding as the original
      FileOutputStream fos = new FileOutputStream(outputFile);
      try {
        BufferedOutputStream bos = new BufferedOutputStream(fos);
        OutputStreamWriter out;
        if (encoding == null) {
          out = new OutputStreamWriter(bos);
        } else {
          out = new OutputStreamWriter(bos, encoding);
        }

        out.write(docXMLString);

        // Closing the writer flushes it; doing so inside the try lets flush errors propagate.
        out.close();
      } finally {
        // Guarantees the file handle is released if creating or writing the writer failed;
        // closing an already closed stream has no effect.
        fos.close();
      }
      System.out.println("done");
    } // for each file

    System.out.println("All done");
  } // void main(String[] args)
예제 #6
0
  /**
   * Reads the input file in chunks, runs each chunk through the saved GATE application on several
   * worker threads and appends the results to a CSV file and a text file.
   *
   * <p>Arguments: {@code <num_threads> <input_file> [csv_out] [text_out] [chunk_size]}. The CSV
   * output defaults to {@code Out.csv}, the text output to {@code Out.txt} and the chunk size to
   * 100000 lines.
   *
   * @param args the command line arguments described above
   * @throws IllegalArgumentException if required arguments are missing, or the thread count or
   *     chunk size is not positive
   * @throws Exception if GATE initialisation, application loading, file access or thread joining
   *     fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      throw new IllegalArgumentException(
          "Usage: <num_threads> <input_file> [csv_out] [text_out] [chunk_size]");
    }

    int numThreads = Integer.parseInt(args[0]);
    if (numThreads < 1) {
      // With no worker threads every line would be read and silently dropped.
      throw new IllegalArgumentException("num_threads must be at least 1, got " + numThreads);
    }
    int chunkSize = 100000;
    int totalLinesRead = 0;
    if (args.length > 4) chunkSize = Integer.parseInt(args[4]);
    if (chunkSize < 1) {
      // A non-positive chunk size would never consume input and the read loop would not end.
      throw new IllegalArgumentException("chunk_size must be at least 1, got " + chunkSize);
    }

    // initialise GATE - this must be done before calling any GATE APIs
    Gate.init();

    CorpusController application =
        (CorpusController)
            PersistenceManager.loadObjectFromFile(new File("TJInfoExtractor/application.xgapp"));

    // One duplicate of the application per worker thread.
    List<CorpusController> applicationList = new ArrayList<CorpusController>();
    for (int i = 0; i < numThreads; ++i)
      applicationList.add((CorpusController) Factory.duplicate(application));

    String csvPath = "Out.csv";
    if (args.length > 2) csvPath = args[2];
    String textPath = "Out.txt";
    if (args.length > 3) textPath = args[3];

    PrintWriter writer = null;
    PrintWriter writer2 = null;
    BufferedReader br = null;
    try {
      writer = new PrintWriter(csvPath, "UTF-8");
      writer.println(
          "Perspective_1st,Perspective_3rd,Name,Age,Cost,Height_ft,Height_in,Weight,Cup,Chest,Waist,Hip,Ethnicity,SkinColor,EyeColor,HairColor,Restriction_Type,Restriction_Ethnicity,Restriction_Age,PhoneNumber,AreaCode_State,AreaCode_Cities,Email,Url,Media");

      writer2 = new PrintWriter(textPath, "UTF-8");

      // load the document
      System.out.println("Reading document " + args[1] + "...");
      // NOTE(review): FileReader decodes with the platform default charset while both outputs
      // are written as UTF-8 - confirm the expected input encoding.
      br = new BufferedReader(new FileReader(args[1]));
      boolean done = false;

      while (!done) {
        List<String> fileLines = new ArrayList<String>();
        // Create container for results
        List<String> annotationResults = new ArrayList<String>();
        List<String> annotationText = new ArrayList<String>();

        // read up to chunkSize lines of the file
        String fileline;
        while (fileLines.size() < chunkSize) {
          fileline = br.readLine();
          if (fileline == null) {
            done = true;
            break;
          }
          fileLines.add(fileline);
          totalLinesRead++;
        }

        // launch threads to process each chunk
        int chunkLines = fileLines.size();
        int step = (int) Math.ceil(((double) chunkLines) / ((double) numThreads));
        List<ExtractorThread> pool = new ArrayList<ExtractorThread>();
        for (int i = 0; i < numThreads; ++i) {
          // Clamp both bounds: with fewer lines than threads (or an uneven split) i * step can
          // run past the end of the chunk, and subList rejects fromIndex > toIndex. Trailing
          // workers then simply receive an empty slice.
          int from = Math.min(i * step, chunkLines);
          int to = Math.min(from + step, chunkLines);
          pool.add(new ExtractorThread(fileLines.subList(from, to), applicationList.get(i), i));
        }
        // Wait for every worker and collect its output in worker order.
        for (int i = 0; i < numThreads; ++i) {
          ExtractorThread worker = pool.get(i);
          worker.t.join();
          if (worker.results != null) annotationResults.addAll(worker.results);
          if (worker.text != null) annotationText.addAll(worker.text);
        }

        for (String l : annotationResults) writer.println(l);
        for (String l : annotationText) writer2.println(l);
        System.out.println("Processed " + totalLinesRead + " lines...");
      }
    } finally {
      // Close the writers first: PrintWriter.close() cannot throw, so a failure while closing
      // the reader never prevents buffered output from being flushed.
      if (writer != null) writer.close();
      if (writer2 != null) writer2.close();
      if (br != null) br.close();
    }
    System.out.println("All done");
  }
 public GateAnalyzer(String appPath) throws Exception {
   Gate.init();
   controller =
       (SerialAnalyserController) PersistenceManager.loadObjectFromFile(new File(appPath));
 }