/** Returns a list of annotations to be added to the Behemoth document from the GATE one * */
  private List<com.digitalpebble.behemoth.Annotation> convertGATEAnnotationsToBehemoth(
      AnnotationSet GATEAnnotionSet, com.digitalpebble.behemoth.BehemothDocument behemoth) {

    List<com.digitalpebble.behemoth.Annotation> beheannotations =
        new ArrayList<com.digitalpebble.behemoth.Annotation>();

    AnnotationSet resultAS = GATEAnnotionSet.get(filters.getTypes());

    // sort the GATE annotations
    List<gate.Annotation> annotationList = new ArrayList<gate.Annotation>(resultAS);
    Collections.sort(annotationList, new OffsetComparator());
    Iterator<gate.Annotation> inputASIter = annotationList.iterator();

    while (inputASIter.hasNext()) {
      gate.Annotation source = inputASIter.next();

      com.digitalpebble.behemoth.Annotation target = new com.digitalpebble.behemoth.Annotation();
      target.setType(source.getType());
      target.setStart(source.getStartNode().getOffset().longValue());
      target.setEnd(source.getEndNode().getOffset().longValue());

      // now do the features
      // is the type listed?
      Set<String> expectedfeatnames = filters.getFeatureFilters().get(source.getType());
      if (expectedfeatnames != null) {
        Iterator featurenames = source.getFeatures().keySet().iterator();
        while (featurenames.hasNext()) {
          // cast the feature name to a string which will be right in
          // 99% of cases
          String featurename = featurenames.next().toString();
          // if this feature name is not wanted just ignore it
          if (expectedfeatnames.contains(featurename) == false) continue;
          // we know that we want to keep this feature
          // let's see what the best way of representing the value
          // would be
          // TODO later => find a better way of mapping when not a
          // string
          Object originalvalue = source.getFeatures().get(featurename);
          if (originalvalue == null) originalvalue = "null";
          target.getFeatures().put(featurename, originalvalue.toString());
        }
      }
      beheannotations.add(target);
    }
    return beheannotations;
  }
  public void setConf(Configuration conf) {
    config = conf;

    if (applicationDescriptorPath == null)
      throw new RuntimeException("GATE application path is null");

    // create one instance of the GATE application
    // need to avoid concurrent access to the application
    try {

      if (inited == false) {
        File gateHome = new File(applicationDescriptorPath.getFile()).getParentFile();
        LOG.info("Setting GATE_HOME as " + gateHome);
        File pluginsHome = new File(gateHome, "plugins");
        // the config files are in the job archive - not in the GATE
        // application
        // zip
        // File siteConfigFile = new File(conf
        // .getResource("site-gate.xml").getFile());
        // File userConfig = new File(conf.getResource("user-gate.xml")
        // .getFile());
        Gate.runInSandbox(true);
        Gate.setGateHome(gateHome);
        Gate.setPluginsHome(pluginsHome);
        // Gate.setSiteConfigFile(siteConfigFile);
        // Gate.setUserConfigFile(userConfig);
        // the builtInCreoleDir files
        // are stored in the same place as the config ones
        // Gate.setBuiltinCreoleDir(conf.getResource("creole.xml"));
        Gate.init();
        inited = true;
      }

      corpus = Factory.newCorpus("DummyCorpus");

      this.GATEapplication =
          (CorpusController) PersistenceManager.loadObjectFromUrl(applicationDescriptorPath);

      // load the annotation and feature filters from the configuration
      this.filters = GATEAnnotationFilters.getFilters(config);

    } catch (Exception e) {
      LOG.error("Encountered error while initialising GATE", e);
      throw new RuntimeException(e);
    }
  }
  // Process an input document with GATE and a Reporter
  public synchronized BehemothDocument[] process(BehemothDocument inputDoc, Reporter reporter) {
    if (reporter != null) reporter.setStatus("GATE : " + inputDoc.getUrl().toString());

    boolean clearBehemothAnnotations = config.getBoolean("gate.deleteBehemothAnnotations", false);

    // process the text passed as value with the application
    // a) create a GATE document based on the text value
    gate.Document gatedocument = null;
    try {

      gatedocument = generateGATEDoc(inputDoc);
      // add it to the current corpus
      corpus.add(gatedocument);
      // get the application and assign the corpus to it
      this.GATEapplication.setCorpus(corpus);
      // process it with GATE
      this.GATEapplication.execute();

      AnnotationSet annots = null;
      if ("".equals(filters.getAnnotationSetName())) annots = gatedocument.getAnnotations();
      else annots = gatedocument.getAnnotations(filters.getAnnotationSetName());

      // enrich the input doc with the annotations from
      // the GATE application
      // transfer the annotations from the GATE document
      // to the Behemoth one using the filters
      List<com.digitalpebble.behemoth.Annotation> beheannotations =
          convertGATEAnnotationsToBehemoth(annots, inputDoc);

      // sort the annotations before adding them?
      Collections.sort(beheannotations);

      // clear the existing behemoth annotations
      if (clearBehemothAnnotations) {
        inputDoc.getAnnotations().clear();
      }

      inputDoc.getAnnotations().addAll(beheannotations);

      // add counters about num of annotations added
      if (reporter != null)
        for (com.digitalpebble.behemoth.Annotation annot : beheannotations) {
          reporter.incrCounter("GATE", annot.getType(), 1);
        }

      // Add the document features from GATE to Behemoth
      Set<String> docFeatFilter = this.filters.getDocFeaturesFilter();
      MapWritable beheMD = inputDoc.getMetadata(true);
      if (docFeatFilter.size() > 0) {
        for (String docFeatName : docFeatFilter) {
          Object featValue = gatedocument.getFeatures().get(docFeatName);
          if (featValue != null) {
            beheMD.put(new Text(docFeatName), new Text(featValue.toString()));
          }
        }
      }

      if (reporter != null) reporter.incrCounter("GATE", "Document", 1);

    } catch (Exception e) {
      LOG.error(inputDoc.getUrl().toString(), e);
      if (reporter != null) reporter.incrCounter("GATE", "Exceptions", 1);
    } finally {
      // remove the document from the corpus again
      corpus.clear();
      // and from memory
      if (gatedocument != null) Factory.deleteResource(gatedocument);
    }
    // currently returns only the input document
    return new BehemothDocument[] {inputDoc};
  }