Exemplo n.º 1
0
  /**
   * Populates {@code glossary} from two sources, in this order:
   *
   * <ol>
   *   <li>the "latest" glossary for {@code taxonGroup}, downloaded from the OTO service;
   *   <li>two local CSV review files (synonyms and term/category decisions).
   * </ol>
   *
   * <p>For each source, synonyms are registered first; plain entries are then added only for
   * terms that were not already registered as a synonym in that same source. Underscores in terms
   * are replaced by hyphens before registration.
   *
   * <p>If the OTO download fails, the stack trace is printed and processing continues with an
   * empty {@link GlossaryDownload} (best effort, as before).
   *
   * <p>NOTE(review): the CSV locations are hard-coded absolute paths on a developer machine; they
   * should become parameters or configuration, which would require a signature change.
   *
   * @param glossary the glossary to fill
   * @param inflector used to derive singular/plural forms
   * @param taxonGroup selects which OTO glossary is downloaded (by display name)
   * @throws IOException if a CSV file cannot be opened or read (the signature also lets an
   *     {@code IOException} from the OTO client propagate)
   */
  private static void initGlossary(IGlossary glossary, IInflector inflector, TaxonGroup taxonGroup)
      throws IOException {
    GlossaryDownload glossaryDownload = fetchLatestGlossary(taxonGroup);

    // add the syn set of the glossary
    Set<Term> gsyns = new HashSet<Term>();
    for (TermSynonym termSyn : glossaryDownload.getTermSynonyms()) {
      registerSynonym(
          glossary, inflector, gsyns, termSyn.getSynonym(), termSyn.getCategory(),
          termSyn.getTerm());
    }

    // the glossary, excluding gsyns
    for (TermCategory termCategory : glossaryDownload.getTermCategories()) {
      // primocane_foliage => primocane-foliage Hong 3/2014
      String hyphenated = termCategory.getTerm().replace('_', '-');
      if (!gsyns.contains(new Term(hyphenated, termCategory.getCategory()))) {
        glossary.addEntry(hyphenated, termCategory.getCategory());
      }
    }

    String reviewDir =
        "C:\\Users\\rodenhausen\\Desktop\\test-enhance\\" + "Gordon_complexity_term_review\\";
    String synonymCsv =
        reviewDir + "category_mainterm_synonymterm-task-Gordon_complexity.csv";
    String decisionCsv = reviewDir + "category_term-task-Gordon_complexity.csv";

    // Build the synonym list; column 1 of each row is remembered so that decisions can be
    // flagged as "has synonym" below.
    List<Synonym> synonyms = new LinkedList<Synonym>();
    Set<String> hasSynonym = new HashSet<String>();
    int synonymId = 0;
    for (String[] line : readCsvRows(synonymCsv)) {
      // The id is the row index (previously the counter was never incremented).
      synonyms.add(new Synonym(String.valueOf(synonymId++), line[1], line[0], line[2]));
      hasSynonym.add(line[1]);
    }

    List<Decision> decisions = new LinkedList<Decision>();
    int decisionId = 0;
    for (String[] line : readCsvRows(decisionCsv)) {
      decisions.add(
          new Decision(
              String.valueOf(decisionId++), line[1], line[0], hasSynonym.contains(line[1]), ""));
    }

    Download download = new Download(true, decisions, synonyms);

    // add syn set of term_category
    // Hong TODO need to add category info to synonym entry in OTOLite
    // forking_1 and forking are syns 5/5/14 hong test, shouldn't _1 have already been removed?
    Set<Term> dsyns = new HashSet<Term>();
    for (Synonym termSyn : download.getSynonyms()) {
      registerSynonym(
          glossary, inflector, dsyns, termSyn.getSynonym(), termSyn.getCategory(),
          termSyn.getTerm());
    }

    // term_category from OTO, excluding dsyns
    for (Decision decision : download.getDecisions()) {
      // calyx_tube => calyx-tube
      String hyphenated = decision.getTerm().replace('_', '-');
      if (!dsyns.contains(new Term(hyphenated, decision.getCategory()))) {
        glossary.addEntry(hyphenated, decision.getCategory());
      }
    }
  }

  /**
   * Downloads the "latest" glossary for {@code taxonGroup} from the OTO service. The client is
   * closed exactly once, whether or not the download succeeds. If waiting for the result fails,
   * the stack trace is printed and an empty {@link GlossaryDownload} is returned.
   */
  private static GlossaryDownload fetchLatestGlossary(TaxonGroup taxonGroup) throws IOException {
    OTOClient otoClient = new OTOClient("http://biosemantics.arizona.edu:8080/OTO");
    GlossaryDownload glossaryDownload = new GlossaryDownload();
    String glossaryVersion = "latest";
    otoClient.open();
    try {
      Future<GlossaryDownload> futureGlossaryDownload =
          otoClient.getGlossaryDownload(taxonGroup.getDisplayName(), glossaryVersion);
      try {
        glossaryDownload = futureGlossaryDownload.get();
      } catch (Exception e) {
        if (e instanceof InterruptedException) {
          // Keep the interrupt visible to callers further up the stack.
          Thread.currentThread().interrupt();
        }
        e.printStackTrace();
      }
    } finally {
      otoClient.close();
    }
    return glossaryDownload;
  }

  /** Reads every row of the CSV file at {@code path} and closes the reader afterwards. */
  private static List<String[]> readCsvRows(String path) throws IOException {
    CSVReader reader = new CSVReader(new FileReader(path));
    try {
      return reader.readAll();
    } finally {
      reader.close();
    }
  }

  /**
   * Registers one synonym/term pair in {@code glossary} and records the registered synonym
   * form(s) in {@code registered}.
   *
   * <p>For the categories {@code structure}, {@code taxon_name} and {@code substance}, both the
   * singular and the plural forms are registered (singular synonym to singular term, plural
   * synonym to plural term). For any other category the synonym is registered as is, against the
   * term with its underscores left untouched.
   */
  private static void registerSynonym(
      IGlossary glossary,
      IInflector inflector,
      Set<Term> registered,
      String synonym,
      String category,
      String term) {
    if (!category.matches("structure|taxon_name|substance")) {
      String hyphenSynonym = synonym.replace('_', '-');
      glossary.addSynonym(hyphenSynonym, category, term);
      registered.add(new Term(hyphenSynonym, category));
      return;
    }

    // must convert _ to -, as matching entity phrases will be converted
    // from leg iii to leg-iii in the sentence.
    String hyphenSynonym = synonym.replace('_', '-');
    String synSingular;
    String synPlural;
    if (inflector.isPlural(hyphenSynonym)) {
      synPlural = hyphenSynonym;
      synSingular = inflector.getSingular(synPlural);
    } else {
      synSingular = hyphenSynonym;
      synPlural = inflector.getPlural(synSingular);
    }

    String hyphenTerm = term.replace('_', '-');
    String termSingular;
    String termPlural;
    if (inflector.isPlural(hyphenTerm)) {
      termPlural = hyphenTerm;
      termSingular = inflector.getSingular(termPlural);
    } else {
      termSingular = hyphenTerm;
      termPlural = inflector.getPlural(termSingular);
    }

    glossary.addSynonym(synSingular, category, termSingular);
    glossary.addSynonym(synPlural, category, termPlural);
    registered.add(new Term(synSingular, category));
    registered.add(new Term(synPlural, category));
  }