Java Util.nullOrEmpty Examples

Programming Language: Java

Namespace/Package Name: edu.stanford.muse.util

Class/Type: Util

Method/Function: nullOrEmpty

Examples at hotexamples.com: 4

Java Util.nullOrEmpty - 4 examples found. These are the top-rated real-world Java examples of edu.stanford.muse.util.Util.nullOrEmpty extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

print_exception(5)

castOrCloneAsSet(4)

nullOrEmpty(4)

pluralize(3)

fieldsToString(2)

tokenize(2)

fieldsToCSV(1)

filesWithPrefixAndSuffix(1)

getLinesFromFile(1)

getLinesFromInputStream(1)

listIntersection(1)

setIntersection(1)

Example #1

Show file

File: Lexicon.java Project: ePADD/muse

    /**
     * Loads a lexicon from a text file of {@code caption: query} lines and expands the queries.
     *
     * <p>Everything after a {@code #} on a line is treated as a comment and dropped; blank lines
     * are skipped. A line that does not split into exactly two non-empty {@code :}-separated parts
     * is logged and ignored. When the same caption appears on several lines, its queries are
     * joined with {@code |}.
     *
     * @param filename path of the lexicon file to read
     * @throws IOException if the file cannot be opened or read
     */
    public Lexicon1Lang(String filename) throws IOException {
      captionToRawQuery = new LinkedHashMap<String, String>();
      captionToExpandedQuery = new LinkedHashMap<String, String>();
      List<String> lines;
      // try-with-resources so the file handle is released even if reading fails; closing a stream
      // that the helper may already have closed is harmless
      try (FileInputStream in = new FileInputStream(filename)) {
        lines =
            Util.getLinesFromInputStream(
                in, false /* ignore comment lines = false, we'll strip comments here */);
      }
      for (String line : lines) {
        int idx = line.indexOf('#'); // strip everything after the comment char
        if (idx >= 0) line = line.substring(0, idx);
        line = line.trim();
        if (line.length() == 0) continue; // ignore blank lines
        StringTokenizer st = new StringTokenizer(line, ":");
        if (st.countTokens() != 2) {
          log.warn("line ignored: " + line);
          continue;
        }

        String caption = st.nextToken().trim();
        String query = st.nextToken().trim();
        // a caption may be spread over several lines: accumulate its alternatives
        String existingQuery = captionToRawQuery.get(caption);
        if (!Util.nullOrEmpty(existingQuery)) query = existingQuery + "|" + query;
        captionToRawQuery.put(caption, query);
      }
      expandQueries();
    }

Example #2

Show file

File: Lexicon.java Project: ePADD/muse

    /**
     * Rebuilds {@code captionToExpandedQuery} from {@code captionToRawQuery}.
     *
     * <p>Each raw query is split on {@code |}. A term of the form <code>{caption}</code> is
     * replaced by that caption's already-expanded query if there is one, otherwise by its raw
     * query; a reference to an unknown caption is logged and dropped. Captions that are
     * referenced from another caption are recorded in {@code usedInOtherCaptions} and removed
     * from the expanded map at the end, so only top-level captions remain.
     *
     * <p>Terms are joined with a single {@code |} between emitted terms only, so a dropped or
     * blank term never leaves a dangling separator in the expanded query.
     */
    private void expandQueries() {
      // clear the expanded query map first, we don't want any residue from the previous state
      captionToExpandedQuery.clear();
      for (Map.Entry<String, String> entry : captionToRawQuery.entrySet()) {
        String caption = entry.getKey();
        String query = entry.getValue();
        List<String> orTerms = Util.tokenize(query, "|");
        StringBuilder expanded = new StringBuilder();
        for (String orTerm : orTerms) {
          String t = orTerm.trim();
          if (t.length() == 0) continue;
          if (t.startsWith("{") && t.endsWith("}")) {
            String c = t.substring(1, t.length() - 1);
            // note: expanded map, not rawmap, to allow multi-level expansion
            String exp = captionToExpandedQuery.get(c);
            if (exp == null) {
              t = captionToRawQuery.get(c);
              if (t == null) {
                log.warn("ERROR: no prev. caption: " + c + " in query " + query);
                continue;
              }
            } else {
              t = exp;
            }

            usedInOtherCaptions.add(c);
          }
          if (t.length() == 0) continue; // a referenced caption with an empty query adds nothing
          // there is no point adding or(|), as the query is treated just as a text string and is
          // not handled specially in Indexer.lookupDocsAsId
          // however, adding a non-word, non-special character will enable tokenization at that
          // index and will be appended as many "or" terms.
          if (expanded.length() > 0) expanded.append('|');
          expanded.append(t);
        }

        String expandedQuery = expanded.toString();
        if (caption.length() > 0 && expandedQuery.length() > 0) {
          // if caption already exists, just add to it
          String existingQuery = captionToExpandedQuery.get(caption);
          if (!Util.nullOrEmpty(existingQuery)) expandedQuery = existingQuery + "|" + expandedQuery;
          captionToExpandedQuery.put(caption, expandedQuery);
        }
      }

      // remove the non top-level captions
      for (String caption : usedInOtherCaptions) captionToExpandedQuery.remove(caption);
    }

Example #3

Show file

File: Lexicon.java Project: ePADD/muse

    /**
     * Main entry point: returns a caption -> docs map with one entry for each selected caption
     * that matches at least one document.
     *
     * <p>vihari: this is a weird name for a method that returns documents with emotions instead
     * of emotions.
     *
     * @param indexer used to look up the documents matching each caption's expanded query; must
     *     already have run
     * @param docs results are restricted to these docs; all docs are assumed if this is null or
     *     empty
     * @param originalContentOnly not consulted by this method; the query type is always set to
     *     {@code Indexer.QueryType.ORIGINAL}
     * @param captions captions to evaluate; null or none means all captions in the expanded
     *     query map. Captions without an expanded query are logged and skipped.
     * @return map from caption to its matching documents, in the order the captions were
     *     processed; captions with no matching document are omitted
     */
    public Map<String, Collection<Document>> getEmotions(
        Indexer indexer,
        Collection<Document> docs,
        boolean originalContentOnly,
        String... captions) {
      Map<String, Collection<Document>> result = new LinkedHashMap<String, Collection<Document>>();
      Set<Document> docs_set = Util.castOrCloneAsSet(docs);
      // a varargs array can legitimately arrive as null (e.g. a forwarded String[]), which the
      // contract above treats the same as "no captions given"
      String[] selected_captions =
          (captions != null && captions.length > 0)
              ? captions
              : captionToExpandedQuery.keySet().toArray(new String[0]);
      for (String caption : selected_captions) {
        String query = captionToExpandedQuery.get(caption);
        if (query == null) {
          log.warn("Skipping unknown caption '" + caption + "'");
          continue;
        }

        // query is simply word1|word2|word3 etc for that sentiment
        // in future, we may have a higher threshold for sentiment matching
        int threshold = 1;
        Indexer.QueryOptions options = new Indexer.QueryOptions();
        options.setThreshold(threshold);
        options.setQueryType(Indexer.QueryType.ORIGINAL);
        Collection<Document> docsForCaption = indexer.docsForQuery(query, options);

        // if @param docs is present, retain only those docs that match, otherwise retain all
        if (!Util.nullOrEmpty(docs_set)) {
          docsForCaption = Util.listIntersection(docsForCaption, docs_set);
        }

        // put it in the result only if at least 1 doc matches
        if (docsForCaption.size() > 0) result.put(caption, docsForCaption);
      }
      return result;
    }

Example #4

Show file

File: MemoryStudy.java Project: ePADD/muse

  /**
   * Generates list of questions and stores it in the current instance of MemoryStudy. We handle
   * two kinds of questions, namely person name tests and non-person name tests. A non-person name
   * test is a fill-in-the-blank kind where the blank is to be filled with the correct non-person
   * entity to complete the sentence; a person name test is to guess the person in the
   * correspondent list based on some distinctive sentences in the mail.
   *
   * <p>Outline: collect candidate entities from every message of the archive (remembering the
   * last date, messages and threads each one was seen in), split the archive's date range into
   * 30-day intervals, then try to create one clue per entity for the interval containing its last
   * sighting. Entities for which a clue is found become questions; the questions are sorted,
   * numbered and logged.
   *
   * @param archive archive whose messages are mined; also stored on this instance
   * @param nerModel handed to the {@code ArchiveCluer} that creates the clues
   * @param allDocs not consulted: the messages are always taken from {@code
   *     archive.getAllDocs()}
   * @param lex handed to the {@code ArchiveCluer} that creates the clues
   * @param maxInt - max. number of questions from a interval
   * @param personTest true for person name (guess the correspondent) questions, false for
   *     fill-in-the-blank questions on non-person entities
   * @throws IOException
   */
  public void generateQuestions(
      Archive archive,
      NERModel nerModel,
      Collection<EmailDocument> allDocs,
      Lexicon lex,
      int maxInt,
      boolean personTest)
      throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException,
          ParseException {
    this.archive = archive;
    questions = new ArrayList<>();
    ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex);

    double CUTOFF = 0.001;
    tabooCluesSet = new LinkedHashSet<>();
    archive.assignThreadIds();

    List<Document> docs = archive.getAllDocs();
    Map<String, Date> entityToLastDate = new LinkedHashMap<>();
    Multimap<String, EmailDocument> entityToMessages = LinkedHashMultimap.create();
    Multimap<String, Long> entityToThreads = LinkedHashMultimap.create();
    Multimap<String, String> ceToDisplayEntity = LinkedHashMultimap.create();

    int di = 0;

    // sort by date
    Collections.sort(docs);

    Set<String> ownerNames = archive.ownerNames;
    Date earliestDate = null, latestDate = null;
    Set<String> allEntities = new LinkedHashSet<>();
    for (Document doc : docs) {
      EmailDocument ed = (EmailDocument) doc;
      if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date;
      if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date;

      List<String> entities = new ArrayList<>();
      if (!personTest) {
        entities.addAll(
            Arrays.asList(archive.getAllNamesInDoc(doc, true))
                .stream()
                .filter(n -> n.typeScore > CUTOFF)
                .map(n -> n.text)
                .collect(Collectors.toList()));
      } else {
        // do not consider mailing lists
        if (ed.sentToMailingLists != null && ed.sentToMailingLists.length > 0) continue;
        // discard doc if it is not a sent mail
        if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue;

        if (ed.to != null) {
          for (Address addr : ed.to) {
            Contact c = archive.addressBook.lookupByAddress(addr);
            // NOTE(review): presumably the lookup can come back empty for an address that is not
            // in the address book; skip such a recipient rather than fail on it
            if (c == null) continue;
            String name = c.pickBestName();
            if (!ownerNames.contains(name) && !DictUtils.hasDictionaryWord(name)) {
              entities.add(name);
            }
          }
        }
      }
      allEntities.addAll(entities);

      // get entities
      for (String e : entities) {
        if (Util.nullOrEmpty(e)) continue;
        e = e.replaceAll("^\\W+|\\W+$", "");
        if (e.length() > 10 && e.toUpperCase().equals(e))
          continue; // all upper case, more than 10 letters, you're out.

        String ce = DictUtils.canonicalize(e); // canonicalize
        if (ce == null) {
          JSPHelper.log.info("Dropping entity: " + e);
          continue;
        }

        ceToDisplayEntity.put(ce, e);
        // docs are sorted by date, so the last write for an entity is its latest sighting
        entityToLastDate.put(ce, ed.date);
        entityToMessages.put(ce, ed);
        entityToThreads.put(ce, ed.threadID);
      }

      if ((++di) % 1000 == 0) log.info(di + " of " + docs.size() + " messages processed...<br/>");
    }
    log.info(
        "Considered #"
            + allEntities.size()
            + " unique entities and #"
            + ceToDisplayEntity.size()
            + " good ones in #"
            + docs.size()
            + " docs<br>");
    log.info("Owner Names: " + ownerNames);
    JSPHelper.log.info(
        "Considered #"
            + allEntities.size()
            + " unique entities and #"
            + ceToDisplayEntity.size()
            + " good ones in #"
            + docs.size()
            + "docs");

    JSPHelper.log.info(
        "earliest date = "
            + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate));
    JSPHelper.log.info(
        "latest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate));

    // Compute date intervals
    int DAYS_PER_INTERVAL = 30;
    List<Pair<Date, Date>> intervals =
        computeIntervals(earliestDate, latestDate, DAYS_PER_INTERVAL);

    // initialize clueInfos to empty lists
    List<ClueInfo> clueInfos[] = new ArrayList[intervals.size()];
    for (int i = 0; i < intervals.size(); i++) {
      clueInfos[i] = new ArrayList<ClueInfo>();
    }

    Map<Integer, Integer> intervalCount = new LinkedHashMap<>();
    // nSent is the number of sentences allowed in a clue text
    int nvalidclues = 0, nSent = 2;
    // generate clueInfos for each entity
    for (Map.Entry<String, Date> lastSighting : entityToLastDate.entrySet()) {
      String ce = lastSighting.getKey();
      Date lastSeenDate = lastSighting.getValue();

      // compute fullAnswer, which is a simple string: the display entity minus stop words
      String fullAnswer = "";
      {
        List<String> tokens = Util.tokenize(ceToDisplayEntity.get(ce).iterator().next());
        for (String t : tokens) {
          if (EnglishDictionary.stopWords.contains(t.toLowerCase())) continue;
          fullAnswer += t + " ";
        }
        fullAnswer = fullAnswer.trim();
      }
      // dont want the answer to be scored low just because it has extra non-word chars in the begin
      // or end
      fullAnswer = fullAnswer.replaceAll("^\\W+|\\W+$", "");

      // which interval does this date belong to?
      int interval = -1;
      Date intervalStart = null, intervalEnd = null;
      {
        int i = 0;
        for (Pair<Date, Date> p : intervals) {
          intervalStart = p.getFirst();
          intervalEnd = p.getSecond();

          if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate))
              || intervalStart.equals(lastSeenDate)
              || intervalEnd.equals(lastSeenDate)) {
            interval = i;
            break;
          }
          i++;
        }
      }
      if (interval < 0) {
        JSPHelper.log.info(
            "What, no interval!? for "
                + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate));
        // without an interval there is no slot in clueInfos to file this entity under; skip it
        // instead of indexing the array with -1
        continue;
      }
      // NOTE(review): the test is '>' so an interval can reach maxInt + 1 entities before it is
      // capped - confirm whether that is intended
      int seenInInterval = intervalCount.getOrDefault(interval, 0);
      if (seenInInterval > maxInt) continue;
      intervalCount.put(interval, seenInInterval + 1);

      // describe the answer's shape, e.g. "2 words: 5 letters, 7 letters"
      List<Integer> lengthList = Crossword.convertToWord(fullAnswer).getSecond();
      StringBuilder lengthDescrBuilder = new StringBuilder();
      if (lengthList.size() > 1) {
        lengthDescrBuilder.append(Integer.toString(lengthList.size())).append(" words: ");
      }
      boolean firstLength = true;
      for (Integer wordLength : lengthList) {
        // separator goes between items only, so an empty list simply yields an empty description
        if (!firstLength) lengthDescrBuilder.append(", ");
        lengthDescrBuilder.append(Util.pluralize(wordLength, "letter"));
        firstLength = false;
      }
      String lengthDescr = lengthDescrBuilder.toString();

      ClueInfo ci = new ClueInfo();
      ci.link = "../browse?term=\"" + fullAnswer + "\"&sort_by=recent&searchType=original";
      ci.lastSeenDate = lastSeenDate;
      ci.nMessages = entityToMessages.get(ce).size();
      ci.nThreads = entityToThreads.get(ce).size();

      // TODO: we are doing default initialisation of evaluators by setting it to null below, it is
      // more appropriate to consider it as an argument for this method
      Clue clue =
          cluer.createClue(
              fullAnswer,
              (personTest
                  ? ArchiveCluer.QuestionType.GUESS_CORRESPONDENT
                  : ArchiveCluer.QuestionType.FILL_IN_THE_BLANK),
              null,
              tabooCluesSet,
              null,
              intervalStart,
              intervalEnd,
              nSent,
              archive);

      if (clue == null) {
        JSPHelper.log.warn("Did not find any clue for: " + fullAnswer);
      } else {
        ci.clues = new Clue[] {clue};
        // is the times value of the clue important?
        questions.add(new MemoryQuestion(this, fullAnswer, clue, 1, lengthDescr));
        nvalidclues++;
        // makes sure that the clue with the same statement is not generated again
        tabooCluesSet.add(clue.clue);
      }
      clueInfos[interval].add(ci);
    }
    log.info("Found valid clues for " + nvalidclues + " answers");
    JSPHelper.log.info("Found valid clues for " + nvalidclues + " answers");

    log.info("Top candidates are:");
    for (MemoryQuestion mq : questions)
      log.info(mq.correctAnswer + " times=" + mq.stats.nMessagesWithAnswer);

    // sort q's by clue score
    Collections.sort(questions);

    // number the questions in their sorted order
    int count = 0;
    for (MemoryQuestion mq : questions) {
      mq.setQuestionNum(count++);
    }

    // log the questions as well, just in case we don't get to the final point due to user fatigue
    // or crashes
    logStats("questions.final", false);
  }

  /**
   * Splits the span from {@code earliestDate} to {@code latestDate} into consecutive intervals of
   * {@code daysPerInterval} whole days each, newest interval first. Every interval runs from
   * 0:00:00.000 of its first day to 23:59:59.999 of its last day, the first interval ends on the
   * day of {@code latestDate}, and intervals are added until one starts at or before {@code
   * earliestDate}, so every date in the span falls inside exactly one interval.
   *
   * @return the intervals as (start, end) pairs; empty if either date is null
   */
  private static List<Pair<Date, Date>> computeIntervals(
      Date earliestDate, Date latestDate, int daysPerInterval) {
    List<Pair<Date, Date>> intervals = new ArrayList<Pair<Date, Date>>();
    JSPHelper.log.info("computing time intervals");
    Date closingDate = latestDate;

    JSPHelper.log.info(
        "closing = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(closingDate));
    if (earliestDate != null && latestDate != null) {
      Date startDate;
      do {
        Calendar cal = new GregorianCalendar();
        cal.setTime(closingDate);
        // scroll to the very end of the closing day
        cal.set(Calendar.HOUR_OF_DAY, 23);
        cal.set(Calendar.MINUTE, 59);
        cal.set(Calendar.SECOND, 59);
        cal.set(Calendar.MILLISECOND, 999);
        Date endDate = cal.getTime();

        // 1- because we want from 0:00 of first date to 23:59 of last date
        cal.add(Calendar.DATE, (1 - daysPerInterval));
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
        startDate = cal.getTime();

        intervals.add(new Pair<Date, Date>(startDate, endDate));
        // ok we got an interval

        // closing date for the next interval is 1 day before startDate
        cal.add(Calendar.DATE, -1);
        closingDate = cal.getTime();
        // keep going until the newest-to-oldest chain of intervals reaches the earliest date
      } while (earliestDate.before(startDate));
    }
    JSPHelper.log.info("done computing intervals, #time intervals: " + intervals.size());
    for (Pair<Date, Date> p : intervals)
      JSPHelper.log.info(
          "Interval: "
              + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst())
              + " - "
              + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond()));
    return intervals;
  }