/**
 * Enriches the publications of a single researcher by extracting information from the
 * publications' original sources.
 *
 * <p>The {@code id} request parameter is optional. When it is missing, when no author with that
 * id exists, or when the author has no publication collection, the request is a no-op instead
 * of failing with a {@link NullPointerException}.
 *
 * @param authorId id of the author whose publications should be enriched (may be {@code null})
 * @param response the HTTP response; not used by this method
 * @return always an empty map
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *     enrichment call - verify against the service
 * @throws IOException see above
 * @throws ExecutionException see above
 * @throws URISyntaxException see above
 * @throws ParseException see above
 * @throws TimeoutException see above
 */
@RequestMapping(value = "/enrich", method = RequestMethod.GET)
@Transactional
public @ResponseBody Map<String, Object> researcherEnrich(
        @RequestParam(value = "id", required = false) final String authorId,
        final HttpServletResponse response)
        throws InterruptedException, IOException, ExecutionException, URISyntaxException,
                ParseException, TimeoutException {
    // "id" is declared required = false, so it can legitimately be absent
    if (authorId == null) {
        return Collections.emptyMap();
    }

    Author author = persistenceStrategy.getAuthorDAO().getById(authorId);

    // unknown id, or an author without a publication collection: nothing to enrich
    if (author == null || author.getPublications() == null) {
        return Collections.emptyMap();
    }

    // work on a copy so the enrichment does not iterate the author's own collection
    publicationCollectionService.enrichPublicationByExtractOriginalSources(
            new ArrayList<Publication>(author.getPublications()), author, true);

    return Collections.emptyMap();
}
/** * Landing page of researcher page * * @param sessionId * @param response * @return * @throws InterruptedException */ @RequestMapping(method = RequestMethod.GET) @Transactional public ModelAndView researcherPage( @RequestParam(value = "id", required = false) final String id, @RequestParam(value = "name", required = false) String name, @RequestParam(value = "add", required = false) final String add, final HttpServletResponse response) throws InterruptedException { ModelAndView model = TemplateHelper.createViewWithLink("researcher", LINK_NAME); List<Widget> widgets = new ArrayList<Widget>(); User user = securityService.getUser(); if (user != null) { List<UserWidget> userWidgets = persistenceStrategy .getUserWidgetDAO() .getWidget(user, WidgetType.RESEARCHER, WidgetStatus.ACTIVE); for (UserWidget userWidget : userWidgets) { Widget widget = userWidget.getWidget(); widget.setColor(userWidget.getWidgetColor()); widget.setWidgetHeight(userWidget.getWidgetHeight()); widget.setWidgetWidth(userWidget.getWidgetWidth()); widget.setPosition(userWidget.getPosition()); widgets.add(widget); } } else widgets.addAll( persistenceStrategy .getWidgetDAO() .getWidget(WidgetType.RESEARCHER, WidgetStatus.DEFAULT)); // assign the model model.addObject("widgets", widgets); // assign query if (id != null) { model.addObject("targetId", id); if (name == null) { Author author = persistenceStrategy.getAuthorDAO().getById(id); if (author != null) name = author.getName(); } } if (name != null) model.addObject("targetName", name); if (add != null) model.addObject("targetAdd", add); return model; }
/** * Create a cluster for publications, based on language and year * * @param author * @param publicationClustersMap */ public void constructPublicationClusterByLanguageAndYear( Author author, Map<String, PublicationClusterHelper> publicationClustersMap) { // fill publication clusters // prepare calendar for publication year Calendar calendar = Calendar.getInstance(); // get all publications from specific author and put it into cluster for (Publication publication : author.getPublications()) { // only proceed publication that have date, language and abstract if (publication.getAbstractText() == null || publication.getAbstractText().equals("")) continue; if (publication.getPublicationDate() == null) continue; if (publication.getLanguage() == null) continue; // get publication year calendar.setTime(publication.getPublicationDate()); // construct clusterMap key String clusterMapKey = publication.getLanguage() + calendar.get(Calendar.YEAR); // construct publication map if (publicationClustersMap.get(clusterMapKey) == null) { // not exist create new cluster PublicationClusterHelper publicationCluster = new PublicationClusterHelper(); publicationCluster.setLangauge(publication.getLanguage()); publicationCluster.setYear(calendar.get(Calendar.YEAR)); publicationCluster.addPublicationAndUpdate(publication); // add into map publicationClustersMap.put(clusterMapKey, publicationCluster); } else { // exist on map, get the cluster PublicationClusterHelper publicationCluster = publicationClustersMap.get(clusterMapKey); publicationCluster.addPublicationAndUpdate(publication); } } }
/** * Get list of author given query ( author name ) * * @param query * @param page * @param maxresult * @param response * @return JSON Maps of response with researcher list * @throws IOException * @throws InterruptedException * @throws ExecutionException * @throws OAuthProblemException * @throws OAuthSystemException * @throws org.apache.http.ParseException */ @SuppressWarnings("unchecked") @Transactional @RequestMapping(value = "/search", method = RequestMethod.GET) public @ResponseBody Map<String, Object> getAuthorList( @RequestParam(value = "query", required = false) String query, @RequestParam(value = "queryType", required = false) String queryType, @RequestParam(value = "page", required = false) Integer startPage, @RequestParam(value = "maxresult", required = false) Integer maxresult, @RequestParam(value = "source", required = false) String source, @RequestParam(value = "addedAuthor", required = false) String addedAuthor, @RequestParam(value = "fulltextSearch", required = false) String fulltextSearch, @RequestParam(value = "persist", required = false) String persist, HttpServletRequest request, HttpServletResponse response) throws IOException, InterruptedException, ExecutionException, org.apache.http.ParseException, OAuthSystemException, OAuthProblemException { /* == Set Default Values== */ if (query == null) query = ""; if (queryType == null) queryType = "name"; if (startPage == null) startPage = 0; if (maxresult == null) maxresult = 50; if (source == null) source = "internal"; if (addedAuthor == null) addedAuthor = "no"; if (fulltextSearch == null) fulltextSearch = "no"; if (persist == null) persist = "no"; // create JSON mapper for response Map<String, Object> responseMap = new LinkedHashMap<String, Object>(); boolean persistResult = false; responseMap.put("query", query); if (!queryType.equals("name")) responseMap.put("queryType", queryType); responseMap.put("page", startPage); responseMap.put("maxresult", maxresult); responseMap.put("source", source); if 
(!fulltextSearch.equals("no")) responseMap.put("fulltextSearch", fulltextSearch); if (!persist.equals("no")) { responseMap.put("persist", persist); persistResult = true; } if (addedAuthor.equals("yes")) responseMap.put("addedAuthor", addedAuthor); Map<String, Object> authorsMap = researcherFeature .getResearcherSearch() .getResearcherMapByQuery( query, queryType, startPage, maxresult, source, addedAuthor, fulltextSearch, persistResult); // store in session if (source.equals("external") || source.equals("all")) { request.getSession().setAttribute("researchers", authorsMap.get("authors")); // recheck if session really has been updated // (there is a bug in spring session, which makes session is // not updated sometimes) - a little workaround boolean isSessionUpdated = false; while (!isSessionUpdated) { Object authors = request.getSession().getAttribute("researchers"); if (authors.equals(authorsMap.get("authors"))) isSessionUpdated = true; else request.getSession().setAttribute("researchers", authorsMap.get("authors")); } log.info("\nRESEARCHER SESSION SEARCH"); @SuppressWarnings("unchecked") List<Author> sessionAuthors = (List<Author>) request.getSession().getAttribute("researchers"); // get author from session -> just for debug if (sessionAuthors != null && !sessionAuthors.isEmpty()) { for (Author sessionAuthor : sessionAuthors) { for (AuthorSource as : sessionAuthor.getAuthorSources()) { log.info( sessionAuthor.getId() + "-" + sessionAuthor.getName() + " - " + as.getSourceType() + " -> " + as.getSourceUrl()); } } } } if (authorsMap != null && (Integer) authorsMap.get("totalCount") > 0) { responseMap.put("totalCount", (Integer) authorsMap.get("totalCount")); return researcherFeature .getResearcherSearch() .printJsonOutput(responseMap, (List<Author>) authorsMap.get("authors")); } else { responseMap.put("totalCount", 0); responseMap.put("count", 0); return responseMap; } }
/** * Collect the author interest result as JSON object * * @param author * @param responseMap * @return */ private Map<String, Object> getInterestFromDatabase( Author author, Map<String, Object> responseMap) { // get available year List<String> years = persistenceStrategy.getPublicationDAO().getDistinctPublicationYearByAuthor(author, "ASC"); List<AuthorInterestProfile> authorInterestProfiles = new ArrayList<AuthorInterestProfile>(); authorInterestProfiles.addAll(author.getAuthorInterestProfiles()); // sort based on profile length ( currently there is no attribute to // store position) Collections.sort( authorInterestProfiles, new AuthorInterestProfileByProfileNameLengthComparator()); // the whole result related to interest List<Object> authorInterestResult = new ArrayList<Object>(); for (AuthorInterestProfile authorInterestProfile : authorInterestProfiles) { // put profile on map Map<String, Object> authorInterestResultProfilesMap = new HashMap<String, Object>(); // get interest profile name and description String interestProfileName = authorInterestProfile.getName(); String interestProfileDescription = authorInterestProfile.getDescription(); // get authorInterest set on profile Set<AuthorInterest> authorInterests = authorInterestProfile.getAuthorInterests(); // if profile contain no authorInterest just skip if (authorInterests == null || authorInterests.isEmpty()) continue; // a map for storing authorInterst based on language Map<String, List<AuthorInterest>> authorInterestLanguageMap = new HashMap<String, List<AuthorInterest>>(); // split authorinterest based on language and put it on the map for (AuthorInterest authorInterest : authorInterests) { if (authorInterestLanguageMap.get(authorInterest.getLanguage()) != null) { authorInterestLanguageMap.get(authorInterest.getLanguage()).add(authorInterest); } else { List<AuthorInterest> authorInterestList = new ArrayList<AuthorInterest>(); authorInterestList.add(authorInterest); 
authorInterestLanguageMap.put(authorInterest.getLanguage(), authorInterestList); } } // prepare calendar for extractind year from date Calendar calendar = Calendar.getInstance(); // result author interest based on language List<Object> authorInterestResultLanguageList = new ArrayList<Object>(); // sort authorinterest based on year for (Map.Entry<String, List<AuthorInterest>> authorInterestLanguageIterator : authorInterestLanguageMap.entrySet()) { // result container Map<String, Object> authorInterestResultLanguageMap = new LinkedHashMap<String, Object>(); // hashmap value String interestLanguage = authorInterestLanguageIterator.getKey(); List<AuthorInterest> interestList = authorInterestLanguageIterator.getValue(); // sort based on year Collections.sort(interestList, new AuthorInterestByDateComparator()); // term values based on year result container List<Object> authorInterestResultYearList = new ArrayList<Object>(); // get interest year, term and value int indexYear = 0; boolean increaseIndex = true; for (AuthorInterest authorInterest : interestList) { increaseIndex = true; // just skip if contain no term weights if (authorInterest.getTermWeights() == null || authorInterest.getTermWeights().isEmpty()) continue; // get year calendar.setTime(authorInterest.getYear()); String year = Integer.toString(calendar.get(Calendar.YEAR)); while (!years.get(indexYear).equals(year)) { // empty result Map<String, Object> authorInterestResultYearMap = new LinkedHashMap<String, Object>(); authorInterestResultYearMap.put("year", years.get(indexYear)); authorInterestResultYearMap.put("termvalue", Collections.emptyList()); indexYear++; increaseIndex = false; // remove duplicated year if (!authorInterestResultYearList.isEmpty()) { @SuppressWarnings("unchecked") Map<String, Object> prevAuthorInterestResultYearMap = (Map<String, Object>) authorInterestResultYearList.get(authorInterestResultYearList.size() - 1); if (prevAuthorInterestResultYearMap.get("year").equals(years.get(indexYear - 
1))) continue; } authorInterestResultYearList.add(authorInterestResultYearMap); } List<Object> termValueResult = new ArrayList<Object>(); // put term and value for (Map.Entry<Interest, Double> termWeightMap : authorInterest.getTermWeights().entrySet()) { // just remove not significant value if (termWeightMap.getValue() < 0.4) continue; List<Object> termWeightObjects = new ArrayList<Object>(); termWeightObjects.add(termWeightMap.getKey().getId()); termWeightObjects.add(termWeightMap.getKey().getTerm()); termWeightObjects.add(termWeightMap.getValue()); termValueResult.add(termWeightObjects); } // result container Map<String, Object> authorInterestResultYearMap = new LinkedHashMap<String, Object>(); authorInterestResultYearMap.put("year", year); authorInterestResultYearMap.put("termvalue", termValueResult); authorInterestResultYearList.add(authorInterestResultYearMap); if (increaseIndex) indexYear++; } // continue interest year which is missing for (int i = indexYear + 1; i < years.size(); i++) { Map<String, Object> authorInterestResultYearMap = new LinkedHashMap<String, Object>(); authorInterestResultYearMap.put("year", years.get(i)); authorInterestResultYearMap.put("termvalue", Collections.emptyList()); authorInterestResultYearList.add(authorInterestResultYearMap); } authorInterestResultLanguageMap.put("language", interestLanguage); authorInterestResultLanguageMap.put("interestyears", authorInterestResultYearList); if (interestLanguage.equals("english")) authorInterestResultLanguageList.add(0, authorInterestResultLanguageMap); else authorInterestResultLanguageList.add(authorInterestResultLanguageMap); } // put profile map authorInterestResultProfilesMap.put("profile", interestProfileName); authorInterestResultProfilesMap.put("description", interestProfileDescription); authorInterestResultProfilesMap.put("interestlanguages", authorInterestResultLanguageList); authorInterestResult.add(authorInterestResultProfilesMap); } responseMap.put("interest", 
authorInterestResult); // put also publication return responseMap; }
/** * Get author interests from active interest profiles * * @param responseMap * @param author * @param updateAuthorInterest * @return * @throws ParseException */ public Map<String, Object> getInterestFromAuthor( Map<String, Object> responseMap, Author author, boolean updateAuthorInterest) throws ParseException { logger.info("start mining interest "); // get default interest profile List<InterestProfile> interestProfilesDefault = persistenceStrategy .getInterestProfileDAO() .getAllActiveInterestProfile(InterestProfileType.DEFAULT); // get default interest profile List<InterestProfile> interestProfilesDerived = persistenceStrategy .getInterestProfileDAO() .getAllActiveInterestProfile(InterestProfileType.DERIVED); if (interestProfilesDefault.isEmpty() && interestProfilesDerived.isEmpty()) { logger.warn("No active interest profile found"); return responseMap; } if (author.getPublications() == null || author.getPublications().isEmpty()) { logger.warn("No publication found"); return responseMap; } // update for all author interest profile // updateAuthorInterest = true; if (!updateAuthorInterest) { // get interest profile from author Set<AuthorInterestProfile> authorInterestProfiles = author.getAuthorInterestProfiles(); if (authorInterestProfiles != null && !authorInterestProfiles.isEmpty()) { // check for missing default interest profile in author // only calculate missing one for (Iterator<InterestProfile> interestProfileIterator = interestProfilesDefault.iterator(); interestProfileIterator.hasNext(); ) { InterestProfile interestProfileDefault = interestProfileIterator.next(); for (AuthorInterestProfile authorInterestProfile : authorInterestProfiles) { if (authorInterestProfile.getInterestProfile() != null && authorInterestProfile.getInterestProfile().equals(interestProfileDefault)) { interestProfileIterator.remove(); break; } } } // check for missing derivative interest profile for (Iterator<InterestProfile> interestProfileIterator = 
interestProfilesDerived.iterator(); interestProfileIterator.hasNext(); ) { InterestProfile interestProfileDerived = interestProfileIterator.next(); for (AuthorInterestProfile authorInterestProfile : authorInterestProfiles) { if (authorInterestProfile.getInterestProfile() != null && authorInterestProfile.getInterestProfile().equals(interestProfileDerived)) { interestProfileIterator.remove(); break; } } } } } else { // clear previous results if (author.getAuthorInterestProfiles() != null && !author.getAuthorInterestProfiles().isEmpty()) { author.getAuthorInterestProfiles().clear(); } } // if defaultInterestProfile not null, // means interest calculation from beginning is needed if (!interestProfilesDefault.isEmpty()) { // first create publication cluster // prepare the cluster container Map<String, PublicationClusterHelper> publicationClustersMap = new HashMap<String, PublicationClusterHelper>(); // construct the cluster logger.info("Construct publication cluster "); constructPublicationClusterByLanguageAndYear(author, publicationClustersMap); // cluster is ready if (!publicationClustersMap.isEmpty()) { // calculate default interest profile calculateInterestProfilesDefault(author, publicationClustersMap, interestProfilesDefault); } } // check for derived interest profile if (!interestProfilesDerived.isEmpty()) { // calculate derived interest profile calculateInterestProfilesDerived(author, interestProfilesDerived); } // get and put author interest profile into map or list getInterestFromDatabase(author, responseMap); return responseMap; }
/** * Calculate each default InterestProfile * * @param author * @param interestProfileDefault * @param publicationClustersMap */ public void calculateEachInterestProfileDefault( Author author, Set<Interest> newInterests, InterestProfile interestProfileDefault, Map<String, PublicationClusterHelper> publicationClustersMap) { // get author interest profile Calendar calendar = Calendar.getInstance(); // default profile name [DEFAULT_PROFILENAME] String authorInterestProfileName = interestProfileDefault.getName(); // create new author interest profile for c-value AuthorInterestProfile authorInterestProfile = new AuthorInterestProfile(); authorInterestProfile.setCreated(calendar.getTime()); authorInterestProfile.setDescription( "Interest mining using " + interestProfileDefault.getName() + " algorithm"); authorInterestProfile.setName(authorInterestProfileName); // CorePhrase and WordFreq specific, according to Svetoslav Evtimov thesis // yearFactor Map format Map< Language-Year , value > // totalYearsFactor Map< Language, value > Map<String, Double> yearFactorMap = new HashMap<String, Double>(); Map<String, Double> totalYearsFactorMap = new HashMap<String, Double>(); // calculate some weighting factors // if ( interestProfileDefault.getName().toLowerCase().equals( "corephrase" ) || // interestProfileDefault.getName().toLowerCase().equals( "wordfreq" ) ) // { // yearFactorMap = CorePhraseAndWordFreqHelper.calculateYearFactor( publicationClustersMap, // 0.25 ); // totalYearsFactorMap = CorePhraseAndWordFreqHelper.calculateTotalYearsFactor( // publicationClustersMap ); // } // get the number of active extraction services int numberOfExtractionService = applicationService.getExtractionServices().size(); // loop to each cluster and calculate default profiles for (Map.Entry<String, PublicationClusterHelper> publicationClusterEntry : publicationClustersMap.entrySet()) { PublicationClusterHelper publicationCluster = publicationClusterEntry.getValue(); if 
(publicationCluster.getTermMap() == null || publicationCluster.getTermMap().isEmpty()) continue; // prepare variables AuthorInterest authorInterest = new AuthorInterest(); // assign author interest method if (interestProfileDefault.getName().toLowerCase().equals("cvalue")) { cValueInterestProfile.doCValueCalculation( authorInterest, newInterests, publicationCluster, numberOfExtractionService); } else if (interestProfileDefault.getName().toLowerCase().equals("corephrase")) { Double yearFactor = yearFactorMap.get(publicationCluster.getLanguage() + publicationCluster.getYear()); Double totalYearFactor = totalYearsFactorMap.get(publicationCluster.getLanguage()); corePhraseInterestProfile.doCorePhraseCalculation( authorInterest, newInterests, publicationCluster, yearFactor, totalYearFactor, numberOfExtractionService); } else if (interestProfileDefault.getName().toLowerCase().equals("wordfreq")) { Double yearFactor = yearFactorMap.get(publicationCluster.getLanguage() + publicationCluster.getYear()); Double totalYearFactor = totalYearsFactorMap.get(publicationCluster.getLanguage()); wordFreqInterestProfile.doWordFreqCalculation( authorInterest, newInterests, publicationCluster, yearFactor, totalYearFactor, numberOfExtractionService); } // Put other default interest profiles else if (interestProfileDefault.getName().toLowerCase().equals("lda")) { } // at the end persist new interests // for ( Interest newInterest : newInterests ) // persistenceStrategy.getInterestDAO().persist( newInterest ); // check author interest calculation result if (authorInterest.getTermWeights() != null && !authorInterest.getTermWeights().isEmpty()) { authorInterest.setAuthorInterestProfile(authorInterestProfile); authorInterestProfile.addAuthorInterest(authorInterest); authorInterestProfile.setInterestProfile(interestProfileDefault); // persistenceStrategy.getAuthorInterestProfileDAO().persist( authorInterestProfile ); } } // at the end persist if (authorInterestProfile.getAuthorInterests() != 
null && !authorInterestProfile.getAuthorInterests().isEmpty()) { authorInterestProfile.setAuthor(author); author.addAuthorInterestProfiles(authorInterestProfile); persistenceStrategy.getAuthorDAO().persist(author); } }
/** * Calculated derived interest profile (Intersection and/or Union between interest profile) in an * author * * @param author * @param interestProfilesDerived */ private void calculateInterestProfilesDerived( Author author, List<InterestProfile> interestProfilesDerived) { // get authorInterest set on profile for (InterestProfile interestProfileDerived : interestProfilesDerived) { String[] derivedInterestProfileName = interestProfileDerived.getName().split("\\s+"); // at list profile name has three segment if (derivedInterestProfileName.length < 3) continue; // prepare variables AuthorInterestProfile authorInterestProfile1 = null; AuthorInterestProfile authorInterestProfile2 = null; AuthorInterestProfile authorInterestProfileResult = null; String operationType = null; for (String partOfProfileName : derivedInterestProfileName) { // ? sometimes problem on encoding if (partOfProfileName.equals("∩") || partOfProfileName.equals("?") || partOfProfileName.equals("+") || partOfProfileName.equals("∪")) { if (authorInterestProfileResult != null) { authorInterestProfile1 = authorInterestProfileResult; authorInterestProfileResult = null; } if (partOfProfileName.equals("∩") || partOfProfileName.equals("?") || partOfProfileName.equals("+")) operationType = "INTERSECTION"; else operationType = "UNION"; } else { if (authorInterestProfile1 == null) { authorInterestProfile1 = author.getSpecificAuthorInterestProfile(partOfProfileName); if (authorInterestProfile1 == null) { logger.error("AuthorInterestProfile " + partOfProfileName + " not found"); // continue to next derived author profile, if exist break; } } else { authorInterestProfile2 = author.getSpecificAuthorInterestProfile(partOfProfileName); if (authorInterestProfile2 == null) { logger.error("AuthorInterestProfile " + partOfProfileName + " not found"); // continue to next derived author profile, if exist break; } } // calculate and persist if (authorInterestProfile1 != null && authorInterestProfile2 != null && operationType 
!= null) { if (operationType.equals("INTERSECTION")) authorInterestProfileResult = calculateIntersectionOfAuthorInterestProfiles( authorInterestProfile1, authorInterestProfile2, interestProfileDerived); else authorInterestProfileResult = calculateUnionOfAuthorInterestProfiles( authorInterestProfile1, authorInterestProfile2, interestProfileDerived); } } } // persist result if (authorInterestProfileResult != null && (authorInterestProfileResult.getAuthorInterests() != null && !authorInterestProfileResult.getAuthorInterests().isEmpty())) { authorInterestProfileResult.setAuthor(author); author.addAuthorInterestProfiles(authorInterestProfileResult); persistenceStrategy.getAuthorDAO().persist(author); persistenceStrategy.getAuthorInterestProfileDAO().persist(authorInterestProfileResult); } } }